blob: 056c412e3c9d261cfebb8c7cffa67281f2bb66e9 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en" data-content_root="../../">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Aggregation &#8212; Apache Arrow DataFusion documentation</title>
<link href="../../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link href="../../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link rel="stylesheet"
href="../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=8f2a1f02" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/pydata-sphinx-theme.css?v=1140d252" />
<link rel="stylesheet" type="text/css" href="../../_static/graphviz.css?v=4ae1632d" />
<link rel="stylesheet" type="text/css" href="../../_static/theme_overrides.css?v=dca7052a" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf">
<script src="../../_static/documentation_options.js?v=8a448e45"></script>
<script src="../../_static/doctools.js?v=9bcbadda"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
<link rel="next" title="Window Functions" href="windows.html" />
<link rel="prev" title="Functions" href="functions.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en">
<!-- Google Analytics -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<div class="col-12 col-md-3 bd-sidebar">
<div class="sidebar-start-items">
<a class="navbar-brand" href="../../index.html">
<img src="../../_static/images/2x_bgwhite_original.png" class="logo" alt="logo">
</a>
<form class="bd-search d-flex align-items-center" action="../../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
LINKS
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/datafusion-python">
Github and Issue Tracker
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://docs.rs/datafusion/latest/datafusion/">
Rust's API Docs
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md">
Code of conduct
</a>
</li>
<li class="toctree-l1">
<a class="reference external" href="https://github.com/apache/datafusion-python/tree/main/examples">
Examples
</a>
</li>
</ul>
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
USER GUIDE
</span>
</p>
<ul class="current nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../introduction.html">
Introduction
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../basics.html">
Concepts
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../data-sources.html">
Data Sources
</a>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../dataframe/index.html">
DataFrames
</a>
<input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/>
<label for="toctree-checkbox-1">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../dataframe/rendering.html">
HTML Rendering in Jupyter
</a>
</li>
</ul>
</li>
<li class="toctree-l1 current active has-children">
<a class="reference internal" href="index.html">
Common Operations
</a>
<input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/>
<label for="toctree-checkbox-2">
<i class="fas fa-chevron-down">
</i>
</label>
<ul class="current">
<li class="toctree-l2">
<a class="reference internal" href="views.html">
Registering Views
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="basic-info.html">
Basic Operations
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="select-and-filter.html">
Column Selections
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="expressions.html">
Expressions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="joins.html">
Joins
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="functions.html">
Functions
</a>
</li>
<li class="toctree-l2 current active">
<a class="current reference internal" href="#">
Aggregation
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="windows.html">
Window Functions
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="udf-and-udfa.html">
User-Defined Functions
</a>
</li>
</ul>
</li>
<li class="toctree-l1 has-children">
<a class="reference internal" href="../io/index.html">
IO
</a>
<input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/>
<label for="toctree-checkbox-3">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2">
<a class="reference internal" href="../io/arrow.html">
Arrow
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../io/avro.html">
Avro
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../io/csv.html">
CSV
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../io/json.html">
JSON
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../io/parquet.html">
Parquet
</a>
</li>
<li class="toctree-l2">
<a class="reference internal" href="../io/table_provider.html">
Custom Table Provider
</a>
</li>
</ul>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../configuration.html">
Configuration
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../sql.html">
SQL
</a>
</li>
</ul>
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
CONTRIBUTOR GUIDE
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1">
<a class="reference internal" href="../../contributor-guide/introduction.html">
Introduction
</a>
</li>
<li class="toctree-l1">
<a class="reference internal" href="../../contributor-guide/ffi.html">
Python Extensions
</a>
</li>
</ul>
<p aria-level="2" class="caption" role="heading">
<span class="caption-text">
API
</span>
</p>
<ul class="nav bd-sidenav">
<li class="toctree-l1 has-children">
<a class="reference internal" href="../../autoapi/index.html">
API Reference
</a>
<input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/>
<label for="toctree-checkbox-4">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l2 has-children">
<a class="reference internal" href="../../autoapi/datafusion/index.html">
datafusion
</a>
<input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/>
<label for="toctree-checkbox-5">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/catalog/index.html">
datafusion.catalog
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/context/index.html">
datafusion.context
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/dataframe/index.html">
datafusion.dataframe
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/dataframe_formatter/index.html">
datafusion.dataframe_formatter
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/expr/index.html">
datafusion.expr
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/functions/index.html">
datafusion.functions
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/html_formatter/index.html">
datafusion.html_formatter
</a>
</li>
<li class="toctree-l3 has-children">
<a class="reference internal" href="../../autoapi/datafusion/input/index.html">
datafusion.input
</a>
<input class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" type="checkbox"/>
<label for="toctree-checkbox-6">
<i class="fas fa-chevron-down">
</i>
</label>
<ul>
<li class="toctree-l4">
<a class="reference internal" href="../../autoapi/datafusion/input/base/index.html">
datafusion.input.base
</a>
</li>
<li class="toctree-l4">
<a class="reference internal" href="../../autoapi/datafusion/input/location/index.html">
datafusion.input.location
</a>
</li>
</ul>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/io/index.html">
datafusion.io
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/object_store/index.html">
datafusion.object_store
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/plan/index.html">
datafusion.plan
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/record_batch/index.html">
datafusion.record_batch
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/substrait/index.html">
datafusion.substrait
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/unparser/index.html">
datafusion.unparser
</a>
</li>
<li class="toctree-l3">
<a class="reference internal" href="../../autoapi/datafusion/user_defined/index.html">
datafusion.user_defined
</a>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</nav>
</div>
<div class="sidebar-end-items">
</div>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<div class="toc-item">
<div class="tocsection onthispage pt-5 pb-3">
<i class="fas fa-list"></i> On this page
</div>
<nav id="bd-toc-nav">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#setting-parameters">
Setting Parameters
</a>
<ul class="visible nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#ordering">
Ordering
</a>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#distinct">
Distinct
</a>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#null-treatment">
Null Treatment
</a>
</li>
<li class="toc-h3 nav-item toc-entry">
<a class="reference internal nav-link" href="#filter">
Filter
</a>
</li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry">
<a class="reference internal nav-link" href="#aggregate-functions">
Aggregate Functions
</a>
</li>
</ul>
</nav>
</div>
<div class="toc-item">
</div>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<section id="aggregation">
<span id="id1"></span><h1>Aggregation<a class="headerlink" href="#aggregation" title="Link to this heading">¶</a></h1>
<p>An aggregate or aggregation is a function where the values of multiple rows are processed together
to form a single summary value. For performing an aggregation, DataFusion provides the
<a class="reference internal" href="../../autoapi/datafusion/dataframe/index.html#datafusion.dataframe.DataFrame.aggregate" title="datafusion.dataframe.DataFrame.aggregate"><code class="xref py py-func docutils literal notranslate"><span class="pre">aggregate()</span></code></a></p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">1</span><span class="p">]:</span> <span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">SessionContext</span><span class="p">,</span> <span class="n">col</span><span class="p">,</span> <span class="n">lit</span><span class="p">,</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">f</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">2</span><span class="p">]:</span> <span class="n">ctx</span> <span class="o">=</span> <span class="n">SessionContext</span><span class="p">()</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">3</span><span class="p">]:</span> <span class="n">df</span> <span class="o">=</span> <span class="n">ctx</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">&quot;pokemon.csv&quot;</span><span class="p">)</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">4</span><span class="p">]:</span> <span class="n">col_type_1</span> <span class="o">=</span> <span class="n">col</span><span class="p">(</span><span class="s1">&#39;&quot;Type 1&quot;&#39;</span><span class="p">)</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">5</span><span class="p">]:</span> <span class="n">col_type_2</span> <span class="o">=</span> <span class="n">col</span><span class="p">(</span><span class="s1">&#39;&quot;Type 2&quot;&#39;</span><span class="p">)</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">6</span><span class="p">]:</span> <span class="n">col_speed</span> <span class="o">=</span> <span class="n">col</span><span class="p">(</span><span class="s1">&#39;&quot;Speed&quot;&#39;</span><span class="p">)</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">7</span><span class="p">]:</span> <span class="n">col_attack</span> <span class="o">=</span> <span class="n">col</span><span class="p">(</span><span class="s1">&#39;&quot;Attack&quot;&#39;</span><span class="p">)</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">8</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([</span><span class="n">col_type_1</span><span class="p">],</span> <span class="p">[</span>
<span class="o">...</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">approx_distinct</span><span class="p">(</span><span class="n">col_speed</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Count&quot;</span><span class="p">),</span>
<span class="o">...</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">approx_median</span><span class="p">(</span><span class="n">col_speed</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Median Speed&quot;</span><span class="p">),</span>
<span class="o">...</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">approx_percentile_cont</span><span class="p">(</span><span class="n">col_speed</span><span class="p">,</span> <span class="mf">0.9</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;90% Speed&quot;</span><span class="p">)])</span>
<span class="o">...</span><span class="p">:</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">8</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+----------+-------+--------------+-----------+</span>
<span class="o">|</span> <span class="n">Type</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Count</span> <span class="o">|</span> <span class="n">Median</span> <span class="n">Speed</span> <span class="o">|</span> <span class="mi">90</span><span class="o">%</span> <span class="n">Speed</span> <span class="o">|</span>
<span class="o">+----------+-------+--------------+-----------+</span>
<span class="o">|</span> <span class="n">Bug</span> <span class="o">|</span> <span class="mi">11</span> <span class="o">|</span> <span class="mi">63</span> <span class="o">|</span> <span class="mi">107</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span> <span class="mi">12</span> <span class="o">|</span> <span class="mi">55</span> <span class="o">|</span> <span class="mi">85</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Electric</span> <span class="o">|</span> <span class="mi">8</span> <span class="o">|</span> <span class="mi">100</span> <span class="o">|</span> <span class="mi">136</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fairy</span> <span class="o">|</span> <span class="mi">2</span> <span class="o">|</span> <span class="mi">47</span> <span class="o">|</span> <span class="mi">60</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Normal</span> <span class="o">|</span> <span class="mi">20</span> <span class="o">|</span> <span class="mi">71</span> <span class="o">|</span> <span class="mi">110</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="mi">2</span> <span class="o">|</span> <span class="mi">90</span> <span class="o">|</span> <span class="mi">95</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Grass</span> <span class="o">|</span> <span class="mi">8</span> <span class="o">|</span> <span class="mi">55</span> <span class="o">|</span> <span class="mi">80</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="mi">8</span> <span class="o">|</span> <span class="mi">91</span> <span class="o">|</span> <span class="mi">100</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Water</span> <span class="o">|</span> <span class="mi">21</span> <span class="o">|</span> <span class="mi">70</span> <span class="o">|</span> <span class="mi">90</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ground</span> <span class="o">|</span> <span class="mi">7</span> <span class="o">|</span> <span class="mi">40</span> <span class="o">|</span> <span class="mi">112</span> <span class="o">|</span>
<span class="o">+----------+-------+--------------+-----------+</span>
<span class="n">Data</span> <span class="n">truncated</span><span class="o">.</span>
</pre></div>
</div>
<p>When the <code class="code docutils literal notranslate"><span class="pre">group_by</span></code> list is empty the aggregation is done over the whole <a class="reference internal" href="../../autoapi/datafusion/dataframe/index.html#datafusion.dataframe.DataFrame" title="datafusion.dataframe.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DataFrame</span></code></a>.
For grouping the <code class="code docutils literal notranslate"><span class="pre">group_by</span></code> list must contain at least one column.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">9</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([</span><span class="n">col_type_1</span><span class="p">],</span> <span class="p">[</span>
<span class="o">...</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">col_speed</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Max Speed&quot;</span><span class="p">),</span>
<span class="o">...</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">avg</span><span class="p">(</span><span class="n">col_speed</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Avg Speed&quot;</span><span class="p">),</span>
<span class="o">...</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">col_speed</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Min Speed&quot;</span><span class="p">)])</span>
<span class="o">...</span><span class="p">:</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">9</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+----------+-----------+--------------------+-----------+</span>
<span class="o">|</span> <span class="n">Type</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Max</span> <span class="n">Speed</span> <span class="o">|</span> <span class="n">Avg</span> <span class="n">Speed</span> <span class="o">|</span> <span class="n">Min</span> <span class="n">Speed</span> <span class="o">|</span>
<span class="o">+----------+-----------+--------------------+-----------+</span>
<span class="o">|</span> <span class="n">Bug</span> <span class="o">|</span> <span class="mi">145</span> <span class="o">|</span> <span class="mf">66.78571428571429</span> <span class="o">|</span> <span class="mi">25</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span> <span class="mi">90</span> <span class="o">|</span> <span class="mf">58.785714285714285</span> <span class="o">|</span> <span class="mi">25</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Electric</span> <span class="o">|</span> <span class="mi">140</span> <span class="o">|</span> <span class="mf">98.88888888888889</span> <span class="o">|</span> <span class="mi">45</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fairy</span> <span class="o">|</span> <span class="mi">60</span> <span class="o">|</span> <span class="mf">47.5</span> <span class="o">|</span> <span class="mi">35</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Normal</span> <span class="o">|</span> <span class="mi">121</span> <span class="o">|</span> <span class="mf">72.75</span> <span class="o">|</span> <span class="mi">20</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="mi">95</span> <span class="o">|</span> <span class="mf">90.0</span> <span class="o">|</span> <span class="mi">85</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Grass</span> <span class="o">|</span> <span class="mi">80</span> <span class="o">|</span> <span class="mf">54.23076923076923</span> <span class="o">|</span> <span class="mi">30</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="mi">105</span> <span class="o">|</span> <span class="mf">86.28571428571429</span> <span class="o">|</span> <span class="mi">60</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Water</span> <span class="o">|</span> <span class="mi">115</span> <span class="o">|</span> <span class="mf">67.25806451612904</span> <span class="o">|</span> <span class="mi">15</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ground</span> <span class="o">|</span> <span class="mi">120</span> <span class="o">|</span> <span class="mf">58.125</span> <span class="o">|</span> <span class="mi">25</span> <span class="o">|</span>
<span class="o">+----------+-----------+--------------------+-----------+</span>
<span class="n">Data</span> <span class="n">truncated</span><span class="o">.</span>
</pre></div>
</div>
<p>More than one column can be used for grouping</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">10</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([</span><span class="n">col_type_1</span><span class="p">,</span> <span class="n">col_type_2</span><span class="p">],</span> <span class="p">[</span>
<span class="o">....</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">col_speed</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Max Speed&quot;</span><span class="p">),</span>
<span class="o">....</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">avg</span><span class="p">(</span><span class="n">col_speed</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Avg Speed&quot;</span><span class="p">),</span>
<span class="o">....</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">col_speed</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Min Speed&quot;</span><span class="p">)])</span>
<span class="o">....</span><span class="p">:</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">10</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+----------+---------+-----------+--------------------+-----------+</span>
<span class="o">|</span> <span class="n">Type</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Type</span> <span class="mi">2</span> <span class="o">|</span> <span class="n">Max</span> <span class="n">Speed</span> <span class="o">|</span> <span class="n">Avg</span> <span class="n">Speed</span> <span class="o">|</span> <span class="n">Min</span> <span class="n">Speed</span> <span class="o">|</span>
<span class="o">+----------+---------+-----------+--------------------+-----------+</span>
<span class="o">|</span> <span class="n">Bug</span> <span class="o">|</span> <span class="o">|</span> <span class="mi">85</span> <span class="o">|</span> <span class="mf">53.333333333333336</span> <span class="o">|</span> <span class="mi">30</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Normal</span> <span class="o">|</span> <span class="n">Flying</span> <span class="o">|</span> <span class="mi">121</span> <span class="o">|</span> <span class="mf">83.77777777777777</span> <span class="o">|</span> <span class="mi">56</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span> <span class="o">|</span> <span class="mi">80</span> <span class="o">|</span> <span class="mf">51.7</span> <span class="o">|</span> <span class="mi">25</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Electric</span> <span class="o">|</span> <span class="o">|</span> <span class="mi">140</span> <span class="o">|</span> <span class="mf">112.5</span> <span class="o">|</span> <span class="mi">90</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fairy</span> <span class="o">|</span> <span class="o">|</span> <span class="mi">60</span> <span class="o">|</span> <span class="mf">47.5</span> <span class="o">|</span> <span class="mi">35</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Water</span> <span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="mi">70</span> <span class="o">|</span> <span class="mf">66.66666666666667</span> <span class="o">|</span> <span class="mi">60</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="n">Psychic</span> <span class="o">|</span> <span class="mi">95</span> <span class="o">|</span> <span class="mf">95.0</span> <span class="o">|</span> <span class="mi">95</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="n">Flying</span> <span class="o">|</span> <span class="mi">85</span> <span class="o">|</span> <span class="mf">85.0</span> <span class="o">|</span> <span class="mi">85</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="n">Flying</span> <span class="o">|</span> <span class="mi">100</span> <span class="o">|</span> <span class="mf">96.66666666666667</span> <span class="o">|</span> <span class="mi">90</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="n">Dragon</span> <span class="o">|</span> <span class="mi">100</span> <span class="o">|</span> <span class="mf">100.0</span> <span class="o">|</span> <span class="mi">100</span> <span class="o">|</span>
<span class="o">+----------+---------+-----------+--------------------+-----------+</span>
<span class="n">Data</span> <span class="n">truncated</span><span class="o">.</span>
</pre></div>
</div>
<section id="setting-parameters">
<h2>Setting Parameters<a class="headerlink" href="#setting-parameters" title="Link to this heading">¶</a></h2>
<p>Each of the built in aggregate functions provides arguments for the parameters that affect their
operation. These can also be overridden using the builder approach to setting any of the following
parameters. When you use the builder, you must call <code class="docutils literal notranslate"><span class="pre">build()</span></code> to finish. For example, these two
expressions are equivalent.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">11</span><span class="p">]:</span> <span class="n">first_1</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">first_value</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">),</span> <span class="n">order_by</span><span class="o">=</span><span class="p">[</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)])</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">12</span><span class="p">]:</span> <span class="n">first_2</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">first_value</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">))</span><span class="o">.</span><span class="n">order_by</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">))</span><span class="o">.</span><span class="n">build</span><span class="p">()</span>
</pre></div>
</div>
<section id="ordering">
<h3>Ordering<a class="headerlink" href="#ordering" title="Link to this heading">¶</a></h3>
<p>You can control the order in which rows are processed by window functions by providing
a list of <code class="docutils literal notranslate"><span class="pre">order_by</span></code> functions for the <code class="docutils literal notranslate"><span class="pre">order_by</span></code> parameter. In the following example, we
sort the Pokemon by their attack in increasing order and take the first value, which gives us the
Pokemon with the smallest attack value in each <code class="docutils literal notranslate"><span class="pre">Type</span> <span class="pre">1</span></code>.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">13</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span>
<span class="o">....</span><span class="p">:</span> <span class="p">[</span><span class="n">col</span><span class="p">(</span><span class="s1">&#39;&quot;Type 1&quot;&#39;</span><span class="p">)],</span>
<span class="o">....</span><span class="p">:</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">first_value</span><span class="p">(</span>
<span class="o">....</span><span class="p">:</span> <span class="n">col</span><span class="p">(</span><span class="s1">&#39;&quot;Name&quot;&#39;</span><span class="p">),</span>
<span class="o">....</span><span class="p">:</span> <span class="n">order_by</span><span class="o">=</span><span class="p">[</span><span class="n">col</span><span class="p">(</span><span class="s1">&#39;&quot;Attack&quot;&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">ascending</span><span class="o">=</span><span class="kc">True</span><span class="p">)]</span>
<span class="o">....</span><span class="p">:</span> <span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Smallest Attack&quot;</span><span class="p">)</span>
<span class="o">....</span><span class="p">:</span> <span class="p">])</span>
<span class="o">....</span><span class="p">:</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">13</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+----------+-----------------+</span>
<span class="o">|</span> <span class="n">Type</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Smallest</span> <span class="n">Attack</span> <span class="o">|</span>
<span class="o">+----------+-----------------+</span>
<span class="o">|</span> <span class="n">Bug</span> <span class="o">|</span> <span class="n">Metapod</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span> <span class="n">Zubat</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Electric</span> <span class="o">|</span> <span class="n">Voltorb</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fairy</span> <span class="o">|</span> <span class="n">Clefairy</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Normal</span> <span class="o">|</span> <span class="n">Chansey</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="n">Jynx</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Grass</span> <span class="o">|</span> <span class="n">Exeggcute</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="n">Vulpix</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Water</span> <span class="o">|</span> <span class="n">Magikarp</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ground</span> <span class="o">|</span> <span class="n">Cubone</span> <span class="o">|</span>
<span class="o">+----------+-----------------+</span>
<span class="n">Data</span> <span class="n">truncated</span><span class="o">.</span>
</pre></div>
</div>
</section>
<section id="distinct">
<h3>Distinct<a class="headerlink" href="#distinct" title="Link to this heading">¶</a></h3>
<p>When you set the parameter <code class="docutils literal notranslate"><span class="pre">distinct</span></code> to <code class="docutils literal notranslate"><span class="pre">True</span></code>, then unique values will only be evaluated one
time each. Suppose we want to create an array of all of the <code class="docutils literal notranslate"><span class="pre">Type</span> <span class="pre">2</span></code> for each <code class="docutils literal notranslate"><span class="pre">Type</span> <span class="pre">1</span></code> of our
Pokemon set. Since there will be many entries of <code class="docutils literal notranslate"><span class="pre">Type</span> <span class="pre">2</span></code> we only one each distinct value.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">14</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([</span><span class="n">col_type_1</span><span class="p">],</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">array_agg</span><span class="p">(</span><span class="n">col_type_2</span><span class="p">,</span> <span class="n">distinct</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Type 2 List&quot;</span><span class="p">)])</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">14</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+----------+--------------------------------------------------+</span>
<span class="o">|</span> <span class="n">Type</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Type</span> <span class="mi">2</span> <span class="n">List</span> <span class="o">|</span>
<span class="o">+----------+--------------------------------------------------+</span>
<span class="o">|</span> <span class="n">Bug</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="p">,</span> <span class="n">Poison</span><span class="p">,</span> <span class="n">Grass</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span> <span class="p">[,</span> <span class="n">Ground</span><span class="p">,</span> <span class="n">Flying</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Electric</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="p">,</span> <span class="n">Steel</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fairy</span> <span class="o">|</span> <span class="p">[]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Normal</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="p">,</span> <span class="n">Fairy</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="n">Psychic</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Grass</span> <span class="o">|</span> <span class="p">[,</span> <span class="n">Poison</span><span class="p">,</span> <span class="n">Psychic</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="p">[</span><span class="n">Dragon</span><span class="p">,</span> <span class="p">,</span> <span class="n">Flying</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Water</span> <span class="o">|</span> <span class="p">[</span><span class="n">Poison</span><span class="p">,</span> <span class="n">Flying</span><span class="p">,</span> <span class="n">Dark</span><span class="p">,</span> <span class="n">Psychic</span><span class="p">,</span> <span class="n">Ice</span><span class="p">,</span> <span class="n">Fighting</span><span class="p">,</span> <span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ground</span> <span class="o">|</span> <span class="p">[,</span> <span class="n">Rock</span><span class="p">]</span> <span class="o">|</span>
<span class="o">+----------+--------------------------------------------------+</span>
<span class="n">Data</span> <span class="n">truncated</span><span class="o">.</span>
</pre></div>
</div>
<p>In the output of the above we can see that there are some <code class="docutils literal notranslate"><span class="pre">Type</span> <span class="pre">1</span></code> for which the <code class="docutils literal notranslate"><span class="pre">Type</span> <span class="pre">2</span></code> entry
is <code class="docutils literal notranslate"><span class="pre">null</span></code>. In reality, we probably want to filter those out. We can do this in two ways. First,
we can filter DataFrame rows that have no <code class="docutils literal notranslate"><span class="pre">Type</span> <span class="pre">2</span></code>. If we do this, we might have some <code class="docutils literal notranslate"><span class="pre">Type</span> <span class="pre">1</span></code>
entries entirely removed. The second is we can use the <code class="docutils literal notranslate"><span class="pre">filter</span></code> argument described below.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">15</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">col_type_2</span><span class="o">.</span><span class="n">is_not_null</span><span class="p">())</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([</span><span class="n">col_type_1</span><span class="p">],</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">array_agg</span><span class="p">(</span><span class="n">col_type_2</span><span class="p">,</span> <span class="n">distinct</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Type 2 List&quot;</span><span class="p">)])</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">15</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+----------+------------------------------------------------+</span>
<span class="o">|</span> <span class="n">Type</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Type</span> <span class="mi">2</span> <span class="n">List</span> <span class="o">|</span>
<span class="o">+----------+------------------------------------------------+</span>
<span class="o">|</span> <span class="n">Bug</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="n">Grass</span><span class="p">,</span> <span class="n">Poison</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span> <span class="p">[</span><span class="n">Ground</span><span class="p">,</span> <span class="n">Flying</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Electric</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="n">Steel</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Normal</span> <span class="o">|</span> <span class="p">[</span><span class="n">Fairy</span><span class="p">,</span> <span class="n">Flying</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="n">Psychic</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Grass</span> <span class="o">|</span> <span class="p">[</span><span class="n">Poison</span><span class="p">,</span> <span class="n">Psychic</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="p">[</span><span class="n">Dragon</span><span class="p">,</span> <span class="n">Flying</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Water</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="n">Poison</span><span class="p">,</span> <span class="n">Psychic</span><span class="p">,</span> <span class="n">Fighting</span><span class="p">,</span> <span class="n">Ice</span><span class="p">,</span> <span class="n">Dark</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Rock</span> <span class="o">|</span> <span class="p">[</span><span class="n">Water</span><span class="p">,</span> <span class="n">Flying</span><span class="p">,</span> <span class="n">Ground</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ghost</span> <span class="o">|</span> <span class="p">[</span><span class="n">Poison</span><span class="p">]</span> <span class="o">|</span>
<span class="o">+----------+------------------------------------------------+</span>
<span class="n">Data</span> <span class="n">truncated</span><span class="o">.</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">16</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([</span><span class="n">col_type_1</span><span class="p">],</span> <span class="p">[</span><span class="n">f</span><span class="o">.</span><span class="n">array_agg</span><span class="p">(</span><span class="n">col_type_2</span><span class="p">,</span> <span class="n">distinct</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="nb">filter</span><span class="o">=</span><span class="n">col_type_2</span><span class="o">.</span><span class="n">is_not_null</span><span class="p">())</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Type 2 List&quot;</span><span class="p">)])</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">16</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+----------+------------------------------------------------+</span>
<span class="o">|</span> <span class="n">Type</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Type</span> <span class="mi">2</span> <span class="n">List</span> <span class="o">|</span>
<span class="o">+----------+------------------------------------------------+</span>
<span class="o">|</span> <span class="n">Bug</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="n">Grass</span><span class="p">,</span> <span class="n">Poison</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span> <span class="p">[</span><span class="n">Ground</span><span class="p">,</span> <span class="n">Flying</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Electric</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="n">Steel</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fairy</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Normal</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="n">Fairy</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="n">Psychic</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Grass</span> <span class="o">|</span> <span class="p">[</span><span class="n">Psychic</span><span class="p">,</span> <span class="n">Poison</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="p">[</span><span class="n">Flying</span><span class="p">,</span> <span class="n">Dragon</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Water</span> <span class="o">|</span> <span class="p">[</span><span class="n">Poison</span><span class="p">,</span> <span class="n">Ice</span><span class="p">,</span> <span class="n">Flying</span><span class="p">,</span> <span class="n">Fighting</span><span class="p">,</span> <span class="n">Psychic</span><span class="p">,</span> <span class="n">Dark</span><span class="p">]</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ground</span> <span class="o">|</span> <span class="p">[</span><span class="n">Rock</span><span class="p">]</span> <span class="o">|</span>
<span class="o">+----------+------------------------------------------------+</span>
<span class="n">Data</span> <span class="n">truncated</span><span class="o">.</span>
</pre></div>
</div>
<p>Which approach you take should depend on your use case.</p>
</section>
<section id="null-treatment">
<h3>Null Treatment<a class="headerlink" href="#null-treatment" title="Link to this heading">¶</a></h3>
<p>This option allows you to either respect or ignore null values.</p>
<p>One common usage for handling nulls is the case where you want to find the first value within a
partition. By setting the null treatment to ignore nulls, we can find the first non-null value
in our partition.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">17</span><span class="p">]:</span> <span class="kn">from</span><span class="w"> </span><span class="nn">datafusion.common</span><span class="w"> </span><span class="kn">import</span> <span class="n">NullTreatment</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">18</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([</span><span class="n">col_type_1</span><span class="p">],</span> <span class="p">[</span>
<span class="o">....</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">first_value</span><span class="p">(</span>
<span class="o">....</span><span class="p">:</span> <span class="n">col_type_2</span><span class="p">,</span>
<span class="o">....</span><span class="p">:</span> <span class="n">order_by</span><span class="o">=</span><span class="p">[</span><span class="n">col_attack</span><span class="p">],</span>
<span class="o">....</span><span class="p">:</span> <span class="n">null_treatment</span><span class="o">=</span><span class="n">NullTreatment</span><span class="o">.</span><span class="n">RESPECT_NULLS</span>
<span class="o">....</span><span class="p">:</span> <span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Lowest Attack Type 2&quot;</span><span class="p">)])</span>
<span class="o">....</span><span class="p">:</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">18</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+----------+----------------------+</span>
<span class="o">|</span> <span class="n">Type</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Lowest</span> <span class="n">Attack</span> <span class="n">Type</span> <span class="mi">2</span> <span class="o">|</span>
<span class="o">+----------+----------------------+</span>
<span class="o">|</span> <span class="n">Bug</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span> <span class="n">Flying</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Electric</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fairy</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Normal</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="n">Psychic</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Grass</span> <span class="o">|</span> <span class="n">Psychic</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Water</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ground</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">+----------+----------------------+</span>
<span class="n">Data</span> <span class="n">truncated</span><span class="o">.</span>
<span class="n">In</span> <span class="p">[</span><span class="mi">19</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([</span><span class="n">col_type_1</span><span class="p">],</span> <span class="p">[</span>
<span class="o">....</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">first_value</span><span class="p">(</span>
<span class="o">....</span><span class="p">:</span> <span class="n">col_type_2</span><span class="p">,</span>
<span class="o">....</span><span class="p">:</span> <span class="n">order_by</span><span class="o">=</span><span class="p">[</span><span class="n">col_attack</span><span class="p">],</span>
<span class="o">....</span><span class="p">:</span> <span class="n">null_treatment</span><span class="o">=</span><span class="n">NullTreatment</span><span class="o">.</span><span class="n">IGNORE_NULLS</span>
<span class="o">....</span><span class="p">:</span> <span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Lowest Attack Type 2&quot;</span><span class="p">)])</span>
<span class="o">....</span><span class="p">:</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">19</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+----------+----------------------+</span>
<span class="o">|</span> <span class="n">Type</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Lowest</span> <span class="n">Attack</span> <span class="n">Type</span> <span class="mi">2</span> <span class="o">|</span>
<span class="o">+----------+----------------------+</span>
<span class="o">|</span> <span class="n">Bug</span> <span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span> <span class="n">Flying</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Electric</span> <span class="o">|</span> <span class="n">Steel</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fairy</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Normal</span> <span class="o">|</span> <span class="n">Flying</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="n">Psychic</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Grass</span> <span class="o">|</span> <span class="n">Psychic</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="n">Flying</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Water</span> <span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ground</span> <span class="o">|</span> <span class="n">Rock</span> <span class="o">|</span>
<span class="o">+----------+----------------------+</span>
<span class="n">Data</span> <span class="n">truncated</span><span class="o">.</span>
</pre></div>
</div>
</section>
<section id="filter">
<h3>Filter<a class="headerlink" href="#filter" title="Link to this heading">¶</a></h3>
<p>Using the filter option is useful for filtering results to include in the aggregate function. It can
be seen in the example above on how this can be useful to only filter rows evaluated by the
aggregate function without filtering rows from the entire DataFrame.</p>
<p>Filter takes a single expression.</p>
<p>Suppose we want to find the speed values for only Pokemon that have low Attack values.</p>
<div class="highlight-ipython notranslate"><div class="highlight"><pre><span></span><span class="n">In</span> <span class="p">[</span><span class="mi">20</span><span class="p">]:</span> <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">([</span><span class="n">col_type_1</span><span class="p">],</span> <span class="p">[</span>
<span class="o">....</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">avg</span><span class="p">(</span><span class="n">col_speed</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Avg Speed All&quot;</span><span class="p">),</span>
<span class="o">....</span><span class="p">:</span> <span class="n">f</span><span class="o">.</span><span class="n">avg</span><span class="p">(</span><span class="n">col_speed</span><span class="p">,</span> <span class="nb">filter</span><span class="o">=</span><span class="n">col_attack</span> <span class="o">&lt;</span> <span class="n">lit</span><span class="p">(</span><span class="mi">50</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;Avg Speed Low Attack&quot;</span><span class="p">)])</span>
<span class="o">....</span><span class="p">:</span>
<span class="n">Out</span><span class="p">[</span><span class="mi">20</span><span class="p">]:</span>
<span class="n">DataFrame</span><span class="p">()</span>
<span class="o">+----------+--------------------+----------------------+</span>
<span class="o">|</span> <span class="n">Type</span> <span class="mi">1</span> <span class="o">|</span> <span class="n">Avg</span> <span class="n">Speed</span> <span class="n">All</span> <span class="o">|</span> <span class="n">Avg</span> <span class="n">Speed</span> <span class="n">Low</span> <span class="n">Attack</span> <span class="o">|</span>
<span class="o">+----------+--------------------+----------------------+</span>
<span class="o">|</span> <span class="n">Bug</span> <span class="o">|</span> <span class="mf">66.78571428571429</span> <span class="o">|</span> <span class="mf">46.0</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Poison</span> <span class="o">|</span> <span class="mf">58.785714285714285</span> <span class="o">|</span> <span class="mf">48.0</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Electric</span> <span class="o">|</span> <span class="mf">98.88888888888889</span> <span class="o">|</span> <span class="mf">72.5</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fairy</span> <span class="o">|</span> <span class="mf">47.5</span> <span class="o">|</span> <span class="mf">35.0</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Normal</span> <span class="o">|</span> <span class="mf">72.75</span> <span class="o">|</span> <span class="mf">52.8</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ice</span> <span class="o">|</span> <span class="mf">90.0</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Grass</span> <span class="o">|</span> <span class="mf">54.23076923076923</span> <span class="o">|</span> <span class="mf">42.5</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Fire</span> <span class="o">|</span> <span class="mf">86.28571428571429</span> <span class="o">|</span> <span class="mf">65.0</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Water</span> <span class="o">|</span> <span class="mf">67.25806451612904</span> <span class="o">|</span> <span class="mf">63.833333333333336</span> <span class="o">|</span>
<span class="o">|</span> <span class="n">Ground</span> <span class="o">|</span> <span class="mf">58.125</span> <span class="o">|</span> <span class="o">|</span>
<span class="o">+----------+--------------------+----------------------+</span>
<span class="n">Data</span> <span class="n">truncated</span><span class="o">.</span>
</pre></div>
</div>
</section>
</section>
<section id="aggregate-functions">
<h2>Aggregate Functions<a class="headerlink" href="#aggregate-functions" title="Link to this heading">¶</a></h2>
<p>The available aggregate functions are:</p>
<ol class="arabic simple">
<li><dl class="simple">
<dt>Comparison Functions</dt><dd><ul class="simple">
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.min" title="datafusion.functions.min"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.min()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.max" title="datafusion.functions.max"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.max()</span></code></a></p></li>
</ul>
</dd>
</dl>
</li>
<li><dl class="simple">
<dt>Math Functions</dt><dd><ul class="simple">
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.sum" title="datafusion.functions.sum"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.sum()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.avg" title="datafusion.functions.avg"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.avg()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.median" title="datafusion.functions.median"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.median()</span></code></a></p></li>
</ul>
</dd>
</dl>
</li>
<li><dl class="simple">
<dt>Array Functions</dt><dd><ul class="simple">
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.array_agg" title="datafusion.functions.array_agg"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.array_agg()</span></code></a></p></li>
</ul>
</dd>
</dl>
</li>
<li><dl class="simple">
<dt>Logical Functions</dt><dd><ul class="simple">
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.bit_and" title="datafusion.functions.bit_and"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.bit_and()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.bit_or" title="datafusion.functions.bit_or"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.bit_or()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.bit_xor" title="datafusion.functions.bit_xor"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.bit_xor()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.bool_and" title="datafusion.functions.bool_and"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.bool_and()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.bool_or" title="datafusion.functions.bool_or"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.bool_or()</span></code></a></p></li>
</ul>
</dd>
</dl>
</li>
<li><dl class="simple">
<dt>Statistical Functions</dt><dd><ul class="simple">
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.count" title="datafusion.functions.count"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.count()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.corr" title="datafusion.functions.corr"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.corr()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.covar_samp" title="datafusion.functions.covar_samp"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.covar_samp()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.covar_pop" title="datafusion.functions.covar_pop"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.covar_pop()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.stddev" title="datafusion.functions.stddev"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.stddev()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.stddev_pop" title="datafusion.functions.stddev_pop"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.stddev_pop()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.var_samp" title="datafusion.functions.var_samp"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.var_samp()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.var_pop" title="datafusion.functions.var_pop"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.var_pop()</span></code></a></p></li>
</ul>
</dd>
</dl>
</li>
<li><dl class="simple">
<dt>Linear Regression Functions</dt><dd><ul class="simple">
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.regr_count" title="datafusion.functions.regr_count"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.regr_count()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.regr_slope" title="datafusion.functions.regr_slope"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.regr_slope()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.regr_intercept" title="datafusion.functions.regr_intercept"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.regr_intercept()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.regr_r2" title="datafusion.functions.regr_r2"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.regr_r2()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.regr_avgx" title="datafusion.functions.regr_avgx"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.regr_avgx()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.regr_avgy" title="datafusion.functions.regr_avgy"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.regr_avgy()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.regr_sxx" title="datafusion.functions.regr_sxx"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.regr_sxx()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.regr_syy" title="datafusion.functions.regr_syy"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.regr_syy()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.regr_slope" title="datafusion.functions.regr_slope"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.regr_slope()</span></code></a></p></li>
</ul>
</dd>
</dl>
</li>
<li><dl class="simple">
<dt>Positional Functions</dt><dd><ul class="simple">
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.first_value" title="datafusion.functions.first_value"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.first_value()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.last_value" title="datafusion.functions.last_value"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.last_value()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.nth_value" title="datafusion.functions.nth_value"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.nth_value()</span></code></a></p></li>
</ul>
</dd>
</dl>
</li>
<li><dl class="simple">
<dt>String Functions</dt><dd><ul class="simple">
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.string_agg" title="datafusion.functions.string_agg"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.string_agg()</span></code></a></p></li>
</ul>
</dd>
</dl>
</li>
<li><dl class="simple">
<dt>Approximation Functions</dt><dd><ul class="simple">
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.approx_distinct" title="datafusion.functions.approx_distinct"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.approx_distinct()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.approx_median" title="datafusion.functions.approx_median"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.approx_median()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.approx_percentile_cont" title="datafusion.functions.approx_percentile_cont"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.approx_percentile_cont()</span></code></a></p></li>
<li><p><a class="reference internal" href="../../autoapi/datafusion/functions/index.html#datafusion.functions.approx_percentile_cont_with_weight" title="datafusion.functions.approx_percentile_cont_with_weight"><code class="xref py py-func docutils literal notranslate"><span class="pre">datafusion.functions.approx_percentile_cont_with_weight()</span></code></a></p></li>
</ul>
</dd>
</dl>
</li>
</ol>
</section>
</section>
</div>
<!-- Previous / next buttons -->
<div class='prev-next-area'>
<a class='left-prev' id="prev-link" href="functions.html" title="previous page">
<i class="fas fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Functions</p>
</div>
</a>
<a class='right-next' id="next-link" href="windows.html" title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">Window Functions</p>
</div>
<i class="fas fa-angle-right"></i>
</a>
</div>
</main>
</div>
</div>
<script src="../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<!-- Based on pydata_sphinx_theme/footer.html -->
<footer class="footer mt-5 mt-md-0">
<div class="container">
<div class="footer-item">
<p class="copyright">
&copy; Copyright 2019-2024, Apache Software Foundation.<br>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 8.1.3.<br>
</p>
</div>
<div class="footer-item">
<p>Apache Arrow DataFusion, Arrow DataFusion, Apache, the Apache feather logo, and the Apache Arrow DataFusion project logo</p>
<p>are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
</div>
</div>
</footer>
</body>
</html>