blob: 7395c7054c5234e9451eef45fff9d6e10474d38c [file] [log] [blame]
<!DOCTYPE html>
<html class="no-js" lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>User Defined Aggregate Functions (UDAFs) - Spark 4.1.0-preview1 Documentation</title>
<link rel="stylesheet" href="css/bootstrap.min.css">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&family=Courier+Prime:wght@400;700&display=swap" rel="stylesheet">
<link href="css/custom.css" rel="stylesheet">
<script src="js/vendor/modernizr-2.6.1-respond-1.1.0.min.js"></script>
<link rel="stylesheet" href="css/pygments-default.css">
<link rel="stylesheet" href="css/docsearch.min.css" />
<link rel="stylesheet" href="css/docsearch.css">
<!-- Matomo -->
<script>
// Matomo command queue: reuse the queue already on the page, or start an empty one.
window._paq = window._paq || [];
var _paq = window._paq;
/* Queue order matters: configuration commands such as "setCustomDimension" must be pushed before "trackPageView". */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
  // Base URL shared by the tracking endpoint and the tracker script.
  var trackerBase = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '40']);

  // Inject matomo.js asynchronously, ahead of the first <script> element in the document.
  var doc = document;
  var loader = doc.createElement('script');
  var firstScript = doc.getElementsByTagName('script')[0];
  loader.async = true;
  loader.src = trackerBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<!-- This code is taken from http://twitter.github.com/bootstrap/examples/hero.html -->
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4 fixed-top" style="background: #1d6890;" id="topbar">
<div class="navbar-brand"><a href="index.html">
<img src="https://spark.apache.org/images/spark-logo-rev.svg" width="141" height="72" alt="Apache Spark"/></a><span class="version">4.1.0-preview1</span>
</div>
<button class="navbar-toggler" type="button" data-toggle="collapse"
data-target="#navbarCollapse" aria-controls="navbarCollapse"
aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarCollapse">
<ul class="navbar-nav me-auto">
<li class="nav-item"><a href="index.html" class="nav-link">Overview</a></li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarQuickStart" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Programming Guides</a>
<div class="dropdown-menu" aria-labelledby="navbarQuickStart">
<a class="dropdown-item" href="quick-start.html">Quick Start</a>
<a class="dropdown-item" href="rdd-programming-guide.html">RDDs, Accumulators, Broadcasts Vars</a>
<a class="dropdown-item" href="sql-programming-guide.html">SQL, DataFrames, and Datasets</a>
<a class="dropdown-item" href="streaming/index.html">Structured Streaming</a>
<a class="dropdown-item" href="streaming-programming-guide.html">Spark Streaming (DStreams)</a>
<a class="dropdown-item" href="ml-guide.html">MLlib (Machine Learning)</a>
<a class="dropdown-item" href="graphx-programming-guide.html">GraphX (Graph Processing)</a>
<a class="dropdown-item" href="sparkr.html">SparkR (R on Spark)</a>
<a class="dropdown-item" href="api/python/getting_started/index.html">PySpark (Python on Spark)</a>
<a class="dropdown-item" href="declarative-pipelines-programming-guide.html">Declarative Pipelines</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarAPIDocs" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">API Docs</a>
<div class="dropdown-menu" aria-labelledby="navbarAPIDocs">
<a class="dropdown-item" href="api/python/index.html">Python</a>
<a class="dropdown-item" href="api/scala/org/apache/spark/index.html">Scala</a>
<a class="dropdown-item" href="api/java/index.html">Java</a>
<a class="dropdown-item" href="api/R/index.html">R</a>
<a class="dropdown-item" href="api/sql/index.html">SQL, Built-in Functions</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarDeploying" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Deploying</a>
<div class="dropdown-menu" aria-labelledby="navbarDeploying">
<a class="dropdown-item" href="cluster-overview.html">Overview</a>
<a class="dropdown-item" href="submitting-applications.html">Submitting Applications</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="spark-standalone.html">Spark Standalone</a>
<a class="dropdown-item" href="running-on-yarn.html">YARN</a>
<a class="dropdown-item" href="running-on-kubernetes.html">Kubernetes</a>
</div>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" id="navbarMore" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">More</a>
<div class="dropdown-menu" aria-labelledby="navbarMore">
<a class="dropdown-item" href="configuration.html">Configuration</a>
<a class="dropdown-item" href="monitoring.html">Monitoring</a>
<a class="dropdown-item" href="tuning.html">Tuning Guide</a>
<a class="dropdown-item" href="job-scheduling.html">Job Scheduling</a>
<a class="dropdown-item" href="security.html">Security</a>
<a class="dropdown-item" href="hardware-provisioning.html">Hardware Provisioning</a>
<a class="dropdown-item" href="migration-guide.html">Migration Guide</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="building-spark.html">Building Spark</a>
<a class="dropdown-item" href="https://spark.apache.org/contributing.html">Contributing to Spark</a>
<a class="dropdown-item" href="https://spark.apache.org/third-party-projects.html">Third Party Projects</a>
</div>
</li>
<li class="nav-item">
<input type="text" id="docsearch-input" placeholder="Search the docs…" aria-label="Search the docs">
</li>
</ul>
<!--<span class="navbar-text navbar-right"><span class="version-text">v4.1.0-preview1</span></span>-->
</div>
</nav>
<div class="container">
<div class="left-menu-wrapper">
<div class="left-menu">
<h3><a href="sql-programming-guide.html">Spark SQL Guide</a></h3>
<ul>
<li>
<a href="sql-getting-started.html">
Getting Started
</a>
</li>
<li>
<a href="sql-data-sources.html">
Data Sources
</a>
</li>
<li>
<a href="sql-performance-tuning.html">
Performance Tuning
</a>
</li>
<li>
<a href="sql-distributed-sql-engine.html">
Distributed SQL Engine
</a>
</li>
<li>
<a href="sql-pyspark-pandas-with-arrow.html">
PySpark Usage Guide for Pandas with Apache Arrow
</a>
</li>
<li>
<a href="sql-migration-guide.html">
Migration Guide
</a>
</li>
<li>
<a href="sql-ref.html">
SQL Reference
</a>
</li>
<ul>
<li>
<a href="sql-ref-ansi-compliance.html">
ANSI Compliance
</a>
</li>
<li>
<a href="sql-ref-datatypes.html">
Data Types
</a>
</li>
<li>
<a href="sql-ref-datetime-pattern.html">
Datetime Pattern
</a>
</li>
<li>
<a href="sql-ref-number-pattern.html">
Number Pattern
</a>
</li>
<li>
<a href="sql-ref-operators.html">
Operators
</a>
</li>
<li>
<a href="sql-ref-functions.html">
Functions
</a>
</li>
<li>
<a href="sql-ref-identifier.html">
Identifiers
</a>
</li>
<li>
<a href="sql-ref-identifier-clause.html">
IDENTIFIER clause
</a>
</li>
<li>
<a href="sql-ref-literals.html">
Literals
</a>
</li>
<li>
<a href="sql-ref-null-semantics.html">
Null Semantics
</a>
</li>
<li>
<a href="sql-ref-syntax.html">
SQL Syntax
</a>
</li>
</ul>
<li>
<a href="sql-error-conditions.html">
Error Conditions
</a>
</li>
</ul>
</div>
</div>
<input id="nav-trigger" class="nav-trigger" checked type="checkbox">
<label for="nav-trigger"></label>
<div class="content-with-sidebar mr-3" id="content">
<h1 class="title">User Defined Aggregate Functions (UDAFs)</h1>
<h3 id="description">Description</h3>
<p>User-Defined Aggregate Functions (UDAFs) are user-programmable routines that act on multiple rows at once and return a single aggregated value as a result. This documentation lists the classes that are required for creating and registering UDAFs. It also contains examples that demonstrate how to define and register UDAFs in Scala and invoke them in Spark SQL.</p>
<h3 id="aggregator-in-buf-out">Aggregator[-IN, BUF, OUT]</h3>
<p>A base class for user-defined aggregations, which can be used in Dataset operations to take all of the elements of a group and reduce them to a single value.</p>
<p><strong><em>IN</em></strong> - The input type for the aggregation.</p>
<p><strong><em>BUF</em></strong> - The type of the intermediate value of the reduction.</p>
<p><strong><em>OUT</em></strong> - The type of the final output result.</p>
<ul>
<li>
<p><strong>bufferEncoder: Encoder[BUF]</strong></p>
<p>Specifies the Encoder for the intermediate value type.</p>
</li>
<li>
<p><strong>finish(reduction: BUF): OUT</strong></p>
<p>Transform the output of the reduction.</p>
</li>
<li>
<p><strong>merge(b1: BUF, b2: BUF): BUF</strong></p>
<p>Merge two intermediate values.</p>
</li>
<li>
<p><strong>outputEncoder: Encoder[OUT]</strong></p>
<p>Specifies the Encoder for the final output value type.</p>
</li>
<li>
<p><strong>reduce(b: BUF, a: IN): BUF</strong></p>
<p>Aggregate input value <code class="language-plaintext highlighter-rouge">a</code> into the current intermediate value. For performance, the function may modify <code class="language-plaintext highlighter-rouge">b</code> and return it instead of constructing a new object for <code class="language-plaintext highlighter-rouge">b</code>.</p>
</li>
<li>
<p><strong>zero: BUF</strong></p>
<p>The initial value of the intermediate result for this aggregation.</p>
</li>
</ul>
<h3 id="examples">Examples</h3>
<h4 id="type-safe-user-defined-aggregate-functions">Type-Safe User-Defined Aggregate Functions</h4>
<p>User-defined aggregations for strongly typed Datasets revolve around the <a href="api/scala/org/apache/spark/sql/expressions/Aggregator.html">Aggregator</a> abstract class.
For example, a type-safe user-defined average can look like:</p>
<div class="codetabs">
<div data-lang="scala">
<div class="highlight"><pre class="codehilite"><code><span class="k">import</span> <span class="nn">org.apache.spark.sql.</span><span class="o">{</span><span class="nc">Encoder</span><span class="o">,</span> <span class="nc">Encoders</span><span class="o">,</span> <span class="nc">SparkSession</span><span class="o">}</span>
<span class="k">import</span> <span class="nn">org.apache.spark.sql.expressions.Aggregator</span>
<span class="k">case</span> <span class="k">class</span> <span class="nc">Employee</span><span class="o">(</span><span class="n">name</span><span class="k">:</span> <span class="kt">String</span><span class="o">,</span> <span class="n">salary</span><span class="k">:</span> <span class="kt">Long</span><span class="o">)</span>
<span class="k">case</span> <span class="k">class</span> <span class="nc">Average</span><span class="o">(</span><span class="k">var</span> <span class="n">sum</span><span class="k">:</span> <span class="kt">Long</span><span class="o">,</span> <span class="k">var</span> <span class="n">count</span><span class="k">:</span> <span class="kt">Long</span><span class="o">)</span>
<span class="k">object</span> <span class="nc">MyAverage</span> <span class="k">extends</span> <span class="nc">Aggregator</span><span class="o">[</span><span class="kt">Employee</span>, <span class="kt">Average</span>, <span class="kt">Double</span><span class="o">]</span> <span class="o">{</span>
<span class="c1">// A zero value for this aggregation. Should satisfy the property that any b + zero = b</span>
<span class="k">def</span> <span class="nf">zero</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="nc">Average</span><span class="o">(</span><span class="mi">0L</span><span class="o">,</span> <span class="mi">0L</span><span class="o">)</span>
<span class="c1">// Combine two values to produce a new value. For performance, the function may modify `buffer`</span>
<span class="c1">// and return it instead of constructing a new object</span>
<span class="k">def</span> <span class="nf">reduce</span><span class="o">(</span><span class="n">buffer</span><span class="k">:</span> <span class="kt">Average</span><span class="o">,</span> <span class="n">employee</span><span class="k">:</span> <span class="kt">Employee</span><span class="o">)</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="o">{</span>
<span class="nv">buffer</span><span class="o">.</span><span class="py">sum</span> <span class="o">+=</span> <span class="nv">employee</span><span class="o">.</span><span class="py">salary</span>
<span class="nv">buffer</span><span class="o">.</span><span class="py">count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="n">buffer</span>
<span class="o">}</span>
<span class="c1">// Merge two intermediate values</span>
<span class="k">def</span> <span class="nf">merge</span><span class="o">(</span><span class="n">b1</span><span class="k">:</span> <span class="kt">Average</span><span class="o">,</span> <span class="n">b2</span><span class="k">:</span> <span class="kt">Average</span><span class="o">)</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="o">{</span>
<span class="nv">b1</span><span class="o">.</span><span class="py">sum</span> <span class="o">+=</span> <span class="nv">b2</span><span class="o">.</span><span class="py">sum</span>
<span class="nv">b1</span><span class="o">.</span><span class="py">count</span> <span class="o">+=</span> <span class="nv">b2</span><span class="o">.</span><span class="py">count</span>
<span class="n">b1</span>
<span class="o">}</span>
<span class="c1">// Transform the output of the reduction</span>
<span class="k">def</span> <span class="nf">finish</span><span class="o">(</span><span class="n">reduction</span><span class="k">:</span> <span class="kt">Average</span><span class="o">)</span><span class="k">:</span> <span class="kt">Double</span> <span class="o">=</span> <span class="nv">reduction</span><span class="o">.</span><span class="py">sum</span><span class="o">.</span><span class="py">toDouble</span> <span class="o">/</span> <span class="nv">reduction</span><span class="o">.</span><span class="py">count</span>
<span class="c1">// Specifies the Encoder for the intermediate value type</span>
<span class="k">def</span> <span class="nf">bufferEncoder</span><span class="k">:</span> <span class="kt">Encoder</span><span class="o">[</span><span class="kt">Average</span><span class="o">]</span> <span class="k">=</span> <span class="nv">Encoders</span><span class="o">.</span><span class="py">product</span>
<span class="c1">// Specifies the Encoder for the final output value type</span>
<span class="k">def</span> <span class="nf">outputEncoder</span><span class="k">:</span> <span class="kt">Encoder</span><span class="o">[</span><span class="kt">Double</span><span class="o">]</span> <span class="k">=</span> <span class="nv">Encoders</span><span class="o">.</span><span class="py">scalaDouble</span>
<span class="o">}</span>
<span class="k">val</span> <span class="nv">ds</span> <span class="k">=</span> <span class="nv">spark</span><span class="o">.</span><span class="py">read</span><span class="o">.</span><span class="py">json</span><span class="o">(</span><span class="s">"examples/src/main/resources/employees.json"</span><span class="o">).</span><span class="py">as</span><span class="o">[</span><span class="kt">Employee</span><span class="o">]</span>
<span class="nv">ds</span><span class="o">.</span><span class="py">show</span><span class="o">()</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// | name|salary|</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// |Michael| 3000|</span>
<span class="c1">// | Andy| 4500|</span>
<span class="c1">// | Justin| 3500|</span>
<span class="c1">// | Berta| 4000|</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// Convert the function to a `TypedColumn` and give it a name</span>
<span class="k">val</span> <span class="nv">averageSalary</span> <span class="k">=</span> <span class="nv">MyAverage</span><span class="o">.</span><span class="py">toColumn</span><span class="o">.</span><span class="py">name</span><span class="o">(</span><span class="s">"average_salary"</span><span class="o">)</span>
<span class="k">val</span> <span class="nv">result</span> <span class="k">=</span> <span class="nv">ds</span><span class="o">.</span><span class="py">select</span><span class="o">(</span><span class="n">averageSalary</span><span class="o">)</span>
<span class="nv">result</span><span class="o">.</span><span class="py">show</span><span class="o">()</span>
<span class="c1">// +--------------+</span>
<span class="c1">// |average_salary|</span>
<span class="c1">// +--------------+</span>
<span class="c1">// | 3750.0|</span>
<span class="c1">// +--------------+</span></code></pre></div>
<div><small>Find full example code at "examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedTypedAggregation.scala" in the Spark repo.</small></div>
</div>
<div data-lang="java">
<div class="highlight"><pre class="codehilite"><code><span class="kn">import</span> <span class="nn">java.io.Serializable</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Dataset</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Encoder</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Encoders</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.SparkSession</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.TypedColumn</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.expressions.Aggregator</span><span class="o">;</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Employee</span> <span class="kd">implements</span> <span class="nc">Serializable</span> <span class="o">{</span>
<span class="kd">private</span> <span class="nc">String</span> <span class="n">name</span><span class="o">;</span>
<span class="kd">private</span> <span class="kt">long</span> <span class="n">salary</span><span class="o">;</span>
<span class="c1">// Constructors, getters, setters...</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Average</span> <span class="kd">implements</span> <span class="nc">Serializable</span> <span class="o">{</span>
<span class="kd">private</span> <span class="kt">long</span> <span class="n">sum</span><span class="o">;</span>
<span class="kd">private</span> <span class="kt">long</span> <span class="n">count</span><span class="o">;</span>
<span class="c1">// Constructors, getters, setters...</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">MyAverage</span> <span class="kd">extends</span> <span class="nc">Aggregator</span><span class="o">&lt;</span><span class="nc">Employee</span><span class="o">,</span> <span class="nc">Average</span><span class="o">,</span> <span class="nc">Double</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="c1">// A zero value for this aggregation. Should satisfy the property that any b + zero = b</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">zero</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="k">new</span> <span class="nf">Average</span><span class="o">(</span><span class="mi">0L</span><span class="o">,</span> <span class="mi">0L</span><span class="o">);</span>
<span class="o">}</span>
<span class="c1">// Combine two values to produce a new value. For performance, the function may modify `buffer`</span>
<span class="c1">// and return it instead of constructing a new object</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">reduce</span><span class="o">(</span><span class="nc">Average</span> <span class="n">buffer</span><span class="o">,</span> <span class="nc">Employee</span> <span class="n">employee</span><span class="o">)</span> <span class="o">{</span>
<span class="kt">long</span> <span class="n">newSum</span> <span class="o">=</span> <span class="n">buffer</span><span class="o">.</span><span class="na">getSum</span><span class="o">()</span> <span class="o">+</span> <span class="n">employee</span><span class="o">.</span><span class="na">getSalary</span><span class="o">();</span>
<span class="kt">long</span> <span class="n">newCount</span> <span class="o">=</span> <span class="n">buffer</span><span class="o">.</span><span class="na">getCount</span><span class="o">()</span> <span class="o">+</span> <span class="mi">1</span><span class="o">;</span>
<span class="n">buffer</span><span class="o">.</span><span class="na">setSum</span><span class="o">(</span><span class="n">newSum</span><span class="o">);</span>
<span class="n">buffer</span><span class="o">.</span><span class="na">setCount</span><span class="o">(</span><span class="n">newCount</span><span class="o">);</span>
<span class="k">return</span> <span class="n">buffer</span><span class="o">;</span>
<span class="o">}</span>
<span class="c1">// Merge two intermediate values</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">merge</span><span class="o">(</span><span class="nc">Average</span> <span class="n">b1</span><span class="o">,</span> <span class="nc">Average</span> <span class="n">b2</span><span class="o">)</span> <span class="o">{</span>
<span class="kt">long</span> <span class="n">mergedSum</span> <span class="o">=</span> <span class="n">b1</span><span class="o">.</span><span class="na">getSum</span><span class="o">()</span> <span class="o">+</span> <span class="n">b2</span><span class="o">.</span><span class="na">getSum</span><span class="o">();</span>
<span class="kt">long</span> <span class="n">mergedCount</span> <span class="o">=</span> <span class="n">b1</span><span class="o">.</span><span class="na">getCount</span><span class="o">()</span> <span class="o">+</span> <span class="n">b2</span><span class="o">.</span><span class="na">getCount</span><span class="o">();</span>
<span class="n">b1</span><span class="o">.</span><span class="na">setSum</span><span class="o">(</span><span class="n">mergedSum</span><span class="o">);</span>
<span class="n">b1</span><span class="o">.</span><span class="na">setCount</span><span class="o">(</span><span class="n">mergedCount</span><span class="o">);</span>
<span class="k">return</span> <span class="n">b1</span><span class="o">;</span>
<span class="o">}</span>
<span class="c1">// Transform the output of the reduction</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Double</span> <span class="nf">finish</span><span class="o">(</span><span class="nc">Average</span> <span class="n">reduction</span><span class="o">)</span> <span class="o">{</span>
<span class="k">return</span> <span class="o">((</span><span class="kt">double</span><span class="o">)</span> <span class="n">reduction</span><span class="o">.</span><span class="na">getSum</span><span class="o">())</span> <span class="o">/</span> <span class="n">reduction</span><span class="o">.</span><span class="na">getCount</span><span class="o">();</span>
<span class="o">}</span>
<span class="c1">// Specifies the Encoder for the intermediate value type</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Encoder</span><span class="o">&lt;</span><span class="nc">Average</span><span class="o">&gt;</span> <span class="nf">bufferEncoder</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">bean</span><span class="o">(</span><span class="nc">Average</span><span class="o">.</span><span class="na">class</span><span class="o">);</span>
<span class="o">}</span>
<span class="c1">// Specifies the Encoder for the final output value type</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Encoder</span><span class="o">&lt;</span><span class="nc">Double</span><span class="o">&gt;</span> <span class="nf">outputEncoder</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">DOUBLE</span><span class="o">();</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="nc">Encoder</span><span class="o">&lt;</span><span class="nc">Employee</span><span class="o">&gt;</span> <span class="n">employeeEncoder</span> <span class="o">=</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">bean</span><span class="o">(</span><span class="nc">Employee</span><span class="o">.</span><span class="na">class</span><span class="o">);</span>
<span class="nc">String</span> <span class="n">path</span> <span class="o">=</span> <span class="s">"examples/src/main/resources/employees.json"</span><span class="o">;</span>
<span class="nc">Dataset</span><span class="o">&lt;</span><span class="nc">Employee</span><span class="o">&gt;</span> <span class="n">ds</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="na">read</span><span class="o">().</span><span class="na">json</span><span class="o">(</span><span class="n">path</span><span class="o">).</span><span class="na">as</span><span class="o">(</span><span class="n">employeeEncoder</span><span class="o">);</span>
<span class="n">ds</span><span class="o">.</span><span class="na">show</span><span class="o">();</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// | name|salary|</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// |Michael| 3000|</span>
<span class="c1">// | Andy| 4500|</span>
<span class="c1">// | Justin| 3500|</span>
<span class="c1">// | Berta| 4000|</span>
<span class="c1">// +-------+------+</span>
<span class="nc">MyAverage</span> <span class="n">myAverage</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">MyAverage</span><span class="o">();</span>
<span class="c1">// Convert the function to a `TypedColumn` and give it a name</span>
<span class="nc">TypedColumn</span><span class="o">&lt;</span><span class="nc">Employee</span><span class="o">,</span> <span class="nc">Double</span><span class="o">&gt;</span> <span class="n">averageSalary</span> <span class="o">=</span> <span class="n">myAverage</span><span class="o">.</span><span class="na">toColumn</span><span class="o">().</span><span class="na">name</span><span class="o">(</span><span class="s">"average_salary"</span><span class="o">);</span>
<span class="nc">Dataset</span><span class="o">&lt;</span><span class="nc">Double</span><span class="o">&gt;</span> <span class="n">result</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="na">select</span><span class="o">(</span><span class="n">averageSalary</span><span class="o">);</span>
<span class="n">result</span><span class="o">.</span><span class="na">show</span><span class="o">();</span>
<span class="c1">// +--------------+</span>
<span class="c1">// |average_salary|</span>
<span class="c1">// +--------------+</span>
<span class="c1">// | 3750.0|</span>
<span class="c1">// +--------------+</span></code></pre></div>
<div><small>Find full example code at "examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedTypedAggregation.java" in the Spark repo.</small></div>
</div>
</div>
<h4 id="untyped-user-defined-aggregate-functions">Untyped User-Defined Aggregate Functions</h4>
<p>Typed aggregations, as described above, may also be registered as untyped aggregating UDFs for use with DataFrames.
For example, a user-defined average for untyped DataFrames can look like:</p>
<div class="codetabs">
<div data-lang="scala">
<div class="highlight"><pre class="codehilite"><code><span class="k">import</span> <span class="nn">org.apache.spark.sql.</span><span class="o">{</span><span class="nc">Encoder</span><span class="o">,</span> <span class="nc">Encoders</span><span class="o">,</span> <span class="nc">SparkSession</span><span class="o">}</span>
<span class="k">import</span> <span class="nn">org.apache.spark.sql.expressions.Aggregator</span>
<span class="k">import</span> <span class="nn">org.apache.spark.sql.functions</span>
<span class="k">case</span> <span class="k">class</span> <span class="nc">Average</span><span class="o">(</span><span class="k">var</span> <span class="n">sum</span><span class="k">:</span> <span class="kt">Long</span><span class="o">,</span> <span class="k">var</span> <span class="n">count</span><span class="k">:</span> <span class="kt">Long</span><span class="o">)</span>
<span class="k">object</span> <span class="nc">MyAverage</span> <span class="k">extends</span> <span class="nc">Aggregator</span><span class="o">[</span><span class="kt">Long</span>, <span class="kt">Average</span>, <span class="kt">Double</span><span class="o">]</span> <span class="o">{</span>
<span class="c1">// A zero value for this aggregation. Should satisfy the property that any b + zero = b</span>
<span class="k">def</span> <span class="nf">zero</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="nc">Average</span><span class="o">(</span><span class="mi">0L</span><span class="o">,</span> <span class="mi">0L</span><span class="o">)</span>
<span class="c1">// Combine two values to produce a new value. For performance, the function may modify `buffer`</span>
<span class="c1">// and return it instead of constructing a new object</span>
<span class="k">def</span> <span class="nf">reduce</span><span class="o">(</span><span class="n">buffer</span><span class="k">:</span> <span class="kt">Average</span><span class="o">,</span> <span class="n">data</span><span class="k">:</span> <span class="kt">Long</span><span class="o">)</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="o">{</span>
<span class="nv">buffer</span><span class="o">.</span><span class="py">sum</span> <span class="o">+=</span> <span class="n">data</span>
<span class="nv">buffer</span><span class="o">.</span><span class="py">count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="n">buffer</span>
<span class="o">}</span>
<span class="c1">// Merge two intermediate values</span>
<span class="k">def</span> <span class="nf">merge</span><span class="o">(</span><span class="n">b1</span><span class="k">:</span> <span class="kt">Average</span><span class="o">,</span> <span class="n">b2</span><span class="k">:</span> <span class="kt">Average</span><span class="o">)</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="o">{</span>
<span class="nv">b1</span><span class="o">.</span><span class="py">sum</span> <span class="o">+=</span> <span class="nv">b2</span><span class="o">.</span><span class="py">sum</span>
<span class="nv">b1</span><span class="o">.</span><span class="py">count</span> <span class="o">+=</span> <span class="nv">b2</span><span class="o">.</span><span class="py">count</span>
<span class="n">b1</span>
<span class="o">}</span>
<span class="c1">// Transform the output of the reduction</span>
<span class="k">def</span> <span class="nf">finish</span><span class="o">(</span><span class="n">reduction</span><span class="k">:</span> <span class="kt">Average</span><span class="o">)</span><span class="k">:</span> <span class="kt">Double</span> <span class="o">=</span> <span class="nv">reduction</span><span class="o">.</span><span class="py">sum</span><span class="o">.</span><span class="py">toDouble</span> <span class="o">/</span> <span class="nv">reduction</span><span class="o">.</span><span class="py">count</span>
<span class="c1">// Specifies the Encoder for the intermediate value type</span>
<span class="k">def</span> <span class="nf">bufferEncoder</span><span class="k">:</span> <span class="kt">Encoder</span><span class="o">[</span><span class="kt">Average</span><span class="o">]</span> <span class="k">=</span> <span class="nv">Encoders</span><span class="o">.</span><span class="py">product</span>
<span class="c1">// Specifies the Encoder for the final output value type</span>
<span class="k">def</span> <span class="nf">outputEncoder</span><span class="k">:</span> <span class="kt">Encoder</span><span class="o">[</span><span class="kt">Double</span><span class="o">]</span> <span class="k">=</span> <span class="nv">Encoders</span><span class="o">.</span><span class="py">scalaDouble</span>
<span class="o">}</span>
<span class="c1">// Register the function to access it</span>
<span class="nv">spark</span><span class="o">.</span><span class="py">udf</span><span class="o">.</span><span class="py">register</span><span class="o">(</span><span class="s">"myAverage"</span><span class="o">,</span> <span class="nv">functions</span><span class="o">.</span><span class="py">udaf</span><span class="o">(</span><span class="nc">MyAverage</span><span class="o">))</span>
<span class="k">val</span> <span class="nv">df</span> <span class="k">=</span> <span class="nv">spark</span><span class="o">.</span><span class="py">read</span><span class="o">.</span><span class="py">json</span><span class="o">(</span><span class="s">"examples/src/main/resources/employees.json"</span><span class="o">)</span>
<span class="nv">df</span><span class="o">.</span><span class="py">createOrReplaceTempView</span><span class="o">(</span><span class="s">"employees"</span><span class="o">)</span>
<span class="nv">df</span><span class="o">.</span><span class="py">show</span><span class="o">()</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// | name|salary|</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// |Michael| 3000|</span>
<span class="c1">// | Andy| 4500|</span>
<span class="c1">// | Justin| 3500|</span>
<span class="c1">// | Berta| 4000|</span>
<span class="c1">// +-------+------+</span>
<span class="k">val</span> <span class="nv">result</span> <span class="k">=</span> <span class="nv">spark</span><span class="o">.</span><span class="py">sql</span><span class="o">(</span><span class="s">"SELECT myAverage(salary) as average_salary FROM employees"</span><span class="o">)</span>
<span class="nv">result</span><span class="o">.</span><span class="py">show</span><span class="o">()</span>
<span class="c1">// +--------------+</span>
<span class="c1">// |average_salary|</span>
<span class="c1">// +--------------+</span>
<span class="c1">// | 3750.0|</span>
<span class="c1">// +--------------+</span></code></pre></div>
<div><small>Find full example code at "examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedUntypedAggregation.scala" in the Spark repo.</small></div>
</div>
<div data-lang="java">
<div class="highlight"><pre class="codehilite"><code><span class="kn">import</span> <span class="nn">java.io.Serializable</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Dataset</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Encoder</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Encoders</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Row</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.SparkSession</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.expressions.Aggregator</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.functions</span><span class="o">;</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Average</span> <span class="kd">implements</span> <span class="nc">Serializable</span> <span class="o">{</span>
<span class="kd">private</span> <span class="kt">long</span> <span class="n">sum</span><span class="o">;</span>
<span class="kd">private</span> <span class="kt">long</span> <span class="n">count</span><span class="o">;</span>
<span class="c1">// Constructors, getters, setters...</span>
<span class="kd">public</span> <span class="nf">Average</span><span class="o">()</span> <span class="o">{</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="nf">Average</span><span class="o">(</span><span class="kt">long</span> <span class="n">sum</span><span class="o">,</span> <span class="kt">long</span> <span class="n">count</span><span class="o">)</span> <span class="o">{</span>
<span class="k">this</span><span class="o">.</span><span class="na">sum</span> <span class="o">=</span> <span class="n">sum</span><span class="o">;</span>
<span class="k">this</span><span class="o">.</span><span class="na">count</span> <span class="o">=</span> <span class="n">count</span><span class="o">;</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kt">long</span> <span class="nf">getSum</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="n">sum</span><span class="o">;</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kt">void</span> <span class="nf">setSum</span><span class="o">(</span><span class="kt">long</span> <span class="n">sum</span><span class="o">)</span> <span class="o">{</span>
<span class="k">this</span><span class="o">.</span><span class="na">sum</span> <span class="o">=</span> <span class="n">sum</span><span class="o">;</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kt">long</span> <span class="nf">getCount</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="n">count</span><span class="o">;</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kt">void</span> <span class="nf">setCount</span><span class="o">(</span><span class="kt">long</span> <span class="n">count</span><span class="o">)</span> <span class="o">{</span>
<span class="k">this</span><span class="o">.</span><span class="na">count</span> <span class="o">=</span> <span class="n">count</span><span class="o">;</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">MyAverage</span> <span class="kd">extends</span> <span class="nc">Aggregator</span><span class="o">&lt;</span><span class="nc">Long</span><span class="o">,</span> <span class="nc">Average</span><span class="o">,</span> <span class="nc">Double</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="c1">// A zero value for this aggregation. Should satisfy the property that any b + zero = b</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">zero</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="k">new</span> <span class="nf">Average</span><span class="o">(</span><span class="mi">0L</span><span class="o">,</span> <span class="mi">0L</span><span class="o">);</span>
<span class="o">}</span>
<span class="c1">// Combine two values to produce a new value. For performance, the function may modify `buffer`</span>
<span class="c1">// and return it instead of constructing a new object</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">reduce</span><span class="o">(</span><span class="nc">Average</span> <span class="n">buffer</span><span class="o">,</span> <span class="nc">Long</span> <span class="n">data</span><span class="o">)</span> <span class="o">{</span>
<span class="kt">long</span> <span class="n">newSum</span> <span class="o">=</span> <span class="n">buffer</span><span class="o">.</span><span class="na">getSum</span><span class="o">()</span> <span class="o">+</span> <span class="n">data</span><span class="o">;</span>
<span class="kt">long</span> <span class="n">newCount</span> <span class="o">=</span> <span class="n">buffer</span><span class="o">.</span><span class="na">getCount</span><span class="o">()</span> <span class="o">+</span> <span class="mi">1</span><span class="o">;</span>
<span class="n">buffer</span><span class="o">.</span><span class="na">setSum</span><span class="o">(</span><span class="n">newSum</span><span class="o">);</span>
<span class="n">buffer</span><span class="o">.</span><span class="na">setCount</span><span class="o">(</span><span class="n">newCount</span><span class="o">);</span>
<span class="k">return</span> <span class="n">buffer</span><span class="o">;</span>
<span class="o">}</span>
<span class="c1">// Merge two intermediate values</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">merge</span><span class="o">(</span><span class="nc">Average</span> <span class="n">b1</span><span class="o">,</span> <span class="nc">Average</span> <span class="n">b2</span><span class="o">)</span> <span class="o">{</span>
<span class="kt">long</span> <span class="n">mergedSum</span> <span class="o">=</span> <span class="n">b1</span><span class="o">.</span><span class="na">getSum</span><span class="o">()</span> <span class="o">+</span> <span class="n">b2</span><span class="o">.</span><span class="na">getSum</span><span class="o">();</span>
<span class="kt">long</span> <span class="n">mergedCount</span> <span class="o">=</span> <span class="n">b1</span><span class="o">.</span><span class="na">getCount</span><span class="o">()</span> <span class="o">+</span> <span class="n">b2</span><span class="o">.</span><span class="na">getCount</span><span class="o">();</span>
<span class="n">b1</span><span class="o">.</span><span class="na">setSum</span><span class="o">(</span><span class="n">mergedSum</span><span class="o">);</span>
<span class="n">b1</span><span class="o">.</span><span class="na">setCount</span><span class="o">(</span><span class="n">mergedCount</span><span class="o">);</span>
<span class="k">return</span> <span class="n">b1</span><span class="o">;</span>
<span class="o">}</span>
<span class="c1">// Transform the output of the reduction</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Double</span> <span class="nf">finish</span><span class="o">(</span><span class="nc">Average</span> <span class="n">reduction</span><span class="o">)</span> <span class="o">{</span>
<span class="k">return</span> <span class="o">((</span><span class="kt">double</span><span class="o">)</span> <span class="n">reduction</span><span class="o">.</span><span class="na">getSum</span><span class="o">())</span> <span class="o">/</span> <span class="n">reduction</span><span class="o">.</span><span class="na">getCount</span><span class="o">();</span>
<span class="o">}</span>
<span class="c1">// Specifies the Encoder for the intermediate value type</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Encoder</span><span class="o">&lt;</span><span class="nc">Average</span><span class="o">&gt;</span> <span class="nf">bufferEncoder</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">bean</span><span class="o">(</span><span class="nc">Average</span><span class="o">.</span><span class="na">class</span><span class="o">);</span>
<span class="o">}</span>
<span class="c1">// Specifies the Encoder for the final output value type</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="nc">Encoder</span><span class="o">&lt;</span><span class="nc">Double</span><span class="o">&gt;</span> <span class="nf">outputEncoder</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">DOUBLE</span><span class="o">();</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="c1">// Register the function to access it</span>
<span class="n">spark</span><span class="o">.</span><span class="na">udf</span><span class="o">().</span><span class="na">register</span><span class="o">(</span><span class="s">"myAverage"</span><span class="o">,</span> <span class="n">functions</span><span class="o">.</span><span class="na">udaf</span><span class="o">(</span><span class="k">new</span> <span class="nc">MyAverage</span><span class="o">(),</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">LONG</span><span class="o">()));</span>
<span class="nc">Dataset</span><span class="o">&lt;</span><span class="nc">Row</span><span class="o">&gt;</span> <span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="na">read</span><span class="o">().</span><span class="na">json</span><span class="o">(</span><span class="s">"examples/src/main/resources/employees.json"</span><span class="o">);</span>
<span class="n">df</span><span class="o">.</span><span class="na">createOrReplaceTempView</span><span class="o">(</span><span class="s">"employees"</span><span class="o">);</span>
<span class="n">df</span><span class="o">.</span><span class="na">show</span><span class="o">();</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// | name|salary|</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// |Michael| 3000|</span>
<span class="c1">// | Andy| 4500|</span>
<span class="c1">// | Justin| 3500|</span>
<span class="c1">// | Berta| 4000|</span>
<span class="c1">// +-------+------+</span>
<span class="nc">Dataset</span><span class="o">&lt;</span><span class="nc">Row</span><span class="o">&gt;</span> <span class="n">result</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="na">sql</span><span class="o">(</span><span class="s">"SELECT myAverage(salary) as average_salary FROM employees"</span><span class="o">);</span>
<span class="n">result</span><span class="o">.</span><span class="na">show</span><span class="o">();</span>
<span class="c1">// +--------------+</span>
<span class="c1">// |average_salary|</span>
<span class="c1">// +--------------+</span>
<span class="c1">// | 3750.0|</span>
<span class="c1">// +--------------+</span></code></pre></div>
<div><small>Find full example code at "examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedUntypedAggregation.java" in the Spark repo.</small></div>
</div>
<div data-lang="SQL">
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">-- Compile and place UDAF MyAverage in a JAR file called `MyAverage.jar` in /tmp.</span>
<span class="k">CREATE</span> <span class="k">FUNCTION</span> <span class="n">myAverage</span> <span class="k">AS</span> <span class="s1">'MyAverage'</span> <span class="k">USING</span> <span class="n">JAR</span> <span class="s1">'/tmp/MyAverage.jar'</span><span class="p">;</span>
<span class="k">SHOW</span> <span class="k">USER</span> <span class="n">FUNCTIONS</span><span class="p">;</span>
<span class="o">+</span><span class="c1">------------------+</span>
<span class="o">|</span> <span class="k">function</span><span class="o">|</span>
<span class="o">+</span><span class="c1">------------------+</span>
<span class="o">|</span> <span class="k">default</span><span class="p">.</span><span class="n">myAverage</span><span class="o">|</span>
<span class="o">+</span><span class="c1">------------------+</span>
<span class="k">CREATE</span> <span class="k">TEMPORARY</span> <span class="k">VIEW</span> <span class="n">employees</span>
<span class="k">USING</span> <span class="n">org</span><span class="p">.</span><span class="n">apache</span><span class="p">.</span><span class="n">spark</span><span class="p">.</span><span class="k">sql</span><span class="p">.</span><span class="n">json</span>
<span class="k">OPTIONS</span> <span class="p">(</span>
<span class="n">path</span> <span class="nv">"examples/src/main/resources/employees.json"</span>
<span class="p">);</span>
<span class="k">SELECT</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">employees</span><span class="p">;</span>
<span class="o">+</span><span class="c1">-------+------+</span>
<span class="o">|</span> <span class="n">name</span><span class="o">|</span><span class="n">salary</span><span class="o">|</span>
<span class="o">+</span><span class="c1">-------+------+</span>
<span class="o">|</span><span class="n">Michael</span><span class="o">|</span> <span class="mi">3000</span><span class="o">|</span>
<span class="o">|</span> <span class="n">Andy</span><span class="o">|</span> <span class="mi">4500</span><span class="o">|</span>
<span class="o">|</span> <span class="n">Justin</span><span class="o">|</span> <span class="mi">3500</span><span class="o">|</span>
<span class="o">|</span> <span class="n">Berta</span><span class="o">|</span> <span class="mi">4000</span><span class="o">|</span>
<span class="o">+</span><span class="c1">-------+------+</span>
<span class="k">SELECT</span> <span class="n">myAverage</span><span class="p">(</span><span class="n">salary</span><span class="p">)</span> <span class="k">as</span> <span class="n">average_salary</span> <span class="k">FROM</span> <span class="n">employees</span><span class="p">;</span>
<span class="o">+</span><span class="c1">--------------+</span>
<span class="o">|</span><span class="n">average_salary</span><span class="o">|</span>
<span class="o">+</span><span class="c1">--------------+</span>
<span class="o">|</span> <span class="mi">3750</span><span class="p">.</span><span class="mi">0</span><span class="o">|</span>
<span class="o">+</span><span class="c1">--------------+</span>
</code></pre></div> </div>
</div>
</div>
<h3 id="related-statements">Related Statements</h3>
<ul>
<li><a href="sql-ref-functions-udf-scalar.html">Scalar User Defined Functions (UDFs)</a></li>
<li><a href="sql-ref-functions-udf-hive.html">Integration with Hive UDFs/UDAFs/UDTFs</a></li>
</ul>
</div>
<!-- /container -->
</div>
<script src="js/vendor/jquery-3.5.1.min.js"></script>
<script src="js/vendor/bootstrap.bundle.min.js"></script>
<script src="js/vendor/anchor.min.js"></script>
<script src="js/main.js"></script>
<script type="text/javascript" src="js/vendor/docsearch.min.js"></script>
<script type="text/javascript">
  // DocSearch is entirely free and automated. It is built in two parts:
  //   1. A crawler, run every 24 hours on the DocSearch side, that follows every link on the
  //      website, extracts the content of every page it traverses, and pushes that content to
  //      an Algolia index.
  //   2. A JavaScript snippet (this one), inserted in the website, that binds the Algolia index
  //      to the search input and displays its results in a dropdown UI.
  // For more details on how DocSearch works, check the DocSearch docs.
  (function() {
    // Restrict search results to the documentation version this page belongs to.
    var versionFilter = ["version:4.1.0-preview1"];

    var searchOptions = {
      apiKey: 'd62f962a82bc9abb53471cb7b89da35e',
      appId: 'RAI69RXRSK',
      indexName: 'apache_spark',
      // CSS selector of the search box that DocSearch attaches to.
      inputSelector: '#docsearch-input',
      enhancedSearchInput: true,
      algoliaOptions: {
        facetFilters: versionFilter
      },
      // Set debug to true if you want to inspect the dropdown.
      debug: false
    };

    docsearch(searchOptions);
  }());
</script>
<!-- MathJax Section -->
<script type="text/x-mathjax-config">
// Configuration block for MathJax (the script type marks it as MathJax config rather than
// page JavaScript). It sets the TeX input option equationNumbers.autoNumber to "AMS";
// see the MathJax TeX input processor docs for the exact numbering rules this selects.
MathJax.Hub.Config({
TeX: { equationNumbers: { autoNumber: "AMS" } }
});
</script>
<script>
  // Load MathJax dynamically, then configure its tex2jax preprocessor once the library
  // script has finished loading.
  //
  // The CDN URL is always an absolute HTTPS URL. It resolves the same way whether this page
  // is opened from a local file (file://), over HTTP, or over HTTPS, and it never fetches
  // executable script over plain HTTP. A protocol-relative "//cdnjs..." URL is not used
  // because it would not work for file://.
  (function(d) {
    var script = d.createElement('script');
    script.async = true;

    // MathJax only exists after the script below has loaded, so configure it in onload.
    script.onload = function() {
      MathJax.Hub.Config({
        tex2jax: {
          // NOTE(review): as written, each inline delimiter string below contains two
          // backslashes before the parenthesis, while the displayMath strings contain one
          // backslash before the bracket. Confirm this difference is intentional.
          inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
          displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
          processEscapes: true,
          // Tags whose contents are not scanned for math; 'pre' covers the code listings.
          skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
        }
      });
    };

    script.src = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js' +
      '?config=TeX-AMS-MML_HTMLorMML';
    d.getElementsByTagName('head')[0].appendChild(script);
  }(document));
</script>
</body>
</html>