<!-- blob: 434e5efb43afd1d6296a5e352d7d747df045a07a [file] [log] [blame] -->
<!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]> <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]> <html class="no-js lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<title>User-Defined Aggregate Functions (UDAFs) - Spark 3.0.0 Documentation</title>
<link rel="stylesheet" href="css/bootstrap.min.css">
<style>
/* Vertical breathing room for the page; the top padding presumably keeps
   content clear of the fixed-top navbar (confirm against main.css). */
body {
  padding-bottom: 40px;
  padding-top: 60px;
}
</style>
<meta name="viewport" content="width=device-width">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<link rel="stylesheet" href="css/main.css">
<script src="js/vendor/modernizr-2.6.1-respond-1.1.0.min.js"></script>
<link rel="stylesheet" href="css/pygments-default.css">
<!-- Google analytics script -->
<script type="text/javascript">
// Google Analytics command queue: reuse an existing queue if one is already defined.
var _gaq = _gaq || [];
// Queue the account and page-view commands.
_gaq.push(['_setAccount', 'UA-32518208-2']);
_gaq.push(['_trackPageview']);

// Inject the ga.js loader asynchronously, ahead of the first <script> in the document.
(function() {
  var loader = document.createElement('script');
  loader.type = 'text/javascript';
  loader.async = true;

  // Pick the host prefix that matches the protocol the page was served over.
  var hostPrefix = ('https:' == document.location.protocol) ? 'https://ssl' : 'http://www';
  loader.src = hostPrefix + '.google-analytics.com/ga.js';

  var firstScript = document.getElementsByTagName('script')[0];
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
</head>
<body>
<!--[if lt IE 7]>
<p class="chromeframe">You are using an outdated browser. <a href="https://browsehappy.com/">Upgrade your browser today</a> or <a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a> to better experience this site.</p>
<![endif]-->
<!-- This code is taken from http://twitter.github.com/bootstrap/examples/hero.html -->
<div class="navbar navbar-fixed-top" id="topbar">
<div class="navbar-inner">
<div class="container">
<!-- Brand: the logo links to index.html and is followed by the docs version.
     The image is the link's only content, so its alt text is the link's accessible name. -->
<div class="brand"><a href="index.html">
<img src="img/spark-logo-hd.png" alt="Apache Spark" style="height:50px;"/></a><span class="version">3.0.0</span>
</div>
<!-- Primary navigation: one plain link followed by four dropdown menus. -->
<ul class="nav">
  <!-- TODO(andyk): mark the <li> of the current page with class="active". -->
  <li><a href="index.html">Overview</a></li>

  <!-- Dropdown: programming guides -->
  <li class="dropdown">
    <a href="#" class="dropdown-toggle" data-toggle="dropdown">Programming Guides<b class="caret"></b></a>
    <ul class="dropdown-menu">
      <li><a href="quick-start.html">Quick Start</a></li>
      <li><a href="rdd-programming-guide.html">RDDs, Accumulators, Broadcasts Vars</a></li>
      <li><a href="sql-programming-guide.html">SQL, DataFrames, and Datasets</a></li>
      <li><a href="structured-streaming-programming-guide.html">Structured Streaming</a></li>
      <li><a href="streaming-programming-guide.html">Spark Streaming (DStreams)</a></li>
      <li><a href="ml-guide.html">MLlib (Machine Learning)</a></li>
      <li><a href="graphx-programming-guide.html">GraphX (Graph Processing)</a></li>
      <li><a href="sparkr.html">SparkR (R on Spark)</a></li>
    </ul>
  </li>

  <!-- Dropdown: per-language API documentation -->
  <li class="dropdown">
    <a href="#" class="dropdown-toggle" data-toggle="dropdown">API Docs<b class="caret"></b></a>
    <ul class="dropdown-menu">
      <li><a href="api/scala/org/apache/spark/index.html">Scala</a></li>
      <li><a href="api/java/index.html">Java</a></li>
      <li><a href="api/python/index.html">Python</a></li>
      <li><a href="api/R/index.html">R</a></li>
      <li><a href="api/sql/index.html">SQL, Built-in Functions</a></li>
    </ul>
  </li>

  <!-- Dropdown: deployment guides -->
  <li class="dropdown">
    <a href="#" class="dropdown-toggle" data-toggle="dropdown">Deploying<b class="caret"></b></a>
    <ul class="dropdown-menu">
      <li><a href="cluster-overview.html">Overview</a></li>
      <li><a href="submitting-applications.html">Submitting Applications</a></li>
      <li class="divider"></li>
      <li><a href="spark-standalone.html">Spark Standalone</a></li>
      <li><a href="running-on-mesos.html">Mesos</a></li>
      <li><a href="running-on-yarn.html">YARN</a></li>
      <li><a href="running-on-kubernetes.html">Kubernetes</a></li>
    </ul>
  </li>

  <!-- Dropdown: remaining guides and project links -->
  <li class="dropdown">
    <a href="api.html" class="dropdown-toggle" data-toggle="dropdown">More<b class="caret"></b></a>
    <ul class="dropdown-menu">
      <li><a href="configuration.html">Configuration</a></li>
      <li><a href="monitoring.html">Monitoring</a></li>
      <li><a href="tuning.html">Tuning Guide</a></li>
      <li><a href="job-scheduling.html">Job Scheduling</a></li>
      <li><a href="security.html">Security</a></li>
      <li><a href="hardware-provisioning.html">Hardware Provisioning</a></li>
      <li><a href="migration-guide.html">Migration Guide</a></li>
      <li class="divider"></li>
      <li><a href="building-spark.html">Building Spark</a></li>
      <li><a href="https://spark.apache.org/contributing.html">Contributing to Spark</a></li>
      <li><a href="https://spark.apache.org/third-party-projects.html">Third Party Projects</a></li>
    </ul>
  </li>
</ul>
<!--<p class="navbar-text pull-right"><span class="version-text">v3.0.0</span></p>-->
</div>
</div>
</div>
<div class="container-wrapper">
<!-- Left sidebar: table of contents for the Spark SQL guide.
     Each sub-menu is nested inside the <li> of its parent entry, because a <ul>
     may only contain <li> children. The current page's entry is wrapped in <b>. -->
<div class="left-menu-wrapper">
  <div class="left-menu">
    <h3><a href="sql-programming-guide.html">Spark SQL Guide</a></h3>
    <ul>
      <li>
        <a href="sql-getting-started.html">
          Getting Started
        </a>
      </li>
      <li>
        <a href="sql-data-sources.html">
          Data Sources
        </a>
      </li>
      <li>
        <a href="sql-performance-tuning.html">
          Performance Tuning
        </a>
      </li>
      <li>
        <a href="sql-distributed-sql-engine.html">
          Distributed SQL Engine
        </a>
      </li>
      <li>
        <a href="sql-pyspark-pandas-with-arrow.html">
          PySpark Usage Guide for Pandas with Apache Arrow
        </a>
      </li>
      <li>
        <a href="sql-migration-old.html">
          Migration Guide
        </a>
      </li>
      <li>
        <a href="sql-ref.html">
          SQL Reference
        </a>
        <!-- Sub-menu of "SQL Reference" -->
        <ul>
          <li>
            <a href="sql-ref-ansi-compliance.html">
              ANSI Compliance
            </a>
          </li>
          <li>
            <a href="sql-ref-datatypes.html">
              Data Types
            </a>
          </li>
          <li>
            <a href="sql-ref-datetime-pattern.html">
              Datetime Pattern
            </a>
          </li>
          <li>
            <a href="sql-ref-functions.html">
              Functions
            </a>
            <!-- Sub-menu of "Functions" -->
            <ul>
              <li>
                <a href="sql-ref-functions-builtin.html">
                  Built-in Functions
                </a>
              </li>
              <li>
                <a href="sql-ref-functions-udf-scalar.html">
                  Scalar UDFs (User-Defined Functions)
                </a>
              </li>
              <li>
                <a href="sql-ref-functions-udf-aggregate.html">
                  <b>UDAFs (User-Defined Aggregate Functions)</b>
                </a>
              </li>
              <li>
                <a href="sql-ref-functions-udf-hive.html">
                  Integration with Hive UDFs/UDAFs/UDTFs
                </a>
              </li>
            </ul>
          </li>
          <li>
            <a href="sql-ref-identifier.html">
              Identifiers
            </a>
          </li>
          <li>
            <a href="sql-ref-literals.html">
              Literals
            </a>
          </li>
          <li>
            <a href="sql-ref-null-semantics.html">
              Null Semantics
            </a>
          </li>
          <li>
            <a href="sql-ref-syntax.html">
              SQL Syntax
            </a>
          </li>
        </ul>
      </li>
    </ul>
  </div>
</div>
<input id="nav-trigger" class="nav-trigger" checked type="checkbox">
<label for="nav-trigger"></label>
<div class="content-with-sidebar" id="content">
<h1 class="title">User-Defined Aggregate Functions (UDAFs)</h1>
<h3 id="description">Description</h3>
<p>User-Defined Aggregate Functions (UDAFs) are user-programmable routines that act on multiple rows at once and return a single aggregated value as a result. This documentation lists the classes that are required for creating and registering UDAFs. It also contains examples that demonstrate how to define and register UDAFs in Scala and Java and invoke them in Spark SQL.</p>
<h3 id="aggregator-in-buf-out">Aggregator[-IN, BUF, OUT]</h3>
<p>A base class for user-defined aggregations, which can be used in Dataset operations to take all of the elements of a group and reduce them to a single value.</p>
<p><strong><em>IN</em></strong> - The input type for the aggregation.</p>
<p><strong><em>BUF</em></strong> - The type of the intermediate value of the reduction.</p>
<p><strong><em>OUT</em></strong> - The type of the final output result.</p>
<ul>
<li>
<p><strong>bufferEncoder: Encoder[BUF]</strong></p>
<p>Specifies the Encoder for the intermediate value type.</p>
</li>
<li>
<p><strong>finish(reduction: BUF): OUT</strong></p>
<p>Transform the output of the reduction.</p>
</li>
<li>
<p><strong>merge(b1: BUF, b2: BUF): BUF</strong></p>
<p>Merge two intermediate values.</p>
</li>
<li>
<p><strong>outputEncoder: Encoder[OUT]</strong></p>
<p>Specifies the Encoder for the final output value type.</p>
</li>
<li>
<p><strong>reduce(b: BUF, a: IN): BUF</strong></p>
<p>Aggregate input value <code class="highlighter-rouge">a</code> into the current intermediate value. For performance, the function may modify <code class="highlighter-rouge">b</code> and return it instead of constructing a new object for <code class="highlighter-rouge">b</code>.</p>
</li>
<li>
<p><strong>zero: BUF</strong></p>
<p>The initial value of the intermediate result for this aggregation.</p>
</li>
</ul>
<h3 id="examples">Examples</h3>
<h4 id="type-safe-user-defined-aggregate-functions">Type-Safe User-Defined Aggregate Functions</h4>
<p>User-defined aggregations for strongly typed Datasets revolve around the <a href="api/scala/org/apache/spark/sql/expressions/Aggregator.html">Aggregator</a> abstract class.
For example, a type-safe user-defined average can look like:</p>
<div class="codetabs">
<div data-lang="scala">
<div class="highlight"><pre class="codehilite"><code><span class="k">import</span> <span class="nn">org.apache.spark.sql.</span><span class="o">{</span><span class="nc">Encoder</span><span class="o">,</span> <span class="nc">Encoders</span><span class="o">,</span> <span class="nc">SparkSession</span><span class="o">}</span>
<span class="k">import</span> <span class="nn">org.apache.spark.sql.expressions.Aggregator</span>
<span class="k">case</span> <span class="k">class</span> <span class="nc">Employee</span><span class="o">(</span><span class="n">name</span><span class="k">:</span> <span class="kt">String</span><span class="o">,</span> <span class="n">salary</span><span class="k">:</span> <span class="kt">Long</span><span class="o">)</span>
<span class="k">case</span> <span class="k">class</span> <span class="nc">Average</span><span class="o">(</span><span class="k">var</span> <span class="n">sum</span><span class="k">:</span> <span class="kt">Long</span><span class="o">,</span> <span class="k">var</span> <span class="n">count</span><span class="k">:</span> <span class="kt">Long</span><span class="o">)</span>
<span class="k">object</span> <span class="nc">MyAverage</span> <span class="k">extends</span> <span class="nc">Aggregator</span><span class="o">[</span><span class="kt">Employee</span>, <span class="kt">Average</span>, <span class="kt">Double</span><span class="o">]</span> <span class="o">{</span>
<span class="c1">// A zero value for this aggregation. Should satisfy the property that any b + zero = b</span>
<span class="k">def</span> <span class="nf">zero</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="nc">Average</span><span class="o">(</span><span class="mi">0L</span><span class="o">,</span> <span class="mi">0L</span><span class="o">)</span>
<span class="c1">// Combine two values to produce a new value. For performance, the function may modify `buffer`</span>
<span class="c1">// and return it instead of constructing a new object</span>
<span class="k">def</span> <span class="nf">reduce</span><span class="o">(</span><span class="n">buffer</span><span class="k">:</span> <span class="kt">Average</span><span class="o">,</span> <span class="n">employee</span><span class="k">:</span> <span class="kt">Employee</span><span class="o">)</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="o">{</span>
<span class="nv">buffer</span><span class="o">.</span><span class="py">sum</span> <span class="o">+=</span> <span class="nv">employee</span><span class="o">.</span><span class="py">salary</span>
<span class="nv">buffer</span><span class="o">.</span><span class="py">count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="n">buffer</span>
<span class="o">}</span>
<span class="c1">// Merge two intermediate values</span>
<span class="k">def</span> <span class="nf">merge</span><span class="o">(</span><span class="n">b1</span><span class="k">:</span> <span class="kt">Average</span><span class="o">,</span> <span class="n">b2</span><span class="k">:</span> <span class="kt">Average</span><span class="o">)</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="o">{</span>
<span class="nv">b1</span><span class="o">.</span><span class="py">sum</span> <span class="o">+=</span> <span class="nv">b2</span><span class="o">.</span><span class="py">sum</span>
<span class="nv">b1</span><span class="o">.</span><span class="py">count</span> <span class="o">+=</span> <span class="nv">b2</span><span class="o">.</span><span class="py">count</span>
<span class="n">b1</span>
<span class="o">}</span>
<span class="c1">// Transform the output of the reduction</span>
<span class="k">def</span> <span class="nf">finish</span><span class="o">(</span><span class="n">reduction</span><span class="k">:</span> <span class="kt">Average</span><span class="o">)</span><span class="k">:</span> <span class="kt">Double</span> <span class="o">=</span> <span class="nv">reduction</span><span class="o">.</span><span class="py">sum</span><span class="o">.</span><span class="py">toDouble</span> <span class="o">/</span> <span class="nv">reduction</span><span class="o">.</span><span class="py">count</span>
<span class="c1">// Specifies the Encoder for the intermediate value type</span>
<span class="k">def</span> <span class="nf">bufferEncoder</span><span class="k">:</span> <span class="kt">Encoder</span><span class="o">[</span><span class="kt">Average</span><span class="o">]</span> <span class="k">=</span> <span class="nv">Encoders</span><span class="o">.</span><span class="py">product</span>
<span class="c1">// Specifies the Encoder for the final output value type</span>
<span class="k">def</span> <span class="nf">outputEncoder</span><span class="k">:</span> <span class="kt">Encoder</span><span class="o">[</span><span class="kt">Double</span><span class="o">]</span> <span class="k">=</span> <span class="nv">Encoders</span><span class="o">.</span><span class="py">scalaDouble</span>
<span class="o">}</span>
<span class="k">val</span> <span class="nv">ds</span> <span class="k">=</span> <span class="nv">spark</span><span class="o">.</span><span class="py">read</span><span class="o">.</span><span class="py">json</span><span class="o">(</span><span class="s">"examples/src/main/resources/employees.json"</span><span class="o">).</span><span class="py">as</span><span class="o">[</span><span class="kt">Employee</span><span class="o">]</span>
<span class="nv">ds</span><span class="o">.</span><span class="py">show</span><span class="o">()</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// | name|salary|</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// |Michael| 3000|</span>
<span class="c1">// | Andy| 4500|</span>
<span class="c1">// | Justin| 3500|</span>
<span class="c1">// | Berta| 4000|</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// Convert the function to a `TypedColumn` and give it a name</span>
<span class="k">val</span> <span class="nv">averageSalary</span> <span class="k">=</span> <span class="nv">MyAverage</span><span class="o">.</span><span class="py">toColumn</span><span class="o">.</span><span class="py">name</span><span class="o">(</span><span class="s">"average_salary"</span><span class="o">)</span>
<span class="k">val</span> <span class="nv">result</span> <span class="k">=</span> <span class="nv">ds</span><span class="o">.</span><span class="py">select</span><span class="o">(</span><span class="n">averageSalary</span><span class="o">)</span>
<span class="nv">result</span><span class="o">.</span><span class="py">show</span><span class="o">()</span>
<span class="c1">// +--------------+</span>
<span class="c1">// |average_salary|</span>
<span class="c1">// +--------------+</span>
<span class="c1">// | 3750.0|</span>
<span class="c1">// +--------------+</span></code></pre></div>
<div><small>Find full example code at "examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedTypedAggregation.scala" in the Spark repo.</small></div>
</div>
<div data-lang="java">
<div class="highlight"><pre class="codehilite"><code><span class="kn">import</span> <span class="nn">java.io.Serializable</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Dataset</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Encoder</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Encoders</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.SparkSession</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.TypedColumn</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.expressions.Aggregator</span><span class="o">;</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Employee</span> <span class="kd">implements</span> <span class="nc">Serializable</span> <span class="o">{</span>
<span class="kd">private</span> <span class="nc">String</span> <span class="n">name</span><span class="o">;</span>
<span class="kd">private</span> <span class="kt">long</span> <span class="n">salary</span><span class="o">;</span>
<span class="c1">// Constructors, getters, setters...</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Average</span> <span class="kd">implements</span> <span class="nc">Serializable</span> <span class="o">{</span>
<span class="kd">private</span> <span class="kt">long</span> <span class="n">sum</span><span class="o">;</span>
<span class="kd">private</span> <span class="kt">long</span> <span class="n">count</span><span class="o">;</span>
<span class="c1">// Constructors, getters, setters...</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">MyAverage</span> <span class="kd">extends</span> <span class="nc">Aggregator</span><span class="o">&lt;</span><span class="nc">Employee</span><span class="o">,</span> <span class="nc">Average</span><span class="o">,</span> <span class="nc">Double</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="c1">// A zero value for this aggregation. Should satisfy the property that any b + zero = b</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">zero</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="k">new</span> <span class="nf">Average</span><span class="o">(</span><span class="mi">0L</span><span class="o">,</span> <span class="mi">0L</span><span class="o">);</span>
<span class="o">}</span>
<span class="c1">// Combine two values to produce a new value. For performance, the function may modify `buffer`</span>
<span class="c1">// and return it instead of constructing a new object</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">reduce</span><span class="o">(</span><span class="nc">Average</span> <span class="n">buffer</span><span class="o">,</span> <span class="nc">Employee</span> <span class="n">employee</span><span class="o">)</span> <span class="o">{</span>
<span class="kt">long</span> <span class="n">newSum</span> <span class="o">=</span> <span class="n">buffer</span><span class="o">.</span><span class="na">getSum</span><span class="o">()</span> <span class="o">+</span> <span class="n">employee</span><span class="o">.</span><span class="na">getSalary</span><span class="o">();</span>
<span class="kt">long</span> <span class="n">newCount</span> <span class="o">=</span> <span class="n">buffer</span><span class="o">.</span><span class="na">getCount</span><span class="o">()</span> <span class="o">+</span> <span class="mi">1</span><span class="o">;</span>
<span class="n">buffer</span><span class="o">.</span><span class="na">setSum</span><span class="o">(</span><span class="n">newSum</span><span class="o">);</span>
<span class="n">buffer</span><span class="o">.</span><span class="na">setCount</span><span class="o">(</span><span class="n">newCount</span><span class="o">);</span>
<span class="k">return</span> <span class="n">buffer</span><span class="o">;</span>
<span class="o">}</span>
<span class="c1">// Merge two intermediate values</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">merge</span><span class="o">(</span><span class="nc">Average</span> <span class="n">b1</span><span class="o">,</span> <span class="nc">Average</span> <span class="n">b2</span><span class="o">)</span> <span class="o">{</span>
<span class="kt">long</span> <span class="n">mergedSum</span> <span class="o">=</span> <span class="n">b1</span><span class="o">.</span><span class="na">getSum</span><span class="o">()</span> <span class="o">+</span> <span class="n">b2</span><span class="o">.</span><span class="na">getSum</span><span class="o">();</span>
<span class="kt">long</span> <span class="n">mergedCount</span> <span class="o">=</span> <span class="n">b1</span><span class="o">.</span><span class="na">getCount</span><span class="o">()</span> <span class="o">+</span> <span class="n">b2</span><span class="o">.</span><span class="na">getCount</span><span class="o">();</span>
<span class="n">b1</span><span class="o">.</span><span class="na">setSum</span><span class="o">(</span><span class="n">mergedSum</span><span class="o">);</span>
<span class="n">b1</span><span class="o">.</span><span class="na">setCount</span><span class="o">(</span><span class="n">mergedCount</span><span class="o">);</span>
<span class="k">return</span> <span class="n">b1</span><span class="o">;</span>
<span class="o">}</span>
<span class="c1">// Transform the output of the reduction</span>
<span class="kd">public</span> <span class="nc">Double</span> <span class="nf">finish</span><span class="o">(</span><span class="nc">Average</span> <span class="n">reduction</span><span class="o">)</span> <span class="o">{</span>
<span class="k">return</span> <span class="o">((</span><span class="kt">double</span><span class="o">)</span> <span class="n">reduction</span><span class="o">.</span><span class="na">getSum</span><span class="o">())</span> <span class="o">/</span> <span class="n">reduction</span><span class="o">.</span><span class="na">getCount</span><span class="o">();</span>
<span class="o">}</span>
<span class="c1">// Specifies the Encoder for the intermediate value type</span>
<span class="kd">public</span> <span class="nc">Encoder</span><span class="o">&lt;</span><span class="nc">Average</span><span class="o">&gt;</span> <span class="nf">bufferEncoder</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">bean</span><span class="o">(</span><span class="nc">Average</span><span class="o">.</span><span class="na">class</span><span class="o">);</span>
<span class="o">}</span>
<span class="c1">// Specifies the Encoder for the final output value type</span>
<span class="kd">public</span> <span class="nc">Encoder</span><span class="o">&lt;</span><span class="nc">Double</span><span class="o">&gt;</span> <span class="nf">outputEncoder</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">DOUBLE</span><span class="o">();</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="nc">Encoder</span><span class="o">&lt;</span><span class="nc">Employee</span><span class="o">&gt;</span> <span class="n">employeeEncoder</span> <span class="o">=</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">bean</span><span class="o">(</span><span class="nc">Employee</span><span class="o">.</span><span class="na">class</span><span class="o">);</span>
<span class="nc">String</span> <span class="n">path</span> <span class="o">=</span> <span class="s">"examples/src/main/resources/employees.json"</span><span class="o">;</span>
<span class="nc">Dataset</span><span class="o">&lt;</span><span class="nc">Employee</span><span class="o">&gt;</span> <span class="n">ds</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="na">read</span><span class="o">().</span><span class="na">json</span><span class="o">(</span><span class="n">path</span><span class="o">).</span><span class="na">as</span><span class="o">(</span><span class="n">employeeEncoder</span><span class="o">);</span>
<span class="n">ds</span><span class="o">.</span><span class="na">show</span><span class="o">();</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// | name|salary|</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// |Michael| 3000|</span>
<span class="c1">// | Andy| 4500|</span>
<span class="c1">// | Justin| 3500|</span>
<span class="c1">// | Berta| 4000|</span>
<span class="c1">// +-------+------+</span>
<span class="nc">MyAverage</span> <span class="n">myAverage</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">MyAverage</span><span class="o">();</span>
<span class="c1">// Convert the function to a `TypedColumn` and give it a name</span>
<span class="nc">TypedColumn</span><span class="o">&lt;</span><span class="nc">Employee</span><span class="o">,</span> <span class="nc">Double</span><span class="o">&gt;</span> <span class="n">averageSalary</span> <span class="o">=</span> <span class="n">myAverage</span><span class="o">.</span><span class="na">toColumn</span><span class="o">().</span><span class="na">name</span><span class="o">(</span><span class="s">"average_salary"</span><span class="o">);</span>
<span class="nc">Dataset</span><span class="o">&lt;</span><span class="nc">Double</span><span class="o">&gt;</span> <span class="n">result</span> <span class="o">=</span> <span class="n">ds</span><span class="o">.</span><span class="na">select</span><span class="o">(</span><span class="n">averageSalary</span><span class="o">);</span>
<span class="n">result</span><span class="o">.</span><span class="na">show</span><span class="o">();</span>
<span class="c1">// +--------------+</span>
<span class="c1">// |average_salary|</span>
<span class="c1">// +--------------+</span>
<span class="c1">// | 3750.0|</span>
<span class="c1">// +--------------+</span></code></pre></div>
<div><small>Find full example code at "examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedTypedAggregation.java" in the Spark repo.</small></div>
</div>
</div>
<h4 id="untyped-user-defined-aggregate-functions">Untyped User-Defined Aggregate Functions</h4>
<p>Typed aggregations, as described above, may also be registered as untyped aggregating UDFs for use with DataFrames.
For example, a user-defined average for untyped DataFrames can look like:</p>
<div class="codetabs">
<div data-lang="scala">
<div class="highlight"><pre class="codehilite"><code><span class="k">import</span> <span class="nn">org.apache.spark.sql.</span><span class="o">{</span><span class="nc">Encoder</span><span class="o">,</span> <span class="nc">Encoders</span><span class="o">,</span> <span class="nc">SparkSession</span><span class="o">}</span>
<span class="k">import</span> <span class="nn">org.apache.spark.sql.expressions.Aggregator</span>
<span class="k">import</span> <span class="nn">org.apache.spark.sql.functions</span>
<span class="k">case</span> <span class="k">class</span> <span class="nc">Average</span><span class="o">(</span><span class="k">var</span> <span class="n">sum</span><span class="k">:</span> <span class="kt">Long</span><span class="o">,</span> <span class="k">var</span> <span class="n">count</span><span class="k">:</span> <span class="kt">Long</span><span class="o">)</span>
<span class="k">object</span> <span class="nc">MyAverage</span> <span class="k">extends</span> <span class="nc">Aggregator</span><span class="o">[</span><span class="kt">Long</span>, <span class="kt">Average</span>, <span class="kt">Double</span><span class="o">]</span> <span class="o">{</span>
<span class="c1">// A zero value for this aggregation. Should satisfy the property that any b + zero = b</span>
<span class="k">def</span> <span class="nf">zero</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="nc">Average</span><span class="o">(</span><span class="mi">0L</span><span class="o">,</span> <span class="mi">0L</span><span class="o">)</span>
<span class="c1">// Combine two values to produce a new value. For performance, the function may modify `buffer`</span>
<span class="c1">// and return it instead of constructing a new object</span>
<span class="k">def</span> <span class="nf">reduce</span><span class="o">(</span><span class="n">buffer</span><span class="k">:</span> <span class="kt">Average</span><span class="o">,</span> <span class="n">data</span><span class="k">:</span> <span class="kt">Long</span><span class="o">)</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="o">{</span>
<span class="nv">buffer</span><span class="o">.</span><span class="py">sum</span> <span class="o">+=</span> <span class="n">data</span>
<span class="nv">buffer</span><span class="o">.</span><span class="py">count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="n">buffer</span>
<span class="o">}</span>
<span class="c1">// Merge two intermediate values</span>
<span class="k">def</span> <span class="nf">merge</span><span class="o">(</span><span class="n">b1</span><span class="k">:</span> <span class="kt">Average</span><span class="o">,</span> <span class="n">b2</span><span class="k">:</span> <span class="kt">Average</span><span class="o">)</span><span class="k">:</span> <span class="kt">Average</span> <span class="o">=</span> <span class="o">{</span>
<span class="nv">b1</span><span class="o">.</span><span class="py">sum</span> <span class="o">+=</span> <span class="nv">b2</span><span class="o">.</span><span class="py">sum</span>
<span class="nv">b1</span><span class="o">.</span><span class="py">count</span> <span class="o">+=</span> <span class="nv">b2</span><span class="o">.</span><span class="py">count</span>
<span class="n">b1</span>
<span class="o">}</span>
<span class="c1">// Transform the output of the reduction</span>
<span class="k">def</span> <span class="nf">finish</span><span class="o">(</span><span class="n">reduction</span><span class="k">:</span> <span class="kt">Average</span><span class="o">)</span><span class="k">:</span> <span class="kt">Double</span> <span class="o">=</span> <span class="nv">reduction</span><span class="o">.</span><span class="py">sum</span><span class="o">.</span><span class="py">toDouble</span> <span class="o">/</span> <span class="nv">reduction</span><span class="o">.</span><span class="py">count</span>
<span class="c1">// Specifies the Encoder for the intermediate value type</span>
<span class="k">def</span> <span class="nf">bufferEncoder</span><span class="k">:</span> <span class="kt">Encoder</span><span class="o">[</span><span class="kt">Average</span><span class="o">]</span> <span class="k">=</span> <span class="nv">Encoders</span><span class="o">.</span><span class="py">product</span>
<span class="c1">// Specifies the Encoder for the final output value type</span>
<span class="k">def</span> <span class="nf">outputEncoder</span><span class="k">:</span> <span class="kt">Encoder</span><span class="o">[</span><span class="kt">Double</span><span class="o">]</span> <span class="k">=</span> <span class="nv">Encoders</span><span class="o">.</span><span class="py">scalaDouble</span>
<span class="o">}</span>
<span class="c1">// Register the function to access it</span>
<span class="nv">spark</span><span class="o">.</span><span class="py">udf</span><span class="o">.</span><span class="py">register</span><span class="o">(</span><span class="s">"myAverage"</span><span class="o">,</span> <span class="nv">functions</span><span class="o">.</span><span class="py">udaf</span><span class="o">(</span><span class="nc">MyAverage</span><span class="o">))</span>
<span class="k">val</span> <span class="nv">df</span> <span class="k">=</span> <span class="nv">spark</span><span class="o">.</span><span class="py">read</span><span class="o">.</span><span class="py">json</span><span class="o">(</span><span class="s">"examples/src/main/resources/employees.json"</span><span class="o">)</span>
<span class="nv">df</span><span class="o">.</span><span class="py">createOrReplaceTempView</span><span class="o">(</span><span class="s">"employees"</span><span class="o">)</span>
<span class="nv">df</span><span class="o">.</span><span class="py">show</span><span class="o">()</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// |   name|salary|</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// |Michael|  3000|</span>
<span class="c1">// |   Andy|  4500|</span>
<span class="c1">// | Justin|  3500|</span>
<span class="c1">// |  Berta|  4000|</span>
<span class="c1">// +-------+------+</span>
<span class="k">val</span> <span class="nv">result</span> <span class="k">=</span> <span class="nv">spark</span><span class="o">.</span><span class="py">sql</span><span class="o">(</span><span class="s">"SELECT myAverage(salary) as average_salary FROM employees"</span><span class="o">)</span>
<span class="nv">result</span><span class="o">.</span><span class="py">show</span><span class="o">()</span>
<span class="c1">// +--------------+</span>
<span class="c1">// |average_salary|</span>
<span class="c1">// +--------------+</span>
<span class="c1">// |        3750.0|</span>
<span class="c1">// +--------------+</span></code></pre></div>
<div><small>Find full example code at "examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedUntypedAggregation.scala" in the Spark repo.</small></div>
</div>
<div data-lang="java">
<div class="highlight"><pre class="codehilite"><code><span class="kn">import</span> <span class="nn">java.io.Serializable</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Dataset</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Encoder</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Encoders</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.Row</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.SparkSession</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.expressions.Aggregator</span><span class="o">;</span>
<span class="kn">import</span> <span class="nn">org.apache.spark.sql.functions</span><span class="o">;</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Average</span> <span class="kd">implements</span> <span class="nc">Serializable</span> <span class="o">{</span>
<span class="kd">private</span> <span class="kt">long</span> <span class="n">sum</span><span class="o">;</span>
<span class="kd">private</span> <span class="kt">long</span> <span class="n">count</span><span class="o">;</span>
<span class="c1">// Constructors, getters, setters...</span>
<span class="kd">public</span> <span class="nf">Average</span><span class="o">()</span> <span class="o">{</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="nf">Average</span><span class="o">(</span><span class="kt">long</span> <span class="n">sum</span><span class="o">,</span> <span class="kt">long</span> <span class="n">count</span><span class="o">)</span> <span class="o">{</span>
<span class="k">this</span><span class="o">.</span><span class="na">sum</span> <span class="o">=</span> <span class="n">sum</span><span class="o">;</span>
<span class="k">this</span><span class="o">.</span><span class="na">count</span> <span class="o">=</span> <span class="n">count</span><span class="o">;</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kt">long</span> <span class="nf">getSum</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="n">sum</span><span class="o">;</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kt">void</span> <span class="nf">setSum</span><span class="o">(</span><span class="kt">long</span> <span class="n">sum</span><span class="o">)</span> <span class="o">{</span>
<span class="k">this</span><span class="o">.</span><span class="na">sum</span> <span class="o">=</span> <span class="n">sum</span><span class="o">;</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kt">long</span> <span class="nf">getCount</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="n">count</span><span class="o">;</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kt">void</span> <span class="nf">setCount</span><span class="o">(</span><span class="kt">long</span> <span class="n">count</span><span class="o">)</span> <span class="o">{</span>
<span class="k">this</span><span class="o">.</span><span class="na">count</span> <span class="o">=</span> <span class="n">count</span><span class="o">;</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">MyAverage</span> <span class="kd">extends</span> <span class="nc">Aggregator</span><span class="o">&lt;</span><span class="nc">Long</span><span class="o">,</span> <span class="nc">Average</span><span class="o">,</span> <span class="nc">Double</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="c1">// A zero value for this aggregation. Should satisfy the property that any b + zero = b</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">zero</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="k">new</span> <span class="nf">Average</span><span class="o">(</span><span class="mi">0L</span><span class="o">,</span> <span class="mi">0L</span><span class="o">);</span>
<span class="o">}</span>
<span class="c1">// Combine two values to produce a new value. For performance, the function may modify `buffer`</span>
<span class="c1">// and return it instead of constructing a new object</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">reduce</span><span class="o">(</span><span class="nc">Average</span> <span class="n">buffer</span><span class="o">,</span> <span class="nc">Long</span> <span class="n">data</span><span class="o">)</span> <span class="o">{</span>
<span class="kt">long</span> <span class="n">newSum</span> <span class="o">=</span> <span class="n">buffer</span><span class="o">.</span><span class="na">getSum</span><span class="o">()</span> <span class="o">+</span> <span class="n">data</span><span class="o">;</span>
<span class="kt">long</span> <span class="n">newCount</span> <span class="o">=</span> <span class="n">buffer</span><span class="o">.</span><span class="na">getCount</span><span class="o">()</span> <span class="o">+</span> <span class="mi">1</span><span class="o">;</span>
<span class="n">buffer</span><span class="o">.</span><span class="na">setSum</span><span class="o">(</span><span class="n">newSum</span><span class="o">);</span>
<span class="n">buffer</span><span class="o">.</span><span class="na">setCount</span><span class="o">(</span><span class="n">newCount</span><span class="o">);</span>
<span class="k">return</span> <span class="n">buffer</span><span class="o">;</span>
<span class="o">}</span>
<span class="c1">// Merge two intermediate values</span>
<span class="kd">public</span> <span class="nc">Average</span> <span class="nf">merge</span><span class="o">(</span><span class="nc">Average</span> <span class="n">b1</span><span class="o">,</span> <span class="nc">Average</span> <span class="n">b2</span><span class="o">)</span> <span class="o">{</span>
<span class="kt">long</span> <span class="n">mergedSum</span> <span class="o">=</span> <span class="n">b1</span><span class="o">.</span><span class="na">getSum</span><span class="o">()</span> <span class="o">+</span> <span class="n">b2</span><span class="o">.</span><span class="na">getSum</span><span class="o">();</span>
<span class="kt">long</span> <span class="n">mergedCount</span> <span class="o">=</span> <span class="n">b1</span><span class="o">.</span><span class="na">getCount</span><span class="o">()</span> <span class="o">+</span> <span class="n">b2</span><span class="o">.</span><span class="na">getCount</span><span class="o">();</span>
<span class="n">b1</span><span class="o">.</span><span class="na">setSum</span><span class="o">(</span><span class="n">mergedSum</span><span class="o">);</span>
<span class="n">b1</span><span class="o">.</span><span class="na">setCount</span><span class="o">(</span><span class="n">mergedCount</span><span class="o">);</span>
<span class="k">return</span> <span class="n">b1</span><span class="o">;</span>
<span class="o">}</span>
<span class="c1">// Transform the output of the reduction</span>
<span class="kd">public</span> <span class="nc">Double</span> <span class="nf">finish</span><span class="o">(</span><span class="nc">Average</span> <span class="n">reduction</span><span class="o">)</span> <span class="o">{</span>
<span class="k">return</span> <span class="o">((</span><span class="kt">double</span><span class="o">)</span> <span class="n">reduction</span><span class="o">.</span><span class="na">getSum</span><span class="o">())</span> <span class="o">/</span> <span class="n">reduction</span><span class="o">.</span><span class="na">getCount</span><span class="o">();</span>
<span class="o">}</span>
<span class="c1">// Specifies the Encoder for the intermediate value type</span>
<span class="kd">public</span> <span class="nc">Encoder</span><span class="o">&lt;</span><span class="nc">Average</span><span class="o">&gt;</span> <span class="nf">bufferEncoder</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">bean</span><span class="o">(</span><span class="nc">Average</span><span class="o">.</span><span class="na">class</span><span class="o">);</span>
<span class="o">}</span>
<span class="c1">// Specifies the Encoder for the final output value type</span>
<span class="kd">public</span> <span class="nc">Encoder</span><span class="o">&lt;</span><span class="nc">Double</span><span class="o">&gt;</span> <span class="nf">outputEncoder</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">DOUBLE</span><span class="o">();</span>
<span class="o">}</span>
<span class="o">}</span>
<span class="c1">// Register the function to access it</span>
<span class="n">spark</span><span class="o">.</span><span class="na">udf</span><span class="o">().</span><span class="na">register</span><span class="o">(</span><span class="s">"myAverage"</span><span class="o">,</span> <span class="n">functions</span><span class="o">.</span><span class="na">udaf</span><span class="o">(</span><span class="k">new</span> <span class="nc">MyAverage</span><span class="o">(),</span> <span class="nc">Encoders</span><span class="o">.</span><span class="na">LONG</span><span class="o">()));</span>
<span class="nc">Dataset</span><span class="o">&lt;</span><span class="nc">Row</span><span class="o">&gt;</span> <span class="n">df</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="na">read</span><span class="o">().</span><span class="na">json</span><span class="o">(</span><span class="s">"examples/src/main/resources/employees.json"</span><span class="o">);</span>
<span class="n">df</span><span class="o">.</span><span class="na">createOrReplaceTempView</span><span class="o">(</span><span class="s">"employees"</span><span class="o">);</span>
<span class="n">df</span><span class="o">.</span><span class="na">show</span><span class="o">();</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// |   name|salary|</span>
<span class="c1">// +-------+------+</span>
<span class="c1">// |Michael|  3000|</span>
<span class="c1">// |   Andy|  4500|</span>
<span class="c1">// | Justin|  3500|</span>
<span class="c1">// |  Berta|  4000|</span>
<span class="c1">// +-------+------+</span>
<span class="nc">Dataset</span><span class="o">&lt;</span><span class="nc">Row</span><span class="o">&gt;</span> <span class="n">result</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="na">sql</span><span class="o">(</span><span class="s">"SELECT myAverage(salary) as average_salary FROM employees"</span><span class="o">);</span>
<span class="n">result</span><span class="o">.</span><span class="na">show</span><span class="o">();</span>
<span class="c1">// +--------------+</span>
<span class="c1">// |average_salary|</span>
<span class="c1">// +--------------+</span>
<span class="c1">// |        3750.0|</span>
<span class="c1">// +--------------+</span></code></pre></div>
<div><small>Find full example code at "examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedUntypedAggregation.java" in the Spark repo.</small></div>
</div>
<div data-lang="SQL">
<div class="language-sql highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1">-- Compile and place UDAF MyAverage in a JAR file called `MyAverage.jar` in /tmp.</span>
<span class="k">CREATE</span> <span class="k">FUNCTION</span> <span class="n">myAverage</span> <span class="k">AS</span> <span class="s1">'MyAverage'</span> <span class="k">USING</span> <span class="n">JAR</span> <span class="s1">'/tmp/MyAverage.jar'</span><span class="p">;</span>
<span class="k">SHOW</span> <span class="k">USER</span> <span class="n">FUNCTIONS</span><span class="p">;</span>
<span class="o">+</span><span class="c1">------------------+</span>
<span class="o">|</span>          <span class="k">function</span><span class="o">|</span>
<span class="o">+</span><span class="c1">------------------+</span>
<span class="o">|</span> <span class="k">default</span><span class="p">.</span><span class="n">myAverage</span><span class="o">|</span>
<span class="o">+</span><span class="c1">------------------+</span>
<span class="k">CREATE</span> <span class="k">TEMPORARY</span> <span class="k">VIEW</span> <span class="n">employees</span>
<span class="k">USING</span> <span class="n">org</span><span class="p">.</span><span class="n">apache</span><span class="p">.</span><span class="n">spark</span><span class="p">.</span><span class="k">sql</span><span class="p">.</span><span class="n">json</span>
<span class="k">OPTIONS</span> <span class="p">(</span>
<span class="n">path</span> <span class="nv">"examples/src/main/resources/employees.json"</span>
<span class="p">);</span>
<span class="k">SELECT</span> <span class="o">*</span> <span class="k">FROM</span> <span class="n">employees</span><span class="p">;</span>
<span class="o">+</span><span class="c1">-------+------+</span>
<span class="o">|</span>   <span class="n">name</span><span class="o">|</span><span class="n">salary</span><span class="o">|</span>
<span class="o">+</span><span class="c1">-------+------+</span>
<span class="o">|</span><span class="n">Michael</span><span class="o">|</span>  <span class="mi">3000</span><span class="o">|</span>
<span class="o">|</span>   <span class="n">Andy</span><span class="o">|</span>  <span class="mi">4500</span><span class="o">|</span>
<span class="o">|</span> <span class="n">Justin</span><span class="o">|</span>  <span class="mi">3500</span><span class="o">|</span>
<span class="o">|</span>  <span class="n">Berta</span><span class="o">|</span>  <span class="mi">4000</span><span class="o">|</span>
<span class="o">+</span><span class="c1">-------+------+</span>
<span class="k">SELECT</span> <span class="n">myAverage</span><span class="p">(</span><span class="n">salary</span><span class="p">)</span> <span class="k">as</span> <span class="n">average_salary</span> <span class="k">FROM</span> <span class="n">employees</span><span class="p">;</span>
<span class="o">+</span><span class="c1">--------------+</span>
<span class="o">|</span><span class="n">average_salary</span><span class="o">|</span>
<span class="o">+</span><span class="c1">--------------+</span>
<span class="o">|</span>        <span class="mi">3750</span><span class="p">.</span><span class="mi">0</span><span class="o">|</span>
<span class="o">+</span><span class="c1">--------------+</span>
</code></pre></div> </div>
</div>
</div>
<h3 id="related-statements">Related Statements</h3>
<ul>
<li><a href="sql-ref-functions-udf-scalar.html">Scalar User Defined Functions (UDFs)</a></li>
<li><a href="sql-ref-functions-udf-hive.html">Integration with Hive UDFs/UDAFs/UDTFs</a></li>
</ul>
</div>
<!-- /container -->
</div>
<script src="js/vendor/jquery-3.4.1.min.js"></script>
<script src="js/vendor/bootstrap.min.js"></script>
<script src="js/vendor/anchor.min.js"></script>
<script src="js/main.js"></script>
<!-- MathJax Section -->
<script type="text/x-mathjax-config">
// MathJax configuration: set the TeX component's automatic equation
// numbering mode to "AMS".
MathJax.Hub.Config({
  TeX: {
    equationNumbers: {
      autoNumber: "AMS"
    }
  }
});
</script>
<script>
// Load MathJax 2.7.1 from the cdnjs CDN by appending a <script> element to <head>.
//
// The URL is always an absolute https:// URL so the page works when served over
// HTTPS, over HTTP, or opened from disk (file://). A protocol-relative URL
// ("//cdnjs...") won't support "file://", and mirroring the page's own protocol
// would fetch executable script over plain HTTP for http:// and file:// pages.
(function(d) {
  var script = d.createElement('script');
  script.async = true;
  // The tex2jax options are applied from onload, i.e. only once MathJax.js itself
  // has been fetched and executed.
  script.onload = function() {
    MathJax.Hub.Config({
      tex2jax: {
        // As JavaScript literals, "\\\\(" is the two-backslash string `\\(` while
        // "\\[" below is the one-backslash string `\[`.
        // NOTE(review): the asymmetry is kept exactly as it was — confirm the
        // doubled escape on the inline delimiters is intentional.
        inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
        displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
        // Let "\$" stand for a literal dollar sign instead of opening math.
        processEscapes: true,
        // Never typeset inside these elements (keeps "$" in code samples intact).
        skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
      }
    });
  };
  script.src = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js' +
    '?config=TeX-AMS-MML_HTMLorMML';
  d.getElementsByTagName('head')[0].appendChild(script);
}(document));
</script>
</body>
</html>