<!-- blob: 88c25a0a20de306a60f7d9c285d8d71c75a7e545 [file] [log] [blame] -->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_342) on Sat Oct 15 20:26:51 UTC 2022 -->
<title>Aggregator (Spark 3.3.1 JavaDoc)</title>
<meta name="date" content="2022-10-15">
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="Aggregator (Spark 3.3.1 JavaDoc)";
}
}
catch(err) {
}
//-->
var methods = {"i0":6,"i1":6,"i2":6,"i3":6,"i4":6,"i5":10,"i6":6};
var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"],8:["t4","Concrete Methods"]};
var altColor = "altColor";
var rowColor = "rowColor";
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Class</li>
<li><a href="../../../../../org/apache/spark/sql/expressions/MutableAggregationBuffer.html" title="class in org.apache.spark.sql.expressions"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/spark/sql/expressions/Aggregator.html" target="_top">Frames</a></li>
<li><a href="Aggregator.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_top");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.spark.sql.expressions</div>
<h2 title="Class Aggregator" class="title">Class Aggregator&lt;IN,BUF,OUT&gt;</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>Object</li>
<li>
<ul class="inheritance">
<li>org.apache.spark.sql.expressions.Aggregator&lt;IN,BUF,OUT&gt;</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt><span class="paramLabel">Type Parameters:</span></dt>
<dd><code>IN</code> - The input type for the aggregation.</dd>
<dd><code>BUF</code> - The type of the intermediate value of the reduction.</dd>
<dd><code>OUT</code> - The type of the final output result.</dd>
</dl>
<dl>
<dt>All Implemented Interfaces:</dt>
<dd>java.io.Serializable</dd>
</dl>
<dl>
<dt>Direct Known Subclasses:</dt>
<dd><a href="../../../../../org/apache/spark/ml/feature/StringIndexerAggregator.html" title="class in org.apache.spark.ml.feature">StringIndexerAggregator</a></dd>
</dl>
<hr>
<br>
<pre>public abstract class <span class="typeNameLabel">Aggregator&lt;IN,BUF,OUT&gt;</span>
extends Object
implements scala.Serializable</pre>
<div class="block">A base class for user-defined aggregations, which can be used in <code>Dataset</code> operations to take
all of the elements of a group and reduce them to a single value.
<p>
For example, the following aggregator extracts an <code>int</code> from each input object of a specific class and adds them up:
<pre><code>
case class Data(i: Int)
val customSummer = new Aggregator[Data, Int, Int] {
def zero: Int = 0
def reduce(b: Int, a: Data): Int = b + a.i
def merge(b1: Int, b2: Int): Int = b1 + b2
def finish(r: Int): Int = r
def bufferEncoder: Encoder[Int] = Encoders.scalaInt
def outputEncoder: Encoder[Int] = Encoders.scalaInt
}.toColumn()
val ds: Dataset[Data] = ...
val aggregated = ds.select(customSummer)
</code></pre>
<p>
Based loosely on Aggregator from Algebird: <a href="https://github.com/twitter/algebird">https://github.com/twitter/algebird</a></div>
<dl>
<dt><span class="simpleTagLabel">Since:</span></dt>
<dd>1.6.0</dd>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../../serialized-form.html#org.apache.spark.sql.expressions.Aggregator">Serialized Form</a></dd>
</dl>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html#Aggregator--">Aggregator</a></span>()</code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t3" class="tableTab"><span><a href="javascript:show(4);">Abstract Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>abstract <a href="../../../../../org/apache/spark/sql/Encoder.html" title="interface in org.apache.spark.sql">Encoder</a>&lt;<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html#bufferEncoder--">bufferEncoder</a></span>()</code>
<div class="block">Specifies the <code>Encoder</code> for the intermediate value type.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>abstract <a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">OUT</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html#finish-BUF-">finish</a></span>(<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;reduction)</code>
<div class="block">Transform the output of the reduction.</div>
</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code>abstract <a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html#merge-BUF-BUF-">merge</a></span>(<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;b1,
<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;b2)</code>
<div class="block">Merge two intermediate values.</div>
</td>
</tr>
<tr id="i3" class="rowColor">
<td class="colFirst"><code>abstract <a href="../../../../../org/apache/spark/sql/Encoder.html" title="interface in org.apache.spark.sql">Encoder</a>&lt;<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">OUT</a>&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html#outputEncoder--">outputEncoder</a></span>()</code>
<div class="block">Specifies the <code>Encoder</code> for the final output value type.</div>
</td>
</tr>
<tr id="i4" class="altColor">
<td class="colFirst"><code>abstract <a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html#reduce-BUF-IN-">reduce</a></span>(<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;b,
<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">IN</a>&nbsp;a)</code>
<div class="block">Combine two values to produce a new value.</div>
</td>
</tr>
<tr id="i5" class="rowColor">
<td class="colFirst"><code><a href="../../../../../org/apache/spark/sql/TypedColumn.html" title="class in org.apache.spark.sql">TypedColumn</a>&lt;<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">IN</a>,<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">OUT</a>&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html#toColumn--">toColumn</a></span>()</code>
<div class="block">Returns this <code>Aggregator</code> as a <code>TypedColumn</code> that can be used in <code>Dataset</code> operations.</div>
</td>
</tr>
<tr id="i6" class="altColor">
<td class="colFirst"><code>abstract <a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html#zero--">zero</a></span>()</code>
<div class="block">A zero value for this aggregation.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;Object</h3>
<code>equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="Aggregator--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>Aggregator</h4>
<pre>public&nbsp;Aggregator()</pre>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="bufferEncoder--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>bufferEncoder</h4>
<pre>public abstract&nbsp;<a href="../../../../../org/apache/spark/sql/Encoder.html" title="interface in org.apache.spark.sql">Encoder</a>&lt;<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&gt;&nbsp;bufferEncoder()</pre>
<div class="block">Specifies the <code>Encoder</code> for the intermediate value type.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Since:</span></dt>
<dd>2.0.0</dd>
</dl>
</li>
</ul>
<a name="finish-java.lang.Object-">
<!-- -->
</a><a name="finish-BUF-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>finish</h4>
<pre>public abstract&nbsp;<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">OUT</a>&nbsp;finish(<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;reduction)</pre>
<div class="block">Transform the output of the reduction.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>reduction</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Since:</span></dt>
<dd>1.6.0</dd>
</dl>
</li>
</ul>
<a name="merge-java.lang.Object-java.lang.Object-">
<!-- -->
</a><a name="merge-BUF-BUF-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>merge</h4>
<pre>public abstract&nbsp;<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;merge(<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;b1,
<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;b2)</pre>
<div class="block">Merge two intermediate values.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>b1</code> - (undocumented)</dd>
<dd><code>b2</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Since:</span></dt>
<dd>1.6.0</dd>
</dl>
</li>
</ul>
<a name="outputEncoder--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>outputEncoder</h4>
<pre>public abstract&nbsp;<a href="../../../../../org/apache/spark/sql/Encoder.html" title="interface in org.apache.spark.sql">Encoder</a>&lt;<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">OUT</a>&gt;&nbsp;outputEncoder()</pre>
<div class="block">Specifies the <code>Encoder</code> for the final output value type.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Since:</span></dt>
<dd>2.0.0</dd>
</dl>
</li>
</ul>
<a name="reduce-java.lang.Object-java.lang.Object-">
<!-- -->
</a><a name="reduce-BUF-IN-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>reduce</h4>
<pre>public abstract&nbsp;<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;reduce(<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;b,
<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">IN</a>&nbsp;a)</pre>
<div class="block">Combine two values to produce a new value. For performance, the function may modify <code>b</code> and
return it instead of constructing a new object for <code>b</code>.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>b</code> - (undocumented)</dd>
<dd><code>a</code> - (undocumented)</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Since:</span></dt>
<dd>1.6.0</dd>
</dl>
</li>
</ul>
<a name="toColumn--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>toColumn</h4>
<pre>public&nbsp;<a href="../../../../../org/apache/spark/sql/TypedColumn.html" title="class in org.apache.spark.sql">TypedColumn</a>&lt;<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">IN</a>,<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">OUT</a>&gt;&nbsp;toColumn()</pre>
<div class="block">Returns this <code>Aggregator</code> as a <code>TypedColumn</code> that can be used in <code>Dataset</code>
operations.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Since:</span></dt>
<dd>1.6.0</dd>
</dl>
</li>
</ul>
<a name="zero--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>zero</h4>
<pre>public abstract&nbsp;<a href="../../../../../org/apache/spark/sql/expressions/Aggregator.html" title="type parameter in Aggregator">BUF</a>&nbsp;zero()</pre>
<div class="block">A zero value for this aggregation. Should satisfy the property that any b + zero = b.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>(undocumented)</dd>
<dt><span class="simpleTagLabel">Since:</span></dt>
<dd>1.6.0</dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li>Prev&nbsp;Class</li>
<li><a href="../../../../../org/apache/spark/sql/expressions/MutableAggregationBuffer.html" title="class in org.apache.spark.sql.expressions"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../index.html?org/apache/spark/sql/expressions/Aggregator.html" target="_top">Frames</a></li>
<li><a href="Aggregator.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_bottom");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<script defer="defer" type="text/javascript" src="../../../../../lib/jquery.js"></script><script defer="defer" type="text/javascript" src="../../../../../lib/api-javadocs.js"></script></body>
</html>