<!-- blob: e71b4e3050c9935b88931c0230e912ca8410e73e [file] [log] [blame] -->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_292) on Tue Jun 15 06:00:55 GMT 2021 -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Reducer (Apache Hadoop Main 3.3.1 API)</title>
<meta name="date" content="2021-06-15">
<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// Copy this page's title onto the parent document, unless the page URL
// contains the 'is-external=true' marker.
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="Reducer (Apache Hadoop Main 3.3.1 API)";
}
}
catch(err) {
// Any error raised while reading location.href or writing
// parent.document.title is ignored.
}
//-->
// Globals describing the Method Summary table. They are not read anywhere in
// this page; presumably show() in script.js consumes them -- TODO confirm.
// methods: keyed by summary-row element id ("i0" is the id of the reduce row).
// NOTE(review): the value 6 looks like a bitmask of the tab keys 2 and 4 -- confirm.
var methods = {"i0":6};
// tabs: tab key -> [id of the caption <span> for that tab, tab label].
var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]};
// Class names that also appear on the summary row / caption tabs in this page
// (rowColor is not used in the static markup; presumably applied by script.js).
var altColor = "altColor";
var rowColor = "rowColor";
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="class-use/Reducer.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../org/apache/hadoop/mapred/RecordWriter.html" title="interface in org.apache.hadoop.mapred"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../index.html?org/apache/hadoop/mapred/Reducer.html" target="_top">Frames</a></li>
<li><a href="Reducer.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the "All Classes" list only when this page is the top-level window;
// hide it when the page is displayed inside a frame.
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window==top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.hadoop.mapred</div>
<h2 title="Interface Reducer" class="title">Interface Reducer&lt;K2,V2,K3,V3&gt;</h2>
</div>
<div class="contentContainer">
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt>All Superinterfaces:</dt>
<dd><a href="https://docs.oracle.com/javase/8/docs/api/java/lang/AutoCloseable.html?is-external=true" title="class or interface in java.lang">AutoCloseable</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true" title="class or interface in java.io">Closeable</a>, <a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html" title="interface in org.apache.hadoop.mapred">JobConfigurable</a></dd>
</dl>
<dl>
<dt>All Known Implementing Classes:</dt>
<dd><a href="../../../../org/apache/hadoop/mapred/lib/ChainReducer.html" title="class in org.apache.hadoop.mapred.lib">ChainReducer</a>, <a href="../../../../org/apache/hadoop/mapred/lib/FieldSelectionMapReduce.html" title="class in org.apache.hadoop.mapred.lib">FieldSelectionMapReduce</a>, <a href="../../../../org/apache/hadoop/mapred/lib/IdentityReducer.html" title="class in org.apache.hadoop.mapred.lib">IdentityReducer</a>, <a href="../../../../org/apache/hadoop/mapred/lib/LongSumReducer.html" title="class in org.apache.hadoop.mapred.lib">LongSumReducer</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorCombiner.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorCombiner</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJobBase.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorJobBase</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorMapper.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorReducer</a></dd>
</dl>
<hr>
<br>
<pre>@InterfaceAudience.Public
@InterfaceStability.Stable
public interface <span class="typeNameLabel">Reducer&lt;K2,V2,K3,V3&gt;</span>
extends <a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html" title="interface in org.apache.hadoop.mapred">JobConfigurable</a>, <a href="../../../../org/apache/hadoop/io/Closeable.html" title="interface in org.apache.hadoop.io">Closeable</a></pre>
<div class="block">Reduces a set of intermediate values which share a key to a smaller set of
values.
<p>The number of <code>Reducer</code>s for the job is set by the user via
<a href="../../../../org/apache/hadoop/mapred/JobConf.html#setNumReduceTasks-int-"><code>JobConf.setNumReduceTasks(int)</code></a>. <code>Reducer</code> implementations
can access the <a href="../../../../org/apache/hadoop/mapred/JobConf.html" title="class in org.apache.hadoop.mapred"><code>JobConf</code></a> for the job via the
<a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html#configure-org.apache.hadoop.mapred.JobConf-"><code>JobConfigurable.configure(JobConf)</code></a> method and initialize themselves.
Similarly they can use the <a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true#close--" title="class or interface in java.io"><code>Closeable.close()</code></a> method for
de-initialization.</p>
<p><code>Reducer</code> has 3 primary phases:</p>
<ol>
<li>
<b id="Shuffle">Shuffle</b>
<p>The input to the <code>Reducer</code> is the grouped output of a <a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="interface in org.apache.hadoop.mapred"><code>Mapper</code></a>.
In this phase the framework, for each <code>Reducer</code>, fetches the
relevant partition of the output of all the <code>Mapper</code>s, via HTTP.
</p>
</li>
<li>
<b id="Sort">Sort</b>
<p>The framework groups <code>Reducer</code> inputs by <code>key</code>s
(since different <code>Mapper</code>s may have output the same key) in this
stage.</p>
<p>The shuffle and sort phases occur simultaneously, i.e. while outputs are
being fetched they are merged.</p>
<b id="SecondarySort">SecondarySort</b>
<p>If equivalence rules for keys while grouping the intermediates are
different from those for grouping keys before reduction, then one may
specify a <code>Comparator</code> via
<a href="../../../../org/apache/hadoop/mapred/JobConf.html#setOutputValueGroupingComparator-java.lang.Class-"><code>JobConf.setOutputValueGroupingComparator(Class)</code></a>. Since
<a href="../../../../org/apache/hadoop/mapred/JobConf.html#setOutputKeyComparatorClass-java.lang.Class-"><code>JobConf.setOutputKeyComparatorClass(Class)</code></a> can be used to
control how intermediate keys are grouped, these can be used in conjunction
to simulate <i>secondary sort on values</i>.</p>
For example, say that you want to find duplicate web pages and tag them
all with the url of the "best" known example. You would set up the job
like:
<ul>
<li>Map Input Key: url</li>
<li>Map Input Value: document</li>
<li>Map Output Key: document checksum, url pagerank</li>
<li>Map Output Value: url</li>
<li>Partitioner: by checksum</li>
<li>OutputKeyComparator: by checksum and then decreasing pagerank</li>
<li>OutputValueGroupingComparator: by checksum</li>
</ul>
</li>
<li>
<b id="Reduce">Reduce</b>
<p>In this phase the
<a href="../../../../org/apache/hadoop/mapred/Reducer.html#reduce-K2-java.util.Iterator-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-"><code>reduce(Object, Iterator, OutputCollector, Reporter)</code></a>
method is called for each <code>&lt;key, (list of values)&gt;</code> pair in
the grouped inputs.</p>
<p>The output of the reduce task is typically written to the
<a href="../../../../org/apache/hadoop/fs/FileSystem.html" title="class in org.apache.hadoop.fs"><code>FileSystem</code></a> via
<a href="../../../../org/apache/hadoop/mapred/OutputCollector.html#collect-K-V-"><code>OutputCollector.collect(Object, Object)</code></a>.</p>
</li>
</ol>
<p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p>
<p>Example:</p>
<blockquote><pre>
public class MyReducer&lt;K extends WritableComparable, V extends Writable&gt;
extends MapReduceBase implements Reducer&lt;K, V, K, V&gt; {
static enum MyCounters { NUM_RECORDS }
private String reduceTaskId;
private int noKeys = 0;
public void configure(JobConf job) {
reduceTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
}
public void reduce(K key, Iterator&lt;V&gt; values,
OutputCollector&lt;K, V&gt; output,
Reporter reporter)
throws IOException {
// Process
int noValues = 0;
while (values.hasNext()) {
V value = values.next();
// Increment the no. of values for this key
++noValues;
// Process the &lt;key, value&gt; pair (assume this takes a while)
// ...
// ...
// Let the framework know that we are alive, and kicking!
if ((noValues%10) == 0) {
reporter.progress();
}
// Process some more
// ...
// ...
// Output the &lt;key, value&gt;
output.collect(key, value);
}
// Increment the no. of &lt;key, list of values&gt; pairs processed
++noKeys;
// Increment counters
reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
// Every 100 keys update application-level status
if ((noKeys%100) == 0) {
reporter.setStatus(reduceTaskId + " processed " + noKeys);
}
}
}
</pre></blockquote></div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="interface in org.apache.hadoop.mapred"><code>Mapper</code></a>,
<a href="../../../../org/apache/hadoop/mapred/Partitioner.html" title="interface in org.apache.hadoop.mapred"><code>Partitioner</code></a>,
<a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred"><code>Reporter</code></a>,
<a href="../../../../org/apache/hadoop/mapred/MapReduceBase.html" title="class in org.apache.hadoop.mapred"><code>MapReduceBase</code></a></dd>
</dl>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t3" class="tableTab"><span><a href="javascript:show(4);">Abstract Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/hadoop/mapred/Reducer.html#reduce-K2-java.util.Iterator-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-">reduce</a></span>(<a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="type parameter in Reducer">K2</a>&nbsp;key,
<a href="https://docs.oracle.com/javase/8/docs/api/java/util/Iterator.html?is-external=true" title="class or interface in java.util">Iterator</a>&lt;<a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="type parameter in Reducer">V2</a>&gt;&nbsp;values,
<a href="../../../../org/apache/hadoop/mapred/OutputCollector.html" title="interface in org.apache.hadoop.mapred">OutputCollector</a>&lt;<a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="type parameter in Reducer">K3</a>,<a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="type parameter in Reducer">V3</a>&gt;&nbsp;output,
<a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred">Reporter</a>&nbsp;reporter)</code>
<div class="block"><i>Reduces</i> values for a given key.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.org.apache.hadoop.mapred.JobConfigurable">
<!-- -->
</a>
<h3>Methods inherited from interface&nbsp;org.apache.hadoop.mapred.<a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html" title="interface in org.apache.hadoop.mapred">JobConfigurable</a></h3>
<code><a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html#configure-org.apache.hadoop.mapred.JobConf-">configure</a></code></li>
</ul>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.io.Closeable">
<!-- -->
</a>
<h3>Methods inherited from interface&nbsp;java.io.<a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true" title="class or interface in java.io">Closeable</a></h3>
<code><a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true#close--" title="class or interface in java.io">close</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="reduce-java.lang.Object-java.util.Iterator-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-">
<!-- -->
</a><a name="reduce-K2-java.util.Iterator-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>reduce</h4>
<pre>void&nbsp;reduce(<a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="type parameter in Reducer">K2</a>&nbsp;key,
<a href="https://docs.oracle.com/javase/8/docs/api/java/util/Iterator.html?is-external=true" title="class or interface in java.util">Iterator</a>&lt;<a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="type parameter in Reducer">V2</a>&gt;&nbsp;values,
<a href="../../../../org/apache/hadoop/mapred/OutputCollector.html" title="interface in org.apache.hadoop.mapred">OutputCollector</a>&lt;<a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="type parameter in Reducer">K3</a>,<a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="type parameter in Reducer">V3</a>&gt;&nbsp;output,
<a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred">Reporter</a>&nbsp;reporter)
throws <a href="https://docs.oracle.com/javase/8/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre>
<div class="block"><i>Reduces</i> values for a given key.
<p>The framework calls this method for each
<code>&lt;key, (list of values)&gt;</code> pair in the grouped inputs.
Output values must be of the same type as input values. Input keys must
not be altered. The framework will <b>reuse</b> the key and value objects
that are passed into the reduce; therefore, the application should clone
the objects it wants to keep a copy of. In many cases, all values are
combined into zero or one value.
</p>
<p>Output pairs are collected with calls to
<a href="../../../../org/apache/hadoop/mapred/OutputCollector.html#collect-K-V-"><code>OutputCollector.collect(Object,Object)</code></a>.</p>
<p>Applications can use the <a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred"><code>Reporter</code></a> provided to report progress
or just indicate that they are alive. In scenarios where the application
takes a significant amount of time to process individual key/value
pairs, this is crucial since the framework might assume that the task has
timed-out and kill that task. The other way of avoiding this is to set
<a href="../../../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.task.timeout">
mapreduce.task.timeout</a> to a high-enough value (or even zero for no
time-outs).</p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>key</code> - the key.</dd>
<dd><code>values</code> - the list of values to reduce.</dd>
<dd><code>output</code> - to collect keys and combined values.</dd>
<dd><code>reporter</code> - facility to report progress.</dd>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="https://docs.oracle.com/javase/8/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="class-use/Reducer.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../org/apache/hadoop/mapred/RecordWriter.html" title="interface in org.apache.hadoop.mapred"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../index.html?org/apache/hadoop/mapred/Reducer.html" target="_top">Frames</a></li>
<li><a href="Reducer.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the "All Classes" list only when this page is the top-level window;
// hide it when the page is displayed inside a frame.
allClassesLink = document.getElementById("allclasses_navbar_bottom");
allClassesLink.style.display = (window==top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<p class="legalCopy"><small>Copyright &#169; 2021 <a href="https://www.apache.org">Apache Software Foundation</a>. All rights reserved.</small></p>
</body>
</html>