blob: f2f687ccae9fc54e425ecfe3be9d95039efd1acb [file] [log] [blame]
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_292) on Tue Jun 15 06:00:55 GMT 2021 -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Mapper (Apache Hadoop Main 3.3.1 API)</title>
<meta name="date" content="2021-06-15">
<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// Copy this page's title onto the parent document, unless the URL carries
// the 'is-external=true' marker.
try {
    if (location.href.indexOf('is-external=true') < 0) {
        parent.document.title = "Mapper (Apache Hadoop Main 3.3.1 API)";
    }
} catch (err) {
    // Best effort only: any error raised while updating the title is ignored.
}
//-->
// Page-level data for the method summary table's tabs.
// NOTE(review): presumably consumed by show() in script.js; confirm there.
//   methods: summary-row element id -> numeric flags (6 looks like 2 | 4, i.e.
//            both the "Instance" and "Abstract" tab keys below; verify).
//   tabs:    numeric key -> [tab element id, tab label].
var methods = {"i0": 6},
    tabs = {
        65535: ["t0", "All Methods"],
        2: ["t2", "Instance Methods"],
        4: ["t3", "Abstract Methods"]
    },
    // CSS class names for the table rows and for inactive/active tabs.
    altColor = "altColor",
    rowColor = "rowColor",
    tableTab = "tableTab",
    activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="class-use/Mapper.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../org/apache/hadoop/mapred/MapFileOutputFormat.html" title="class in org.apache.hadoop.mapred"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../org/apache/hadoop/mapred/MapReduceBase.html" title="class in org.apache.hadoop.mapred"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../index.html?org/apache/hadoop/mapred/Mapper.html" target="_top">Frames</a></li>
<li><a href="Mapper.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the top navbar's "All Classes" list only when this page is the
// top-level window; hide it when the page is displayed inside a frame.
// Declared with var so the assignment no longer relies on creating an
// implicit global (which would throw under strict mode).
var allClassesLink = document.getElementById("allclasses_navbar_top");
// Guard against the list being absent so the script cannot throw on a null lookup.
if (allClassesLink) {
    allClassesLink.style.display = (window == top) ? "block" : "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.hadoop.mapred</div>
<h2 title="Interface Mapper" class="title">Interface Mapper&lt;K1,V1,K2,V2&gt;</h2>
</div>
<div class="contentContainer">
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt>All Superinterfaces:</dt>
<dd><a href="https://docs.oracle.com/javase/8/docs/api/java/lang/AutoCloseable.html?is-external=true" title="class or interface in java.lang">AutoCloseable</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true" title="class or interface in java.io">Closeable</a>, <a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html" title="interface in org.apache.hadoop.mapred">JobConfigurable</a></dd>
</dl>
<dl>
<dt>All Known Implementing Classes:</dt>
<dd><a href="../../../../org/apache/hadoop/mapred/lib/ChainMapper.html" title="class in org.apache.hadoop.mapred.lib">ChainMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/FieldSelectionMapReduce.html" title="class in org.apache.hadoop.mapred.lib">FieldSelectionMapReduce</a>, <a href="../../../../org/apache/hadoop/mapred/lib/IdentityMapper.html" title="class in org.apache.hadoop.mapred.lib">IdentityMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/InverseMapper.html" title="class in org.apache.hadoop.mapred.lib">InverseMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/RegexMapper.html" title="class in org.apache.hadoop.mapred.lib">RegexMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/TokenCountMapper.html" title="class in org.apache.hadoop.mapred.lib">TokenCountMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorCombiner.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorCombiner</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJobBase.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorJobBase</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorMapper.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorReducer</a></dd>
</dl>
<hr>
<br>
<pre>@InterfaceAudience.Public
@InterfaceStability.Stable
public interface <span class="typeNameLabel">Mapper&lt;K1,V1,K2,V2&gt;</span>
extends <a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html" title="interface in org.apache.hadoop.mapred">JobConfigurable</a>, <a href="../../../../org/apache/hadoop/io/Closeable.html" title="interface in org.apache.hadoop.io">Closeable</a></pre>
<div class="block">Maps input key/value pairs to a set of intermediate key/value pairs.
<p>Maps are the individual tasks which transform input records into
intermediate records. The transformed intermediate records need not be of
the same type as the input records. A given input pair may map to zero or
many output pairs.</p>
<p>The Hadoop Map-Reduce framework spawns one map task for each
<a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><code>InputSplit</code></a> generated by the <a href="../../../../org/apache/hadoop/mapred/InputFormat.html" title="interface in org.apache.hadoop.mapred"><code>InputFormat</code></a> for the job.
<code>Mapper</code> implementations can access the <a href="../../../../org/apache/hadoop/mapred/JobConf.html" title="class in org.apache.hadoop.mapred"><code>JobConf</code></a> for the
job via the <a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html#configure-org.apache.hadoop.mapred.JobConf-"><code>JobConfigurable.configure(JobConf)</code></a> and initialize
themselves. Similarly they can use the <a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true#close--" title="class or interface in java.io"><code>Closeable.close()</code></a> method for
de-initialization.</p>
<p>The framework then calls
<a href="../../../../org/apache/hadoop/mapred/Mapper.html#map-K1-V1-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-"><code>map(Object, Object, OutputCollector, Reporter)</code></a>
for each key/value pair in the <code>InputSplit</code> for that task.</p>
<p>All intermediate values associated with a given output key are
subsequently grouped by the framework, and passed to a <a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="interface in org.apache.hadoop.mapred"><code>Reducer</code></a> to
determine the final output. Users can control the grouping by specifying
a <code>Comparator</code> via
<a href="../../../../org/apache/hadoop/mapred/JobConf.html#setOutputKeyComparatorClass-java.lang.Class-"><code>JobConf.setOutputKeyComparatorClass(Class)</code></a>.</p>
<p>The grouped <code>Mapper</code> outputs are partitioned per
<code>Reducer</code>. Users can control which keys (and hence records) go to
which <code>Reducer</code> by implementing a custom <a href="../../../../org/apache/hadoop/mapred/Partitioner.html" title="interface in org.apache.hadoop.mapred"><code>Partitioner</code></a>.</p>
<p>Users can optionally specify a <code>combiner</code>, via
<a href="../../../../org/apache/hadoop/mapred/JobConf.html#setCombinerClass-java.lang.Class-"><code>JobConf.setCombinerClass(Class)</code></a>, to perform local aggregation of the
intermediate outputs, which helps to cut down the amount of data transferred
from the <code>Mapper</code> to the <code>Reducer</code>.</p>
<p>The intermediate, grouped outputs are always stored in
<a href="../../../../org/apache/hadoop/io/SequenceFile.html" title="class in org.apache.hadoop.io"><code>SequenceFile</code></a>s. Applications can specify if and how the intermediate
outputs are to be compressed and which <a href="../../../../org/apache/hadoop/io/compress/CompressionCodec.html" title="interface in org.apache.hadoop.io.compress"><code>CompressionCodec</code></a>s are to be
used via the <code>JobConf</code>.</p>
<p>If the job has
<a href="../../../../org/apache/hadoop/mapred/JobConf.html#ReducerNone">zero
reduces</a> then the output of the <code>Mapper</code> is directly written
to the <a href="../../../../org/apache/hadoop/fs/FileSystem.html" title="class in org.apache.hadoop.fs"><code>FileSystem</code></a> without grouping by keys.</p>
<p>Example:</p>
<p><blockquote><pre>
public class MyMapper&lt;K extends WritableComparable, V extends Writable&gt;
extends MapReduceBase implements Mapper&lt;K, V, K, V&gt; {
static enum MyCounters { NUM_RECORDS }
private String mapTaskId;
private String inputFile;
private int noRecords = 0;
public void configure(JobConf job) {
mapTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
inputFile = job.get(JobContext.MAP_INPUT_FILE);
}
public void map(K key, V val,
OutputCollector&lt;K, V&gt; output, Reporter reporter)
throws IOException {
// Process the &lt;key, value&gt; pair (assume this takes a while)
// ...
// ...
// Let the framework know that we are alive, and kicking!
// reporter.progress();
// Process some more
// ...
// ...
// Increment the no. of &lt;key, value&gt; pairs processed
++noRecords;
// Increment counters
reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
// Every 100 records update application-level status
if ((noRecords%100) == 0) {
reporter.setStatus(mapTaskId + " processed " + noRecords +
" from input-file: " + inputFile);
}
// Output the result
output.collect(key, val);
}
}
</pre></blockquote>
<p>Applications may write a custom <a href="../../../../org/apache/hadoop/mapred/MapRunnable.html" title="interface in org.apache.hadoop.mapred"><code>MapRunnable</code></a> to exert greater
control on map processing e.g. multi-threaded <code>Mapper</code>s etc.</p></div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../org/apache/hadoop/mapred/JobConf.html" title="class in org.apache.hadoop.mapred"><code>JobConf</code></a>,
<a href="../../../../org/apache/hadoop/mapred/InputFormat.html" title="interface in org.apache.hadoop.mapred"><code>InputFormat</code></a>,
<a href="../../../../org/apache/hadoop/mapred/Partitioner.html" title="interface in org.apache.hadoop.mapred"><code>Partitioner</code></a>,
<a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="interface in org.apache.hadoop.mapred"><code>Reducer</code></a>,
<a href="../../../../org/apache/hadoop/mapred/MapReduceBase.html" title="class in org.apache.hadoop.mapred"><code>MapReduceBase</code></a>,
<a href="../../../../org/apache/hadoop/mapred/MapRunnable.html" title="interface in org.apache.hadoop.mapred"><code>MapRunnable</code></a>,
<a href="../../../../org/apache/hadoop/io/SequenceFile.html" title="class in org.apache.hadoop.io"><code>SequenceFile</code></a></dd>
</dl>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t3" class="tableTab"><span><a href="javascript:show(4);">Abstract Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/hadoop/mapred/Mapper.html#map-K1-V1-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-">map</a></span>(<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">K1</a>&nbsp;key,
<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">V1</a>&nbsp;value,
<a href="../../../../org/apache/hadoop/mapred/OutputCollector.html" title="interface in org.apache.hadoop.mapred">OutputCollector</a>&lt;<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">K2</a>,<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">V2</a>&gt;&nbsp;output,
<a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred">Reporter</a>&nbsp;reporter)</code>
<div class="block">Maps a single input key/value pair into an intermediate key/value pair.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.org.apache.hadoop.mapred.JobConfigurable">
<!-- -->
</a>
<h3>Methods inherited from interface&nbsp;org.apache.hadoop.mapred.<a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html" title="interface in org.apache.hadoop.mapred">JobConfigurable</a></h3>
<code><a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html#configure-org.apache.hadoop.mapred.JobConf-">configure</a></code></li>
</ul>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.io.Closeable">
<!-- -->
</a>
<h3>Methods inherited from interface&nbsp;java.io.<a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true" title="class or interface in java.io">Closeable</a></h3>
<code><a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true#close--" title="class or interface in java.io">close</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="map-java.lang.Object-java.lang.Object-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-">
<!-- -->
</a><a name="map-K1-V1-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>map</h4>
<pre>void&nbsp;map(<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">K1</a>&nbsp;key,
<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">V1</a>&nbsp;value,
<a href="../../../../org/apache/hadoop/mapred/OutputCollector.html" title="interface in org.apache.hadoop.mapred">OutputCollector</a>&lt;<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">K2</a>,<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">V2</a>&gt;&nbsp;output,
<a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred">Reporter</a>&nbsp;reporter)
throws <a href="https://docs.oracle.com/javase/8/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre>
<div class="block">Maps a single input key/value pair into an intermediate key/value pair.
<p>Output pairs need not be of the same types as input pairs. A given
input pair may map to zero or many output pairs. Output pairs are
collected with calls to
<a href="../../../../org/apache/hadoop/mapred/OutputCollector.html#collect-K-V-"><code>OutputCollector.collect(Object,Object)</code></a>.</p>
<p>Applications can use the <a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred"><code>Reporter</code></a> provided to report progress
or just indicate that they are alive. In scenarios where the application
takes a significant amount of time to process individual key/value
pairs, this is crucial since the framework might assume that the task has
timed-out and kill that task. The other way of avoiding this is to set
<a href="../../../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.task.timeout">
mapreduce.task.timeout</a> to a high-enough value (or even zero for no
time-outs).</p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>key</code> - the input key.</dd>
<dd><code>value</code> - the input value.</dd>
<dd><code>output</code> - collects mapped keys and values.</dd>
<dd><code>reporter</code> - facility to report progress.</dd>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="https://docs.oracle.com/javase/8/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="class-use/Mapper.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../org/apache/hadoop/mapred/MapFileOutputFormat.html" title="class in org.apache.hadoop.mapred"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../org/apache/hadoop/mapred/MapReduceBase.html" title="class in org.apache.hadoop.mapred"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../index.html?org/apache/hadoop/mapred/Mapper.html" target="_top">Frames</a></li>
<li><a href="Mapper.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the bottom navbar's "All Classes" list only when this page is the
// top-level window; hide it when the page is displayed inside a frame.
// Declared with var so the assignment no longer relies on creating an
// implicit global (which would throw under strict mode).
var allClassesLink = document.getElementById("allclasses_navbar_bottom");
// Guard against the list being absent so the script cannot throw on a null lookup.
if (allClassesLink) {
    allClassesLink.style.display = (window == top) ? "block" : "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li>Constr&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<p class="legalCopy"><small>Copyright &#169; 2021 <a href="https://www.apache.org">Apache Software Foundation</a>. All rights reserved.</small></p>
</body>
</html>