| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_292) on Tue Jun 15 06:00:55 GMT 2021 --> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
| <title>Mapper (Apache Hadoop Main 3.3.1 API)</title> |
| <meta name="date" content="2021-06-15"> |
| <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| // Copy this page's title onto the parent window's document unless the URL |
| // contains 'is-external=true'; any error raised while doing so is swallowed. |
| try { |
|     if (location.href.indexOf('is-external=true') < 0) { |
|         parent.document.title = "Mapper (Apache Hadoop Main 3.3.1 API)"; |
|     } |
| } catch (err) { |
|     // intentionally ignored |
| } |
| //--> |
| // methods maps a summary-row id to a numeric type code; tabs maps a numeric |
| // key to [tab element id, tab label]. |
| // NOTE(review): presumably consumed by show() in script.js; confirm there. |
| var methods = { "i0": 6 }; |
| var tabs = { |
|     65535: ["t0", "All Methods"], |
|     2: ["t2", "Instance Methods"], |
|     4: ["t3", "Abstract Methods"] |
| }; |
| // Names matching the CSS classes used on the summary table's rows and tabs. |
| var altColor = "altColor", |
|     rowColor = "rowColor", |
|     tableTab = "tableTab", |
|     activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/Mapper.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../org/apache/hadoop/mapred/MapFileOutputFormat.html" title="class in org.apache.hadoop.mapred"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../org/apache/hadoop/mapred/MapReduceBase.html" title="class in org.apache.hadoop.mapred"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/hadoop/mapred/Mapper.html" target="_top">Frames</a></li> |
| <li><a href="Mapper.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| // Show the "All Classes" list only when this page is the top-level window; |
| // hide it whenever the page is displayed inside another window/frame. |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| allClassesLink.style.display = (window == top) ? "block" : "none"; |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.hadoop.mapred</div> |
| <h2 title="Interface Mapper" class="title">Interface Mapper&lt;K1,V1,K2,V2&gt;</h2> |
| </div> |
| <div class="contentContainer"> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <dl> |
| <dt>All Superinterfaces:</dt> |
| <dd><a href="https://docs.oracle.com/javase/8/docs/api/java/lang/AutoCloseable.html?is-external=true" title="class or interface in java.lang">AutoCloseable</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true" title="class or interface in java.io">Closeable</a>, <a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html" title="interface in org.apache.hadoop.mapred">JobConfigurable</a></dd> |
| </dl> |
| <dl> |
| <dt>All Known Implementing Classes:</dt> |
| <dd><a href="../../../../org/apache/hadoop/mapred/lib/ChainMapper.html" title="class in org.apache.hadoop.mapred.lib">ChainMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/FieldSelectionMapReduce.html" title="class in org.apache.hadoop.mapred.lib">FieldSelectionMapReduce</a>, <a href="../../../../org/apache/hadoop/mapred/lib/IdentityMapper.html" title="class in org.apache.hadoop.mapred.lib">IdentityMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/InverseMapper.html" title="class in org.apache.hadoop.mapred.lib">InverseMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/RegexMapper.html" title="class in org.apache.hadoop.mapred.lib">RegexMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/TokenCountMapper.html" title="class in org.apache.hadoop.mapred.lib">TokenCountMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorCombiner.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorCombiner</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJobBase.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorJobBase</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorMapper.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorMapper</a>, <a href="../../../../org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.html" title="class in org.apache.hadoop.mapred.lib.aggregate">ValueAggregatorReducer</a></dd> |
| </dl> |
| <hr> |
| <br> |
| <pre>@InterfaceAudience.Public |
| @InterfaceStability.Stable |
| public interface <span class="typeNameLabel">Mapper&lt;K1,V1,K2,V2&gt;</span> |
| extends <a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html" title="interface in org.apache.hadoop.mapred">JobConfigurable</a>, <a href="../../../../org/apache/hadoop/io/Closeable.html" title="interface in org.apache.hadoop.io">Closeable</a></pre> |
| <div class="block">Maps input key/value pairs to a set of intermediate key/value pairs. |
| |
| <p>Maps are the individual tasks which transform input records into |
| intermediate records. The transformed intermediate records need not be of |
| the same type as the input records. A given input pair may map to zero or |
| many output pairs.</p> |
| |
| <p>The Hadoop Map-Reduce framework spawns one map task for each |
| <a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><code>InputSplit</code></a> generated by the <a href="../../../../org/apache/hadoop/mapred/InputFormat.html" title="interface in org.apache.hadoop.mapred"><code>InputFormat</code></a> for the job. |
| <code>Mapper</code> implementations can access the <a href="../../../../org/apache/hadoop/mapred/JobConf.html" title="class in org.apache.hadoop.mapred"><code>JobConf</code></a> for the |
| job via the <a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html#configure-org.apache.hadoop.mapred.JobConf-"><code>JobConfigurable.configure(JobConf)</code></a> and initialize |
| themselves. Similarly they can use the <a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true#close--" title="class or interface in java.io"><code>Closeable.close()</code></a> method for |
| de-initialization.</p> |
| |
| <p>The framework then calls |
| <a href="../../../../org/apache/hadoop/mapred/Mapper.html#map-K1-V1-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-"><code>map(Object, Object, OutputCollector, Reporter)</code></a> |
| for each key/value pair in the <code>InputSplit</code> for that task.</p> |
| |
| <p>All intermediate values associated with a given output key are |
| subsequently grouped by the framework, and passed to a <a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="interface in org.apache.hadoop.mapred"><code>Reducer</code></a> to |
| determine the final output. Users can control the grouping by specifying |
| a <code>Comparator</code> via |
| <a href="../../../../org/apache/hadoop/mapred/JobConf.html#setOutputKeyComparatorClass-java.lang.Class-"><code>JobConf.setOutputKeyComparatorClass(Class)</code></a>.</p> |
| |
| <p>The grouped <code>Mapper</code> outputs are partitioned per |
| <code>Reducer</code>. Users can control which keys (and hence records) go to |
| which <code>Reducer</code> by implementing a custom <a href="../../../../org/apache/hadoop/mapred/Partitioner.html" title="interface in org.apache.hadoop.mapred"><code>Partitioner</code></a>.</p> |
| |
| <p>Users can optionally specify a <code>combiner</code>, via |
| <a href="../../../../org/apache/hadoop/mapred/JobConf.html#setCombinerClass-java.lang.Class-"><code>JobConf.setCombinerClass(Class)</code></a>, to perform local aggregation of the |
| intermediate outputs, which helps to cut down the amount of data transferred |
| from the <code>Mapper</code> to the <code>Reducer</code>.</p> |
| |
| <p>The intermediate, grouped outputs are always stored in |
| <a href="../../../../org/apache/hadoop/io/SequenceFile.html" title="class in org.apache.hadoop.io"><code>SequenceFile</code></a>s. Applications can specify if and how the intermediate |
| outputs are to be compressed and which <a href="../../../../org/apache/hadoop/io/compress/CompressionCodec.html" title="interface in org.apache.hadoop.io.compress"><code>CompressionCodec</code></a>s are to be |
| used via the <code>JobConf</code>.</p> |
| |
| <p>If the job has |
| <a href="../../../../org/apache/hadoop/mapred/JobConf.html#ReducerNone">zero |
| reduces</a> then the output of the <code>Mapper</code> is directly written |
| to the <a href="../../../../org/apache/hadoop/fs/FileSystem.html" title="class in org.apache.hadoop.fs"><code>FileSystem</code></a> without grouping by keys.</p> |
| |
| <p>Example:</p> |
| <p><blockquote><pre> |
| public class MyMapper&lt;K extends WritableComparable, V extends Writable&gt; |
| extends MapReduceBase implements Mapper&lt;K, V, K, V&gt; { |
| |
| static enum MyCounters { NUM_RECORDS } |
| |
| private String mapTaskId; |
| private String inputFile; |
| private int noRecords = 0; |
| |
| public void configure(JobConf job) { |
| mapTaskId = job.get(JobContext.TASK_ATTEMPT_ID); |
| inputFile = job.get(JobContext.MAP_INPUT_FILE); |
| } |
| |
| public void map(K key, V val, |
| OutputCollector&lt;K, V&gt; output, Reporter reporter) |
| throws IOException { |
| // Process the &lt;key, value&gt; pair (assume this takes a while) |
| // ... |
| // ... |
| |
| // Let the framework know that we are alive, and kicking! |
| // reporter.progress(); |
| |
| // Process some more |
| // ... |
| // ... |
| |
| // Increment the no. of &lt;key, value&gt; pairs processed |
| ++noRecords; |
| |
| // Increment counters |
| reporter.incrCounter(MyCounters.NUM_RECORDS, 1); |
| |
| // Every 100 records update application-level status |
| if ((noRecords%100) == 0) { |
| reporter.setStatus(mapTaskId + " processed " + noRecords + |
| " from input-file: " + inputFile); |
| } |
| |
| // Output the result |
| output.collect(key, val); |
| } |
| } |
| </pre></blockquote> |
| |
| <p>Applications may write a custom <a href="../../../../org/apache/hadoop/mapred/MapRunnable.html" title="interface in org.apache.hadoop.mapred"><code>MapRunnable</code></a> to exert greater |
| control on map processing e.g. multi-threaded <code>Mapper</code>s etc.</p></div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../org/apache/hadoop/mapred/JobConf.html" title="class in org.apache.hadoop.mapred"><code>JobConf</code></a>, |
| <a href="../../../../org/apache/hadoop/mapred/InputFormat.html" title="interface in org.apache.hadoop.mapred"><code>InputFormat</code></a>, |
| <a href="../../../../org/apache/hadoop/mapred/Partitioner.html" title="interface in org.apache.hadoop.mapred"><code>Partitioner</code></a>, |
| <a href="../../../../org/apache/hadoop/mapred/Reducer.html" title="interface in org.apache.hadoop.mapred"><code>Reducer</code></a>, |
| <a href="../../../../org/apache/hadoop/mapred/MapReduceBase.html" title="class in org.apache.hadoop.mapred"><code>MapReduceBase</code></a>, |
| <a href="../../../../org/apache/hadoop/mapred/MapRunnable.html" title="interface in org.apache.hadoop.mapred"><code>MapRunnable</code></a>, |
| <a href="../../../../org/apache/hadoop/io/SequenceFile.html" title="class in org.apache.hadoop.io"><code>SequenceFile</code></a></dd> |
| </dl> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t3" class="tableTab"><span><a href="javascript:show(4);">Abstract Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/hadoop/mapred/Mapper.html#map-K1-V1-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-">map</a></span>(<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">K1</a> key, |
| <a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">V1</a> value, |
| <a href="../../../../org/apache/hadoop/mapred/OutputCollector.html" title="interface in org.apache.hadoop.mapred">OutputCollector</a>&lt;<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">K2</a>,<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">V2</a>&gt; output, |
| <a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred">Reporter</a> reporter)</code> |
| <div class="block">Maps a single input key/value pair into an intermediate key/value pair.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.org.apache.hadoop.mapred.JobConfigurable"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from interface org.apache.hadoop.mapred.<a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html" title="interface in org.apache.hadoop.mapred">JobConfigurable</a></h3> |
| <code><a href="../../../../org/apache/hadoop/mapred/JobConfigurable.html#configure-org.apache.hadoop.mapred.JobConf-">configure</a></code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.io.Closeable"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from interface java.io.<a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true" title="class or interface in java.io">Closeable</a></h3> |
| <code><a href="https://docs.oracle.com/javase/8/docs/api/java/io/Closeable.html?is-external=true#close--" title="class or interface in java.io">close</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="map-java.lang.Object-java.lang.Object-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-"> |
| <!-- --> |
| </a><a name="map-K1-V1-org.apache.hadoop.mapred.OutputCollector-org.apache.hadoop.mapred.Reporter-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>map</h4> |
| <pre>void map(<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">K1</a> key, |
| <a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">V1</a> value, |
| <a href="../../../../org/apache/hadoop/mapred/OutputCollector.html" title="interface in org.apache.hadoop.mapred">OutputCollector</a>&lt;<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">K2</a>,<a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="type parameter in Mapper">V2</a>&gt; output, |
| <a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred">Reporter</a> reporter) |
| throws <a href="https://docs.oracle.com/javase/8/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre> |
| <div class="block">Maps a single input key/value pair into an intermediate key/value pair. |
| |
| <p>Output pairs need not be of the same types as input pairs. A given |
| input pair may map to zero or many output pairs. Output pairs are |
| collected with calls to |
| <a href="../../../../org/apache/hadoop/mapred/OutputCollector.html#collect-K-V-"><code>OutputCollector.collect(Object,Object)</code></a>.</p> |
| |
| <p>Applications can use the <a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred"><code>Reporter</code></a> provided to report progress |
| or just indicate that they are alive. In scenarios where the application |
| takes a significant amount of time to process individual key/value |
| pairs, this is crucial since the framework might assume that the task has |
| timed-out and kill that task. The other way of avoiding this is to set |
| <a href="../../../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.task.timeout"> |
| mapreduce.task.timeout</a> to a high-enough value (or even zero for no |
| time-outs).</p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>key</code> - the input key.</dd> |
| <dd><code>value</code> - the input value.</dd> |
| <dd><code>output</code> - collects mapped keys and values.</dd> |
| <dd><code>reporter</code> - facility to report progress.</dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="https://docs.oracle.com/javase/8/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/Mapper.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../org/apache/hadoop/mapred/MapFileOutputFormat.html" title="class in org.apache.hadoop.mapred"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../org/apache/hadoop/mapred/MapReduceBase.html" title="class in org.apache.hadoop.mapred"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/hadoop/mapred/Mapper.html" target="_top">Frames</a></li> |
| <li><a href="Mapper.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| // Show the "All Classes" list only when this page is the top-level window; |
| // hide it whenever the page is displayed inside another window/frame. |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| allClassesLink.style.display = (window == top) ? "block" : "none"; |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| <p class="legalCopy"><small>Copyright © 2021 <a href="https://www.apache.org">Apache Software Foundation</a>. All rights reserved.</small></p> |
| </body> |
| </html> |