| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_292) on Tue Jun 15 06:00:55 GMT 2021 --> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
| <title>InputFormat (Apache Hadoop Main 3.3.1 API)</title> |
| <meta name="date" content="2021-06-15"> |
| <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="InputFormat (Apache Hadoop Main 3.3.1 API)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":6,"i1":6}; |
| var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/InputFormat.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../org/apache/hadoop/mapred/ID.html" title="class in org.apache.hadoop.mapred"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/hadoop/mapred/InputFormat.html" target="_top">Frames</a></li> |
| <li><a href="InputFormat.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.hadoop.mapred</div> |
| <h2 title="Interface InputFormat" class="title">Interface InputFormat&lt;K,V&gt;</h2> |
| </div> |
| <div class="contentContainer"> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <dl> |
| <dt>All Known Subinterfaces:</dt> |
| <dd><a href="../../../../org/apache/hadoop/mapred/join/ComposableInputFormat.html" title="interface in org.apache.hadoop.mapred.join">ComposableInputFormat</a>&lt;K,V&gt;</dd> |
| </dl> |
| <dl> |
| <dt>All Known Implementing Classes:</dt> |
| <dd><a href="../../../../org/apache/hadoop/mapred/lib/CombineFileInputFormat.html" title="class in org.apache.hadoop.mapred.lib">CombineFileInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/lib/CombineSequenceFileInputFormat.html" title="class in org.apache.hadoop.mapred.lib">CombineSequenceFileInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/lib/CombineTextInputFormat.html" title="class in org.apache.hadoop.mapred.lib">CombineTextInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/join/CompositeInputFormat.html" title="class in org.apache.hadoop.mapred.join">CompositeInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/lib/db/DBInputFormat.html" title="class in org.apache.hadoop.mapred.lib.db">DBInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/FileInputFormat.html" title="class in org.apache.hadoop.mapred">FileInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/FixedLengthInputFormat.html" title="class in org.apache.hadoop.mapred">FixedLengthInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/KeyValueTextInputFormat.html" title="class in org.apache.hadoop.mapred">KeyValueTextInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/MultiFileInputFormat.html" title="class in org.apache.hadoop.mapred">MultiFileInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/lib/NLineInputFormat.html" title="class in org.apache.hadoop.mapred.lib">NLineInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/join/Parser.Node.html" title="class in org.apache.hadoop.mapred.join">Parser.Node</a>, <a href="../../../../org/apache/hadoop/mapred/SequenceFileAsBinaryInputFormat.html" title="class in org.apache.hadoop.mapred">SequenceFileAsBinaryInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/SequenceFileAsTextInputFormat.html" title="class in org.apache.hadoop.mapred">SequenceFileAsTextInputFormat</a>, <a 
href="../../../../org/apache/hadoop/mapred/SequenceFileInputFilter.html" title="class in org.apache.hadoop.mapred">SequenceFileInputFilter</a>, <a href="../../../../org/apache/hadoop/mapred/SequenceFileInputFormat.html" title="class in org.apache.hadoop.mapred">SequenceFileInputFormat</a>, <a href="../../../../org/apache/hadoop/mapred/TextInputFormat.html" title="class in org.apache.hadoop.mapred">TextInputFormat</a></dd> |
| </dl> |
| <hr> |
| <br> |
| <pre>@InterfaceAudience.Public |
| @InterfaceStability.Stable |
| public interface <span class="typeNameLabel">InputFormat&lt;K,V&gt;</span></pre> |
| <div class="block"><code>InputFormat</code> describes the input-specification for a |
| Map-Reduce job. |
| |
| <p>The Map-Reduce framework relies on the <code>InputFormat</code> of the |
| job to:</p> |
| <ol> |
| <li> |
| Validate the input-specification of the job. |
| </li> |
| <li> |
| Split-up the input file(s) into logical <a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><code>InputSplit</code></a>s, each of |
| which is then assigned to an individual <a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="interface in org.apache.hadoop.mapred"><code>Mapper</code></a>. |
| </li> |
| <li> |
| Provide the <a href="../../../../org/apache/hadoop/mapred/RecordReader.html" title="interface in org.apache.hadoop.mapred"><code>RecordReader</code></a> implementation to be used to glean |
| input records from the logical <code>InputSplit</code> for processing by |
| the <a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="interface in org.apache.hadoop.mapred"><code>Mapper</code></a>. |
| </li> |
| </ol> |
| |
| <p>The default behavior of file-based <a href="../../../../org/apache/hadoop/mapred/InputFormat.html" title="interface in org.apache.hadoop.mapred"><code>InputFormat</code></a>s, typically |
| sub-classes of <a href="../../../../org/apache/hadoop/mapred/FileInputFormat.html" title="class in org.apache.hadoop.mapred"><code>FileInputFormat</code></a>, is to split the |
| input into <i>logical</i> <a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><code>InputSplit</code></a>s based on the total size, in |
| bytes, of the input files. However, the <a href="../../../../org/apache/hadoop/fs/FileSystem.html" title="class in org.apache.hadoop.fs"><code>FileSystem</code></a> blocksize of |
| the input files is treated as an upper bound for input splits. A lower bound |
| on the split size can be set via |
| <a href="../../../../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.input.fileinputformat.split.minsize"> |
| mapreduce.input.fileinputformat.split.minsize</a>.</p> |
| |
| <p>Clearly, logical splits based on input-size are insufficient for many |
| applications since record boundaries are to be respected. In such cases, the |
| application has to also implement a <a href="../../../../org/apache/hadoop/mapred/RecordReader.html" title="interface in org.apache.hadoop.mapred"><code>RecordReader</code></a> on whom lies the |
| responsibility to respect record-boundaries and present a record-oriented |
| view of the logical <code>InputSplit</code> to the individual task.</p></div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><code>InputSplit</code></a>, |
| <a href="../../../../org/apache/hadoop/mapred/RecordReader.html" title="interface in org.apache.hadoop.mapred"><code>RecordReader</code></a>, |
| <a href="../../../../org/apache/hadoop/mapred/JobClient.html" title="class in org.apache.hadoop.mapred"><code>JobClient</code></a>, |
| <a href="../../../../org/apache/hadoop/mapred/FileInputFormat.html" title="class in org.apache.hadoop.mapred"><code>FileInputFormat</code></a></dd> |
| </dl> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t3" class="tableTab"><span><a href="javascript:show(4);">Abstract Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/hadoop/mapred/RecordReader.html" title="interface in org.apache.hadoop.mapred">RecordReader</a>&lt;<a href="../../../../org/apache/hadoop/mapred/InputFormat.html" title="type parameter in InputFormat">K</a>,<a href="../../../../org/apache/hadoop/mapred/InputFormat.html" title="type parameter in InputFormat">V</a>&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/hadoop/mapred/InputFormat.html#getRecordReader-org.apache.hadoop.mapred.InputSplit-org.apache.hadoop.mapred.JobConf-org.apache.hadoop.mapred.Reporter-">getRecordReader</a></span>(<a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred">InputSplit</a> split, |
| <a href="../../../../org/apache/hadoop/mapred/JobConf.html" title="class in org.apache.hadoop.mapred">JobConf</a> job, |
| <a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred">Reporter</a> reporter)</code> |
| <div class="block">Get the <a href="../../../../org/apache/hadoop/mapred/RecordReader.html" title="interface in org.apache.hadoop.mapred"><code>RecordReader</code></a> for the given <a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><code>InputSplit</code></a>.</div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred">InputSplit</a>[]</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/hadoop/mapred/InputFormat.html#getSplits-org.apache.hadoop.mapred.JobConf-int-">getSplits</a></span>(<a href="../../../../org/apache/hadoop/mapred/JobConf.html" title="class in org.apache.hadoop.mapred">JobConf</a> job, |
| int numSplits)</code> |
| <div class="block">Logically split the set of input files for the job.</div> |
| </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="getSplits-org.apache.hadoop.mapred.JobConf-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getSplits</h4> |
| <pre><a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred">InputSplit</a>[] getSplits(<a href="../../../../org/apache/hadoop/mapred/JobConf.html" title="class in org.apache.hadoop.mapred">JobConf</a> job, |
| int numSplits) |
| throws <a href="https://docs.oracle.com/javase/8/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre> |
| <div class="block">Logically split the set of input files for the job. |
| |
| <p>Each <a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><code>InputSplit</code></a> is then assigned to an individual <a href="../../../../org/apache/hadoop/mapred/Mapper.html" title="interface in org.apache.hadoop.mapred"><code>Mapper</code></a> |
| for processing.</p> |
| |
| <p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the |
| input files are not physically split into chunks. For example, a split could |
| be an <i>&lt;input-file-path, start, offset&gt;</i> tuple.</p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>job</code> - job configuration.</dd> |
| <dd><code>numSplits</code> - the desired number of splits, a hint.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>an array of <a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><code>InputSplit</code></a>s for the job.</dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="https://docs.oracle.com/javase/8/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="getRecordReader-org.apache.hadoop.mapred.InputSplit-org.apache.hadoop.mapred.JobConf-org.apache.hadoop.mapred.Reporter-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>getRecordReader</h4> |
| <pre><a href="../../../../org/apache/hadoop/mapred/RecordReader.html" title="interface in org.apache.hadoop.mapred">RecordReader</a>&lt;<a href="../../../../org/apache/hadoop/mapred/InputFormat.html" title="type parameter in InputFormat">K</a>,<a href="../../../../org/apache/hadoop/mapred/InputFormat.html" title="type parameter in InputFormat">V</a>&gt; getRecordReader(<a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred">InputSplit</a> split, |
| <a href="../../../../org/apache/hadoop/mapred/JobConf.html" title="class in org.apache.hadoop.mapred">JobConf</a> job, |
| <a href="../../../../org/apache/hadoop/mapred/Reporter.html" title="interface in org.apache.hadoop.mapred">Reporter</a> reporter) |
| throws <a href="https://docs.oracle.com/javase/8/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre> |
| <div class="block">Get the <a href="../../../../org/apache/hadoop/mapred/RecordReader.html" title="interface in org.apache.hadoop.mapred"><code>RecordReader</code></a> for the given <a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><code>InputSplit</code></a>. |
| |
| <p>It is the responsibility of the <code>RecordReader</code> to respect |
| record boundaries while processing the logical split to present a |
| record-oriented view to the individual task.</p></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>split</code> - the <a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><code>InputSplit</code></a></dd> |
| <dd><code>job</code> - the job that this split belongs to</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>a <a href="../../../../org/apache/hadoop/mapred/RecordReader.html" title="interface in org.apache.hadoop.mapred"><code>RecordReader</code></a></dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="https://docs.oracle.com/javase/8/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/InputFormat.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../org/apache/hadoop/mapred/ID.html" title="class in org.apache.hadoop.mapred"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../org/apache/hadoop/mapred/InputSplit.html" title="interface in org.apache.hadoop.mapred"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/hadoop/mapred/InputFormat.html" target="_top">Frames</a></li> |
| <li><a href="InputFormat.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| <p class="legalCopy"><small>Copyright © 2021 <a href="https://www.apache.org">Apache Software Foundation</a>. All rights reserved.</small></p> |
| </body> |
| </html> |