name="hadoop-mapred 0.22.0"
<package name="org.apache.hadoop.filecache">
<!-- start class org.apache.hadoop.filecache.DistributedCache -->
<class name="DistributedCache" extends="org.apache.hadoop.mapreduce.filecache.DistributedCache"
static="false" final="false" visibility="public"
deprecated="Use methods on {@link Job}.">
<constructor name="DistributedCache"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Distribute application-specific large, read-only files efficiently.
<p><code>DistributedCache</code> is a facility provided by the Map-Reduce
framework to cache files (text, archives, jars etc.) needed by applications.
<p>Applications specify the files, via urls (hdfs:// or http://) to be cached
via the {@link org.apache.hadoop.mapred.JobConf}. The
<code>DistributedCache</code> assumes that the files specified via urls are
already present on the {@link FileSystem} at the path specified by the url
and are accessible by every machine in the cluster.</p>
<p>The framework will copy the necessary files on to the slave node before
any tasks for the job are executed on that node. Its efficiency stems from
the fact that the files are only copied once per job and the ability to
cache archives which are un-archived on the slaves.</p>
<p><code>DistributedCache</code> can be used to distribute simple, read-only
data/text files and/or more complex types such as archives, jars etc.
Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes.
Jars may be optionally added to the classpath of the tasks, a rudimentary
software distribution mechanism. Files have execution permissions.
Optionally users can also direct it to symlink the distributed cache file(s)
into the working directory of the task.</p>
<p><code>DistributedCache</code> tracks modification timestamps of the cache
files. Clearly the cache files should not be modified by the application
or externally while the job is executing.</p>
<p>Here is an illustrative example on how to use the
<code>DistributedCache</code>:</p>
// Setting up the cache for the application
1. Copy the requisite files to the <code>FileSystem</code>:
$ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
$ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
$ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
$ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
$ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
$ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
2. Setup the application's <code>JobConf</code>:
JobConf job = new JobConf();
DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
job);
DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
or {@link org.apache.hadoop.mapred.Reducer}:
public static class MapClass extends MapReduceBase
implements Mapper&lt;K, V, K, V&gt; {
private Path[] localArchives;
private Path[] localFiles;
public void configure(JobConf job) {
// Get the cached archives/files
localArchives = DistributedCache.getLocalCacheArchives(job);
localFiles = DistributedCache.getLocalCacheFiles(job);
}
public void map(K key, V value,
OutputCollector&lt;K, V&gt; output, Reporter reporter)
throws IOException {
// Use data from the cached archives/files here
// ...
// ...
output.collect(k, v);
}
}
It is also very common to use the DistributedCache by using
{@link org.apache.hadoop.util.GenericOptionsParser}.
This class includes methods that should be used by users
(specifically those mentioned in the example above, as well
as {@link DistributedCache#addArchiveToClassPath(Path, Configuration)}),
as well as methods intended for use by the MapReduce framework
(e.g., {@link org.apache.hadoop.mapred.JobClient}). For implementation
details, see {@link TrackerDistributedCacheManager} and
{@link TaskDistributedCacheManager}.
@see org.apache.hadoop.mapred.JobConf
@see org.apache.hadoop.mapred.JobClient
@see org.apache.hadoop.mapreduce.Job
@deprecated Use methods on {@link Job}.]]>
<!-- end class org.apache.hadoop.filecache.DistributedCache -->
<package name="org.apache.hadoop.mapred">
<!-- start class org.apache.hadoop.mapred.ClusterStatus -->
<class name="ClusterStatus" extends="java.lang.Object"
static="false" final="false" visibility="public"
deprecated="Use {@link ClusterMetrics} or {@link TaskTrackerInfo} instead">
<implements name=""/>
<method name="getTaskTrackers" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the number of task trackers in the cluster.
@return the number of task trackers in the cluster.]]>
<method name="getActiveTrackerNames" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the names of active task trackers in the cluster.
@return the active task trackers in the cluster.]]>
<method name="getBlacklistedTrackerNames" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the names of blacklisted task trackers in the cluster.
@return the blacklisted task trackers in the cluster.]]>
<method name="getBlacklistedTrackers" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the number of blacklisted task trackers in the cluster.
@return the number of blacklisted task trackers in the cluster.]]>
<method name="getNumExcludedNodes" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the number of excluded hosts in the cluster.
@return the number of excluded hosts in the cluster.]]>
<method name="getTTExpiryInterval" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the tasktracker expiry interval for the cluster
@return the expiry interval in msec]]>
<method name="getMapTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the number of currently running map tasks in the cluster.
@return the number of currently running map tasks in the cluster.]]>
<method name="getReduceTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the number of currently running reduce tasks in the cluster.
@return the number of currently running reduce tasks in the cluster.]]>
<method name="getMaxMapTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the maximum capacity for running map tasks in the cluster.
@return the maximum capacity for running map tasks in the cluster.]]>
<method name="getMaxReduceTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the maximum capacity for running reduce tasks in the cluster.
@return the maximum capacity for running reduce tasks in the cluster.]]>
<method name="getJobTrackerState" return="org.apache.hadoop.mapred.JobTracker.State"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link #getJobTrackerStatus()} instead.">
<![CDATA[Get the current state of the <code>JobTracker</code>,
as {@link JobTracker.State}
@return the current state of the <code>JobTracker</code>.
@deprecated Use {@link #getJobTrackerStatus()} instead.]]>
<method name="getJobTrackerStatus" return="org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the JobTracker's status.
@return {@link JobTrackerStatus} of the JobTracker]]>
<method name="getBlackListedTrackersInfo" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Gets the list of blacklisted trackers along with reasons for blacklisting.
@return the collection of {@link BlackListInfo} objects.]]>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type=""/>
<exception name="IOException" type=""/>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type=""/>
<exception name="IOException" type=""/>
<![CDATA[Status information on the current state of the Map-Reduce cluster.
<p><code>ClusterStatus</code> provides clients with information such as:
Size of the cluster.
Name of the trackers.
Task capacity of the cluster.
The number of currently running map & reduce tasks.
State of the <code>JobTracker</code>.
Details regarding black listed trackers.
<p>Clients can query for the latest <code>ClusterStatus</code>, via
{@link JobClient#getClusterStatus()}.</p>
@see JobClient
@deprecated Use {@link ClusterMetrics} or {@link TaskTrackerInfo} instead]]>
<!-- end class org.apache.hadoop.mapred.ClusterStatus -->
<!-- start class org.apache.hadoop.mapred.ClusterStatus.BlackListInfo -->
<class name="ClusterStatus.BlackListInfo" extends="java.lang.Object"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name=""/>
<method name="getTrackerName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Gets the blacklisted tasktracker's name.
@return tracker's name.]]>
<method name="getReasonForBlackListing" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Gets the reason for which the tasktracker was blacklisted.
@return the reason for which the tracker was blacklisted]]>
<method name="getBlackListReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Gets a descriptive report about why the tasktracker was blacklisted.
@return report describing why the tasktracker was blacklisted.]]>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type=""/>
<exception name="IOException" type=""/>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type=""/>
<exception name="IOException" type=""/>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Class which encapsulates information about a blacklisted tasktracker.
The information includes the tasktracker's name and reasons for
getting blacklisted. The toString method of the class will print
the information in a whitespace separated fashion to enable parsing.]]>
<!-- end class org.apache.hadoop.mapred.ClusterStatus.BlackListInfo -->
<!-- start class org.apache.hadoop.mapred.Counters -->
<class name="Counters" extends="java.lang.Object"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.Counters} instead.">
<implements name=""/>
<implements name="java.lang.Iterable"/>
<constructor name="Counters"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="getGroupNames" return="java.util.Collection"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Returns the names of all counter classes.
@return Set of counter names.]]>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="getGroup" return="org.apache.hadoop.mapred.Counters.Group"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="groupName" type="java.lang.String"/>
<![CDATA[Returns the named counter group, or an empty group if there is none
with the specified name.]]>
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<![CDATA[Find the counter for the given enum. The same enum will always return the
same counter.
@param key the counter key
@return the matching counter object]]>
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="name" type="java.lang.String"/>
<![CDATA[Find a counter given the group and the name.
@param group the name of the group
@param name the internal name of the counter
@return the counter for that name]]>
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="deprecated, no comment">
<param name="group" type="java.lang.String"/>
<param name="id" type="int"/>
<param name="name" type="java.lang.String"/>
<![CDATA[Find a counter by using strings
@param group the name of the group
@param id the id of the counter within the group (0 to N-1)
@param name the internal name of the counter
@return the counter for that name
@deprecated]]>
<method name="incrCounter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<param name="amount" type="long"/>
<![CDATA[Increments the specified counter by the specified amount, creating it if
it didn't already exist.
@param key identifies a counter
@param amount amount by which counter is to be incremented]]>
<method name="incrCounter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="counter" type="java.lang.String"/>
<param name="amount" type="long"/>
<![CDATA[Increments the specified counter by the specified amount, creating it if
it didn't already exist.
@param group the name of the group
@param counter the internal name of the counter
@param amount amount by which counter is to be incremented]]>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<![CDATA[Returns current value of the specified counter, or 0 if the counter
does not exist.]]>
<method name="incrAllCounters"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapred.Counters"/>
<![CDATA[Increments multiple counters by their amounts in another Counters
instance.
@param other the other Counters instance]]>
<method name="sum" return="org.apache.hadoop.mapred.Counters"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="a" type="org.apache.hadoop.mapred.Counters"/>
<param name="b" type="org.apache.hadoop.mapred.Counters"/>
<![CDATA[Convenience method for computing the sum of two sets of counters.]]>
<method name="size" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Returns the total number of counters, by summing the number of counters
in each group.]]>
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type=""/>
<exception name="IOException" type=""/>
<![CDATA[Write the set of groups.
The external format is:
#groups (groupName group)*
i.e. the number of groups followed by 0 or more groups, where each
group is of the form:
groupDisplayName #counters (false | true counter)*
where each counter is of the form:
name (false | true displayName) value]]>
<method name="readFields"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type=""/>
<exception name="IOException" type=""/>
<![CDATA[Read a set of groups.]]>
<method name="log"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="log" type="org.apache.commons.logging.Log"/>
<![CDATA[Logs the current counter values.
@param log The log to use.]]>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Return textual representation of the counter values.]]>
<method name="makeCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Convert a counters object into a single line that is easy to parse.
@return the string with "name=value" for each counter and separated by ","]]>
<method name="makeEscapedCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Represent the counter in a textual format that can be converted back to
its object form.
@return the string in the following format:
{(groupname)(group-displayname)[(countername)(displayname)(value)][][]}{}{}]]>
<method name="fromEscapedCompactString" return="org.apache.hadoop.mapred.Counters"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="compactString" type="java.lang.String"/>
<exception name="ParseException" type="java.text.ParseException"/>
<![CDATA[Convert a stringified counter representation into a counter object. Note
that the counter can be recovered if it is stringified using
{@link #makeEscapedCompactString()}.
@return a Counters object]]>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="obj" type="java.lang.Object"/>
<![CDATA[A set of named counters.
<p><code>Counters</code> represent global counters, defined either by the
Map-Reduce framework or applications. Each <code>Counter</code> can be of
any {@link Enum} type.</p>
<p><code>Counters</code> are bunched into {@link Group}s, each comprising of
counters from a particular <code>Enum</code> class.
@deprecated Use {@link org.apache.hadoop.mapreduce.Counters} instead.]]>
<!-- end class org.apache.hadoop.mapred.Counters -->
<!-- start class org.apache.hadoop.mapred.Counters.Counter -->
<class name="Counters.Counter" extends="org.apache.hadoop.mapreduce.Counter"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<method name="setDisplayName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newName" type="java.lang.String"/>
<method name="makeEscapedCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Returns the compact stringified version of the counter in the format
[(actual-name)(display-name)(value)]]]>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[What is the current value of this counter?
@return the current value]]>
<![CDATA[A counter record, comprising its name and value.]]>
<!-- end class org.apache.hadoop.mapred.Counters.Counter -->
<!-- start class org.apache.hadoop.mapred.Counters.Group -->
<class name="Counters.Group" extends="java.lang.Object"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name=""/>
<implements name="java.lang.Iterable"/>
<method name="getName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Returns raw name of the group. This is the name of the enum class
for this group of counters.]]>
<method name="getDisplayName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Returns localized name of the group. This is the same as getName() by
default, but different if an appropriate ResourceBundle is found.]]>
<method name="setDisplayName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="displayName" type="java.lang.String"/>
<![CDATA[Set the display name]]>
<method name="makeEscapedCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Returns the compact stringified version of the group in the format
{(actual-name)(display-name)(value)[][][]} where [] are compact strings for the
counters within.]]>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="obj" type="java.lang.Object"/>
<![CDATA[Checks for (content) equality of Groups]]>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
<![CDATA[Returns the value of the specified counter, or 0 if the counter does
not exist.]]>
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="use {@link #getCounter(String)} instead">
<param name="id" type="int"/>
<param name="name" type="java.lang.String"/>
<![CDATA[Get the counter for the given id and create it if it doesn't exist.
@param id the numeric id of the counter within the group
@param name the internal counter name
@return the counter
@deprecated use {@link #getCounter(String)} instead]]>
<method name="getCounterForName" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<![CDATA[Get the counter for the given name and create it if it doesn't exist.
@param name the internal counter name
@return the counter]]>
<method name="size" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Returns the number of counters in this group.]]>
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type=""/>
<exception name="IOException" type=""/>
<method name="readFields"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type=""/>
<exception name="IOException" type=""/>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[<code>Group</code> of counters, comprising counters from a particular
counter {@link Enum} class.
<p><code>Group</code> handles localization of the class name and the
counter names.</p>]]>
<!-- end class org.apache.hadoop.mapred.Counters.Group -->
<!-- start class org.apache.hadoop.mapred.FileAlreadyExistsException -->
<class name="FileAlreadyExistsException" extends=""
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileAlreadyExistsException"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileAlreadyExistsException" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Used when target file already exists for any operation and
is not configured to be overwritten.]]>
<!-- end class org.apache.hadoop.mapred.FileAlreadyExistsException -->
<!-- start class org.apache.hadoop.mapred.FileInputFormat -->
<class name="FileInputFormat" extends="java.lang.Object"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} instead.">
<implements name="org.apache.hadoop.mapred.InputFormat"/>
<constructor name="FileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="setMinSplitSize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="minSplitSize" type="long"/>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="filename" type="org.apache.hadoop.fs.Path"/>
<![CDATA[Is the given filename splitable? Usually, true, but if the file is
stream compressed, it will not be.
<code>FileInputFormat</code> implementations can override this and return
<code>false</code> to ensure that individual input files are never split-up
so that {@link Mapper}s process entire files.
@param fs the file system that the file is on
@param filename the file name to check
@return is this file splitable?]]>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type=""/>
<method name="setInputPathFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="filter" type="java.lang.Class"/>
<![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job.
@param filter the PathFilter class to use for filtering the input paths.]]>
<method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<![CDATA[Get a PathFilter instance of the filter set for the input paths.
@return the PathFilter instance set for the job, NULL if none has been set.]]>
<method name="addInputPathRecursively"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="result" type="java.util.List"/>
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<param name="inputFilter" type="org.apache.hadoop.fs.PathFilter"/>
<exception name="IOException" type=""/>
<![CDATA[Add files in the input path recursively into the results.
@param result
The List to store all files.
@param fs
The FileSystem.
@param path
The input path.
@param inputFilter
The input filter that can be used to filter files/dirs.
@throws IOException]]>
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type=""/>
<![CDATA[List input directories.
Subclasses may override to, e.g., select only files matching a regular
expression.
@param job the job to list input paths for
@return array of FileStatus objects
@throws IOException if zero items.]]>
<method name="makeSplit" return="org.apache.hadoop.mapred.FileSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="file" type="org.apache.hadoop.fs.Path"/>
<param name="start" type="long"/>
<param name="length" type="long"/>
<param name="hosts" type="java.lang.String[]"/>
<![CDATA[A factory that makes the split for this class. It can be overridden
by sub-classes to make sub-types]]>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type=""/>
<![CDATA[Splits files returned by {@link #listStatus(JobConf)} when
they're too big.]]>
<method name="computeSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="goalSize" type="long"/>
<param name="minSize" type="long"/>
<param name="blockSize" type="long"/>
<method name="getBlockIndex" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
<param name="offset" type="long"/>
<method name="setInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="commaSeparatedPaths" type="java.lang.String"/>
<![CDATA[Sets the given comma separated paths as the list of inputs
for the map-reduce job.
@param conf Configuration of the job
@param commaSeparatedPaths Comma separated paths to be set as
the list of inputs for the map-reduce job.]]>
<method name="addInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="commaSeparatedPaths" type="java.lang.String"/>
<![CDATA[Add the given comma separated paths to the list of inputs for
the map-reduce job.
@param conf The configuration of the job
@param commaSeparatedPaths Comma separated paths to be added to
the list of inputs for the map-reduce job.]]>
<method name="setInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/>
<![CDATA[Set the array of {@link Path}s as the list of inputs
for the map-reduce job.
@param conf Configuration of the job.
@param inputPaths the {@link Path}s of the input directories/files
for the map-reduce job.]]>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job.
@param conf The configuration of the job
@param path {@link Path} to be added to the list of inputs for
the map-reduce job.]]>
<method name="getInputPaths" return="org.apache.hadoop.fs.Path[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<![CDATA[Get the list of input {@link Path}s for the map-reduce job.
@param conf The configuration of the job
@return the list of input {@link Path}s for the map-reduce job.]]>
<method name="getSplitHosts" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
<param name="offset" type="long"/>
<param name="splitSize" type="long"/>
<param name="clusterMap" type=""/>
<exception name="IOException" type=""/>
<![CDATA[This function identifies and returns the hosts that contribute
most for a given split. For calculating the contribution, rack
locality is treated on par with host locality, so hosts from racks
that contribute the most are preferred over hosts on racks that
contribute less
@param blkLocations The list of block locations
@param offset
@param splitSize
@return array of hosts that contribute most to this split
@throws IOException]]>
<field name="LOG" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<field name="NUM_INPUT_FILES" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<![CDATA[A base class for file-based {@link InputFormat}.
<p><code>FileInputFormat</code> is the base class for all file-based
<code>InputFormat</code>s. This provides a generic implementation of
{@link #getSplits(JobConf, int)}.
Subclasses of <code>FileInputFormat</code> can also override the
{@link #isSplitable(FileSystem, Path)} method to ensure input-files are
not split-up and are processed as a whole by {@link Mapper}s.
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} instead.]]>
<!-- end class org.apache.hadoop.mapred.FileInputFormat -->
<!-- start class org.apache.hadoop.mapred.FileOutputCommitter -->
<class name="FileOutputCommitter" extends="org.apache.hadoop.mapred.OutputCommitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileOutputCommitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="setupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type=""/>
<method name="commitJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type=""/>
<method name="cleanupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type=""/>
<method name="abortJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<param name="runState" type="int"/>
<exception name="IOException" type=""/>
<method name="setupTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type=""/>
<method name="commitTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type=""/>
<method name="abortTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type=""/>
<method name="needsTaskCommit" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type=""/>
<field name="LOG" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<field name="TEMP_DIR_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<![CDATA[Temporary directory name]]>
<field name="SUCCEEDED_FILE_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<![CDATA[An {@link OutputCommitter} that commits files specified
in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}.]]>
<!-- end class org.apache.hadoop.mapred.FileOutputCommitter -->
<!-- start class org.apache.hadoop.mapred.FileOutputFormat -->
<class name="FileOutputFormat" extends="java.lang.Object"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
<constructor name="FileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="setCompressOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="compress" type="boolean"/>
<![CDATA[Set whether the output of the job is compressed.
@param conf the {@link JobConf} to modify
@param compress should the output of the job be compressed?]]>
<method name="getCompressOutput" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<![CDATA[Is the job output compressed?
@param conf the {@link JobConf} to look in
@return <code>true</code> if the job output should be compressed,
<code>false</code> otherwise]]>
<method name="setOutputCompressorClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="codecClass" type="java.lang.Class"/>
<![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs.
@param conf the {@link JobConf} to modify
@param codecClass the {@link CompressionCodec} to be used to
compress the job outputs]]>
<method name="getOutputCompressorClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="defaultValue" type="java.lang.Class"/>
<![CDATA[Get the {@link CompressionCodec} for compressing the job outputs.
@param conf the {@link JobConf} to look in
@param defaultValue the {@link CompressionCodec} to return if not set
@return the {@link CompressionCodec} to be used to compress the
job outputs
@throws IllegalArgumentException if the class was specified, but not found]]>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type=""/>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/>
<exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/>
<exception name="IOException" type=""/>
<method name="setOutputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="outputDir" type="org.apache.hadoop.fs.Path"/>
<![CDATA[Set the {@link Path} of the output directory for the map-reduce job.
@param conf The configuration of the job.
@param outputDir the {@link Path} of the output directory for
the map-reduce job.]]>
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<![CDATA[Get the {@link Path} to the output directory for the map-reduce job.
@return the {@link Path} to the output directory for the map-reduce job.
@see FileOutputFormat#getWorkOutputPath(JobConf)]]>
<method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<![CDATA[Get the {@link Path} to the task's temporary output directory
for the map-reduce job
<h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4>
<p><i>Note:</i> The following is valid only if the {@link OutputCommitter}
is {@link FileOutputCommitter}. If <code>OutputCommitter</code> is not
a <code>FileOutputCommitter</code>, the task's temporary output
directory is the same as {@link #getOutputPath(JobConf)} i.e.
<tt>${mapreduce.output.fileoutputformat.outputdir}</tt>.</p>
<p>Some applications need to create/write-to side-files, which differ from
the actual job-outputs.
<p>In such cases there could be issues with 2 instances of the same TIP
(running simultaneously e.g. speculative tasks) trying to open/write-to the
same file (path) on HDFS. Hence the application-writer will have to pick
unique names per task-attempt (e.g. using the attemptid, say
<tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
<p>To get around this the Map-Reduce framework helps the application-writer
out by maintaining a special
sub-directory for each task-attempt on HDFS where the output of the
task-attempt goes. On successful completion of the task-attempt the files
in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only)
are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the
framework discards the sub-directory of unsuccessful task-attempts. This
is completely transparent to the application.</p>
<p>The application-writer can take advantage of this by creating any
side-files required in <tt>${mapreduce.task.output.dir}</tt> during execution
of the reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the
framework will move them out similarly - thus the application-writer doesn't
have to pick unique paths per task-attempt.</p>
<p><i>Note</i>: the value of <tt>${mapreduce.task.output.dir}</tt> during
execution of a particular task-attempt is actually
<tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>, and this value is
set by the map-reduce framework. So, just create any side-files in the
path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce
task to take advantage of this feature.</p>
<p>The entire discussion holds true for maps of jobs with
reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
goes directly to HDFS.</p>
@return the {@link Path} to the task's temporary output directory
for the map-reduce job.]]>
<method name="getTaskOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<exception name="IOException" type=""/>
<![CDATA[Helper function to create the task's temporary output directory and
return the path to the task's output file.
@param conf job-configuration
@param name temporary task-output filename
@return path to the task's temporary output file
@throws IOException]]>
<method name="getUniqueName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<![CDATA[Helper function to generate a name that is unique for the task.
<p>The generated name can be used to create custom files from within the
different tasks for the job, the names for different tasks will not collide
with each other.</p>
<p>The given name is postfixed with the task type, 'm' for maps, 'r' for
reduces and the task partition number. For example, given the name 'test'
running on the first map of the job, the generated name will be
'test-m-00000'.</p>
@param conf the configuration for the job.
@param name the name to make unique.
@return a unique name across all tasks of the job.]]>
<method name="getPathForCustomFile" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<![CDATA[Helper function to generate a {@link Path} for a file that is unique for
the task within the job output directory.
<p>The path can be used to create custom files from within the map and
reduce tasks. The path name will be unique for each task. The path parent
will be the job output directory.</p>
<p>This method uses the {@link #getUniqueName} method to make the file name
unique for the task.</p>
@param conf the configuration for the job.
@param name the name for the file.
@return a unique path across all tasks of the job.]]>
<![CDATA[A base class for {@link OutputFormat}.]]>
<!-- end class org.apache.hadoop.mapred.FileOutputFormat -->
<!-- start class org.apache.hadoop.mapred.FileSplit -->
<class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit} instead.">
<implements name="org.apache.hadoop.mapred.InputSplit"/>
<constructor name="FileSplit"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="deprecated, no comment">
<![CDATA[Constructs a split.
@param file the file name
@param start the position of the first byte in the file to process
@param length the number of bytes in the file to process]]>
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Constructs a split with host information
@param file the file name
@param start the position of the first byte in the file to process
@param length the number of bytes in the file to process
@param hosts the list of hosts containing the block, possibly null]]>
<constructor name="FileSplit" type="org.apache.hadoop.mapreduce.lib.input.FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="getPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[The file containing this split's data.]]>
<method name="getStart" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[The position of the first byte in the file to process.]]>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[The number of bytes in the file to process.]]>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type=""/>
<exception name="IOException" type=""/>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type=""/>
<exception name="IOException" type=""/>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[A section of an input file. Returned by {@link
InputFormat#getSplits(JobConf, int)} and passed to
{@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}.
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit} instead.]]>
<!-- end class org.apache.hadoop.mapred.FileSplit -->
<!-- start class org.apache.hadoop.mapred.ID -->
<class name="ID" extends="org.apache.hadoop.mapreduce.ID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ID" type="int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[constructs an ID object from the given int]]>
<constructor name="ID"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<![CDATA[A general identifier, which internally stores the id
as an integer. This is the super class of {@link JobID},
{@link TaskID} and {@link TaskAttemptID}.
@see JobID
@see TaskID
@see TaskAttemptID]]>
<!-- end class org.apache.hadoop.mapred.ID -->
<!-- start interface org.apache.hadoop.mapred.InputFormat -->
<interface name="InputFormat" abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.">
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type=""/>
<![CDATA[Logically split the set of input files for the job.
<p>Each {@link InputSplit} is then assigned to an individual {@link Mapper}
for processing.</p>
<p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
input files are not physically split into chunks. For example, a split could
be a <i>&lt;input-file-path, start, offset&gt;</i> tuple.
@param job job configuration.
@param numSplits the desired number of splits, a hint.
@return an array of {@link InputSplit}s for the job.]]>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type=""/>
<![CDATA[Get the {@link RecordReader} for the given {@link InputSplit}.
<p>It is the responsibility of the <code>RecordReader</code> to respect
record boundaries while processing the logical split to present a
record-oriented view to the individual task.</p>
@param split the {@link InputSplit}
@param job the job that this split belongs to
@return a {@link RecordReader}]]>
<![CDATA[<code>InputFormat</code> describes the input-specification for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>InputFormat</code> of the
job to:</p>
<ol>
<li>Validate the input-specification of the job.</li>
<li>Split-up the input file(s) into logical {@link InputSplit}s, each of
which is then assigned to an individual {@link Mapper}.</li>
<li>Provide the {@link RecordReader} implementation to be used to glean
input records from the logical <code>InputSplit</code> for processing by
the {@link Mapper}.</li>
</ol>
<p>The default behavior of file-based {@link InputFormat}s, typically
sub-classes of {@link FileInputFormat}, is to split the
input into <i>logical</i> {@link InputSplit}s based on the total size, in
bytes, of the input files. However, the {@link FileSystem} blocksize of
the input files is treated as an upper bound for input splits. A lower bound
on the split size can be set via
<a href="{@docRoot}/../mapred-default.html#mapreduce.input.fileinputformat.split.minsize">
mapreduce.input.fileinputformat.split.minsize</a>.</p>
<p>Clearly, logical splits based on input-size are insufficient for many
applications since record boundaries are to be respected. In such cases, the
application has to also implement a {@link RecordReader} on whom lies the
responsibility to respect record-boundaries and present a record-oriented
view of the logical <code>InputSplit</code> to the individual task.
@see InputSplit
@see RecordReader
@see JobClient
@see FileInputFormat
@deprecated Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.]]>
<!-- end interface org.apache.hadoop.mapred.InputFormat -->
<!-- start interface org.apache.hadoop.mapred.InputSplit -->
<interface name="InputSplit" abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.">
<implements name=""/>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Get the total number of bytes in the data of the <code>InputSplit</code>.
@return the number of bytes in the input split.
@throws IOException]]>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Get the list of hostnames where the input split is located.
@return list of hostnames where data of the <code>InputSplit</code> is
located as an array of <code>String</code>s.
@throws IOException]]>
<![CDATA[<code>InputSplit</code> represents the data to be processed by an
individual {@link Mapper}.
<p>Typically, it presents a byte-oriented view on the input and it is the
responsibility of the {@link RecordReader} of the job to process this and present
a record-oriented view.
@see InputFormat
@see RecordReader
@deprecated Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.]]>
<!-- end interface org.apache.hadoop.mapred.InputSplit -->
<!-- start class org.apache.hadoop.mapred.InvalidFileTypeException -->
<class name="InvalidFileTypeException" extends=""
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidFileTypeException"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidFileTypeException" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Used when the file type differs from the desired file type, for example
when a file is found where a directory is expected, or when the file type is wrong.]]>
<!-- end class org.apache.hadoop.mapred.InvalidFileTypeException -->
<!-- start class org.apache.hadoop.mapred.InvalidInputException -->
<class name="InvalidInputException" extends=""
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidInputException" type="java.util.List"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Create the exception with the given list.
@param probs the list of problems to report. this list is not copied.]]>
<method name="getProblems" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the complete list of the problems reported.
@return the list of problems, which must not be modified]]>
<method name="getMessage" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get a summary message of the problems found.
@return the concatenated messages from all of the problems.]]>
<![CDATA[This class wraps a list of problems with the input, so that the user
can get a list of problems together instead of finding and fixing them one
by one.]]>
<!-- end class org.apache.hadoop.mapred.InvalidInputException -->
<!-- start class org.apache.hadoop.mapred.InvalidJobConfException -->
<class name="InvalidJobConfException" extends=""
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidJobConfException"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidJobConfException" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[This exception is thrown when the jobconf is missing some mandatory attributes
or the value of some attribute is invalid.]]>
<!-- end class org.apache.hadoop.mapred.InvalidJobConfException -->
<!-- start class org.apache.hadoop.mapred.IsolationRunner -->
<class name="IsolationRunner" extends="java.lang.Object"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="IsolationRunner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<exception name="IOException" type=""/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<![CDATA[Run a single task.
@param args the first argument is the task directory]]>
<![CDATA[IsolationRunner is intended to facilitate debugging by re-running a specific
task, given left-over task files for a (typically failed) past job.
Currently, it is limited to re-running map tasks.
Users may coerce MapReduce to keep task files around by setting
mapreduce.task.files.preserve.failedtasks. See mapred_tutorial.xml for more documentation.]]>
<!-- end class org.apache.hadoop.mapred.IsolationRunner -->
<!-- start class org.apache.hadoop.mapred.JobClient -->
<class name="JobClient" extends=""
static="false" final="false" visibility="public"
deprecated="Use {@link Job} and {@link Cluster} instead">
<constructor name="JobClient"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Create a job client.]]>
<constructor name="JobClient" type="org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Build a job client with the given {@link JobConf}, and connect to the
default {@link JobTracker}.
@param conf the job configuration.
@throws IOException]]>
<constructor name="JobClient" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Build a job client with the given {@link Configuration},
and connect to the default {@link JobTracker}.
@param conf the configuration.
@throws IOException]]>
<constructor name="JobClient" type=", org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Build a job client, connect to the indicated job tracker.
@param jobTrackAddr the job tracker to connect to.
@param conf configuration.]]>
<method name="init"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type=""/>
<![CDATA[Connect to the default {@link JobTracker}.
@param conf the job configuration.
@throws IOException]]>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Close the <code>JobClient</code>.]]>
<method name="getFs" return="org.apache.hadoop.fs.FileSystem"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Get a filesystem handle. We need this to prepare jobs
for submission to the MapReduce system.
@return the filesystem handle.]]>
<method name="getClusterHandle" return="org.apache.hadoop.mapreduce.Cluster"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get a handle to the Cluster]]>
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobFile" type="java.lang.String"/>
<exception name="FileNotFoundException" type=""/>
<exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/>
<exception name="IOException" type=""/>
<![CDATA[Submit a job to the MR system.
This returns a handle to the {@link RunningJob} which can be used to track
the running-job.
@param jobFile the job configuration.
@return a handle to the {@link RunningJob} which can be used to track the
running-job.
@throws FileNotFoundException
@throws InvalidJobConfException
@throws IOException]]>
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="FileNotFoundException" type=""/>
<exception name="IOException" type=""/>
<![CDATA[Submit a job to the MR system.
This returns a handle to the {@link RunningJob} which can be used to track
the running-job.
@param conf the job configuration.
@return a handle to the {@link RunningJob} which can be used to track the
running-job.
@throws FileNotFoundException
@throws IOException]]>
<method name="getJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type=""/>
<![CDATA[Get a {@link RunningJob} object to track an ongoing job. Returns
null if the id does not correspond to any known job.
@param jobid the jobid of the job.
@return the {@link RunningJob} handle to track the job, null if the
<code>jobid</code> doesn't correspond to any known job.
@throws IOException]]>
<method name="getJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #getJob(JobID)}.">
<param name="jobid" type="java.lang.String"/>
<exception name="IOException" type=""/>
<![CDATA[@deprecated Applications should rather use {@link #getJob(JobID)}.]]>
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type=""/>
<![CDATA[Get the information of the current state of the map tasks of a job.
@param jobId the job to query.
@return the list of all of the map tips.
@throws IOException]]>
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #getMapTaskReports(JobID)}">
<param name="jobId" type="java.lang.String"/>
<exception name="IOException" type=""/>
<![CDATA[@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}]]>
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type=""/>
<![CDATA[Get the information of the current state of the reduce tasks of a job.
@param jobId the job to query.
@return the list of all of the reduce tips.
@throws IOException]]>
<method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type=""/>
<![CDATA[Get the information of the current state of the cleanup tasks of a job.
@param jobId the job to query.
@return the list of all of the cleanup tips.
@throws IOException]]>
<method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type=""/>
<![CDATA[Get the information of the current state of the setup tasks of a job.
@param jobId the job to query.
@return the list of all of the setup tips.
@throws IOException]]>
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #getReduceTaskReports(JobID)}">
<param name="jobId" type="java.lang.String"/>
<exception name="IOException" type=""/>
<![CDATA[@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}]]>
<method name="displayTasks"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<param name="type" type="java.lang.String"/>
<param name="state" type="java.lang.String"/>
<exception name="IOException" type=""/>
<![CDATA[Display the information about a job's tasks, of a particular type and
in a particular state
@param jobId the ID of the job
@param type the type of the task (map/reduce/setup/cleanup)
@param state the state of the task]]>
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Get status information about the Map-Reduce cluster.
@return the status information about the Map-Reduce cluster as an object
of {@link ClusterStatus}.
@throws IOException]]>
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="detailed" type="boolean"/>
<exception name="IOException" type=""/>
<![CDATA[Get status information about the Map-Reduce cluster.
@param detailed if true then get a detailed status including the
tracker names
@return the status information about the Map-Reduce cluster as an object
of {@link ClusterStatus}.
@throws IOException]]>
<method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Get the jobs that are not completed and not failed.
@return array of {@link JobStatus} for the running/to-be-run jobs.
@throws IOException]]>
<method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Get the jobs that are submitted.
@return array of {@link JobStatus} for the submitted jobs.
@throws IOException]]>
<method name="runJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type=""/>
<![CDATA[Utility that submits a job, then polls for progress until the job is
complete.
@param job the job configuration.
@throws IOException if the job fails]]>
<method name="monitorAndPrintJob" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="job" type="org.apache.hadoop.mapred.RunningJob"/>
<exception name="IOException" type=""/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<![CDATA[Monitor a job and print status in real-time as progress is made and tasks
fail.
@param conf the job's configuration
@param job the job to track
@return true if the job succeeded
@throws IOException if communication to the JobTracker fails]]>
<method name="setTaskOutputFilter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/>
<![CDATA[Sets the output filter for tasks. Only those tasks are printed whose
output matches the filter.
@param newValue task filter.]]>
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<![CDATA[Get the task output filter out of the JobConf.
@param job the JobConf to examine.
@return the filter level.]]>
<method name="setTaskOutputFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/>
<![CDATA[Modify the JobConf to set the task output filter.
@param job the JobConf to modify.
@param newValue the value to set.]]>
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Returns task output filter.
@return task filter.]]>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="cntrs" type="org.apache.hadoop.mapreduce.Counters"/>
<param name="counterGroupName" type="java.lang.String"/>
<param name="counterName" type="java.lang.String"/>
<exception name="IOException" type=""/>
<method name="getDefaultMaps" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Get status information about the max available Maps in the cluster.
@return the max available Maps in the cluster
@throws IOException]]>
<method name="getDefaultReduces" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Get status information about the max available Reduces in the cluster.
@return the max available Reduces in the cluster
@throws IOException]]>
<method name="getSystemDir" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Grab the jobtracker system directory path where job-specific files are to be placed.
@return the system directory where job-specific files are to be placed.]]>
<method name="getRootQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Returns an array of queue information objects about root level queues
@return the array of root level JobQueueInfo objects
@throws IOException]]>
<method name="getChildQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<exception name="IOException" type=""/>
<![CDATA[Returns an array of queue information objects about immediate children
of queue queueName.
@param queueName the name of the queue whose immediate children are returned
@return the array of immediate children JobQueueInfo objects
@throws IOException]]>
<method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Return an array of queue information objects about all the Job Queues
@return Array of JobQueueInfo objects
@throws IOException]]>
<method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<exception name="IOException" type=""/>
<![CDATA[Gets all the jobs which were added to particular Job Queue
@param queueName name of the Job Queue
@return Array of jobs present in the job queue
@throws IOException]]>
<method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<exception name="IOException" type=""/>
<![CDATA[Gets the queue information associated to a particular Job Queue
@param queueName name of the job queue.
@return Queue information associated to particular queue.
@throws IOException]]>
<method name="getQueueAclsForCurrentUser" return="org.apache.hadoop.mapred.QueueAclsInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Gets the Queue ACLs for current user
@return array of QueueAclsInfo object for current user.
@throws IOException]]>
<method name="getDelegationToken" return=""
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="renewer" type=""/>
<exception name="IOException" type=""/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<![CDATA[Get a delegation token for the user from the JobTracker.
@param renewer the user who can renew the token
@return the new token
@throws IOException]]>
<method name="renewDelegationToken" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="token" type=""/>
<exception name="SecretManager.InvalidToken" type=""/>
<exception name="IOException" type=""/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<![CDATA[Renew a delegation token
@param token the token to renew
@return the new expiration time of the token
@throws InvalidToken
@throws IOException]]>
<method name="cancelDelegationToken"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="token" type=""/>
<exception name="SecretManager.InvalidToken" type=""/>
<exception name="IOException" type=""/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<![CDATA[Cancel a delegation token from the JobTracker
@param token the token to cancel
@throws IOException]]>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="argv" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
<![CDATA[<code>JobClient</code> is the primary interface for the user-job to interact
with the {@link JobTracker}.
<code>JobClient</code> provides facilities to submit jobs, track their
progress, access component-tasks' reports/logs, get the Map-Reduce cluster
status information etc.
<p>The job submission process involves:
Checking the input and output specifications of the job.
Computing the {@link InputSplit}s for the job.
Setup the requisite accounting information for the {@link DistributedCache}
of the job, if necessary.
Copying the job's jar and configuration to the map-reduce system directory
on the distributed file-system.
Submitting the job to the <code>JobTracker</code> and optionally monitoring
its status.
Normally the user creates the application, describes various facets of the
job via {@link JobConf} and then uses the <code>JobClient</code> to submit
the job and monitor its progress.
<p>Here is an example on how to use <code>JobClient</code>:</p>
// Create a new JobConf
JobConf job = new JobConf(new Configuration(), MyJob.class);
// Specify various job-specific parameters
job.setInputPath(new Path("in"));
job.setOutputPath(new Path("out"));
// Submit the job, then poll for progress until the job is complete
<h4 id="JobControl">Job Control</h4>
<p>At times clients would chain map-reduce jobs to accomplish complex tasks
which cannot be done via a single map-reduce job. This is fairly easy since
the output of the job, typically, goes to distributed file-system and that
can be used as the input for the next job.</p>
<p>However, this also means that the onus on ensuring jobs are complete
(success/failure) lies squarely on the clients. In such situations the
various job-control options are:
{@link #runJob(JobConf)} : submits the job and returns only after
the job has completed.
{@link #submitJob(JobConf)} : only submits the job, then poll the
returned handle to the {@link RunningJob} to query status and make
scheduling decisions.
{@link JobConf#setJobEndNotificationURI(String)} : setup a notification
on job-completion, thus avoiding polling.
@see JobConf
@see ClusterStatus
@see Tool
@see DistributedCache
@deprecated Use {@link Job} and {@link Cluster} instead]]>
<!-- end class org.apache.hadoop.mapred.JobClient -->
<!-- start class org.apache.hadoop.mapred.JobClient.TaskStatusFilter -->
<class name="JobClient.TaskStatusFilter" extends="java.lang.Enum"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<method name="valueOf" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<!-- end class org.apache.hadoop.mapred.JobClient.TaskStatusFilter -->
<!-- start class org.apache.hadoop.mapred.JobConf -->
<class name="JobConf" extends="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="Use {@link Configuration} instead">
<constructor name="JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Construct a map/reduce job configuration.]]>
<constructor name="JobConf" type="java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Construct a map/reduce job configuration.
@param exampleClass a class whose containing jar is used as the job's jar.]]>
<constructor name="JobConf" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Construct a map/reduce job configuration.
@param conf a Configuration whose settings will be inherited.]]>
<constructor name="JobConf" type="org.apache.hadoop.conf.Configuration, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Construct a map/reduce job configuration.
@param conf a Configuration whose settings will be inherited.
@param exampleClass a class whose containing jar is used as the job's jar.]]>
<constructor name="JobConf" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Construct a map/reduce configuration.
@param config a Configuration-format XML job description file.]]>
<constructor name="JobConf" type="org.apache.hadoop.fs.Path"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Construct a map/reduce configuration.
@param config a Configuration-format XML job description file.]]>
<constructor name="JobConf" type="boolean"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[A new map/reduce configuration where the behavior of reading from the
default resources can be turned off.
If the parameter {@code loadDefaults} is false, the new instance
will not load resources from the default files.
@param loadDefaults specifies whether to load from the default files]]>
<method name="getCredentials" return=""
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get credentials for the job.
@return credentials for the job]]>
<method name="getJar" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the user jar for the map-reduce job.
@return the user jar for the map-reduce job.]]>
<method name="setJar"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jar" type="java.lang.String"/>
<![CDATA[Set the user jar for the map-reduce job.
@param jar the user jar for the map-reduce job.]]>
<method name="getJarUnpackPattern" return="java.util.regex.Pattern"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the pattern for jar contents to unpack on the tasktracker]]>
<method name="setJarByClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<![CDATA[Set the job's jar file by finding an example class location.
@param cls the example class.]]>
<method name="getLocalDirs" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<method name="deleteLocalFiles"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type=""/>
<![CDATA[Use MRAsyncDiskService.moveAndDeleteAllVolumes instead.
@see org.apache.hadoop.mapreduce.util.MRAsyncDiskService#cleanupAllVolumes()]]>
<method name="deleteLocalFiles"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="subdir" type="java.lang.String"/>
<exception name="IOException" type=""/>
<method name="getLocalPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="pathString" type="java.lang.String"/>
<exception name="IOException" type=""/>
<![CDATA[Constructs a local file name. Files are distributed among configured
local directories.]]>
<method name="getUser" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the reported username for this job.
@return the username]]>
<method name="setUser"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="user" type="java.lang.String"/>
<![CDATA[Set the reported username for this job.
@param user the username for this job.]]>
<method name="setKeepFailedTaskFiles"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="keep" type="boolean"/>
<![CDATA[Set whether the framework should keep the intermediate files for
failed tasks.
@param keep <code>true</code> if framework should keep the intermediate files
for failed tasks, <code>false</code> otherwise.]]>
<method name="getKeepFailedTaskFiles" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Should the temporary files for failed tasks be kept?
@return should the files be kept?]]>
<method name="setKeepTaskFilesPattern"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="pattern" type="java.lang.String"/>
<![CDATA[Set a regular expression for task names that should be kept.
The regular expression ".*_m_000123_0" would keep the files
for the first instance of map 123 that ran.
@param pattern the java.util.regex.Pattern to match against the
task names.]]>
<method name="getKeepTaskFilesPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the regular expression that is matched against the task names
to see if we need to keep the files.
@return the pattern as a string, if it was set, otherwise null.]]>
<method name="setWorkingDirectory"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="dir" type="org.apache.hadoop.fs.Path"/>
<![CDATA[Set the current working directory for the default file system.
@param dir the new current working directory.]]>
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the current working directory for the default file system.
@return the directory name.]]>
<method name="setNumTasksToExecutePerJvm"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="numTasks" type="int"/>
<![CDATA[Sets the number of tasks that a spawned task JVM should run
before it exits
@param numTasks the number of tasks to execute; defaults to 1;
-1 signifies no limit]]>
<method name="getNumTasksToExecutePerJvm" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the number of tasks that a spawned JVM should execute]]>
<method name="getInputFormat" return="org.apache.hadoop.mapred.InputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the {@link InputFormat} implementation for the map-reduce job,
defaults to {@link TextInputFormat} if not specified explicitly.
@return the {@link InputFormat} implementation for the map-reduce job.]]>
<method name="setInputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<![CDATA[Set the {@link InputFormat} implementation for the map-reduce job.
@param theClass the {@link InputFormat} implementation for the map-reduce
job.]]>
<method name="getOutputFormat" return="org.apache.hadoop.mapred.OutputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the {@link OutputFormat} implementation for the map-reduce job,
defaults to {@link TextOutputFormat} if not specified explicitly.
@return the {@link OutputFormat} implementation for the map-reduce job.]]>
<method name="getOutputCommitter" return="org.apache.hadoop.mapred.OutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the {@link OutputCommitter} implementation for the map-reduce job,
defaults to {@link FileOutputCommitter} if not specified explicitly.
@return the {@link OutputCommitter} implementation for the map-reduce job.]]>
<method name="setOutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<![CDATA[Set the {@link OutputCommitter} implementation for the map-reduce job.
@param theClass the {@link OutputCommitter} implementation for the map-reduce
job.]]>
<method name="setOutputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<![CDATA[Set the {@link OutputFormat} implementation for the map-reduce job.
@param theClass the {@link OutputFormat} implementation for the map-reduce
job.]]>
<method name="setCompressMapOutput"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="compress" type="boolean"/>
<![CDATA[Should the map outputs be compressed before transfer?
Uses the SequenceFile compression.
@param compress should the map outputs be compressed?]]>
<method name="getCompressMapOutput" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Are the outputs of the maps to be compressed?
@return <code>true</code> if the outputs of the maps are to be compressed,
<code>false</code> otherwise.]]>
<method name="setMapOutputCompressorClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="codecClass" type="java.lang.Class"/>
<![CDATA[Set the given class as the {@link CompressionCodec} for the map outputs.
@param codecClass the {@link CompressionCodec} class that will compress
the map outputs.]]>
<method name="getMapOutputCompressorClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="defaultValue" type="java.lang.Class"/>
<![CDATA[Get the {@link CompressionCodec} for compressing the map outputs.
@param defaultValue the {@link CompressionCodec} to return if not set
@return the {@link CompressionCodec} class that should be used to compress the
map outputs.
@throws IllegalArgumentException if the class was specified, but not found]]>
<method name="getMapOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the key class for the map output data. If it is not set, use the
(final) output key class. This allows the map output key class to be
different than the final output key class.
@return the map output key class.]]>
<method name="setMapOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<![CDATA[Set the key class for the map output data. This allows the user to
specify the map output key class to be different than the final output
key class.
@param theClass the map output key class.]]>
<method name="getMapOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the value class for the map output data. If it is not set, use the
(final) output value class. This allows the map output value class to be
different than the final output value class.
@return the map output value class.]]>
<method name="setMapOutputValueClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<![CDATA[Set the value class for the map output data. This allows the user to
specify the map output value class to be different than the final output
value class.
@param theClass the map output value class.]]>
<method name="getOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<![CDATA[Get the key class for the job output data.
@return the key class for the job output data.]]>
<method name="setOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<![CDATA[Set the key class for the job output data.