<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!-- Generated by the JDiff Javadoc doclet -->
<!-- (http://www.jdiff.org) -->
<!-- on Fri Mar 30 00:32:28 UTC 2018 -->
<api
xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'
xsi:noNamespaceSchemaLocation='api.xsd'
name="Apache Hadoop MapReduce Core 3.1.0"
jdversion="1.0.9">
<!-- Command line arguments = -doclet org.apache.hadoop.classification.tools.IncludePublicAnnotationsJDiffDoclet -docletpath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/hadoop-annotations.jar:/build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/jdiff.jar -verbose -classpath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/classes:/build/source/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/target/hadoop-yarn-client-3.1.0.jar:/maven/log4j/log4j/1.2.17/log4j-1.2.17.jar:/build/source/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/target/hadoop-yarn-api-3.1.0.jar:/build/source/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/target/hadoop-yarn-common-3.1.0.jar:/build/source/hadoop-common-project/hadoop-auth/target/hadoop-auth-3.1.0.jar:/maven/com/nimbusds/nimbus-jose-jwt/4.41.1/nimbus-jose-jwt-4.41.1.jar:/maven/com/github/stephenc/jcip/jcip-annotations/1.0-1/jcip-annotations-1.0-1.jar:/maven/net/minidev/json-smart/2.3/json-smart-2.3.jar:/maven/net/minidev/accessors-smart/1.2/accessors-smart-1.2.jar:/maven/org/ow2/asm/asm/5.0.4/asm-5.0.4.jar:/maven/org/apache/curator/curator-framework/2.12.0/curator-framework-2.12.0.jar:/maven/javax/xml/bind/jaxb-api/2.2.11/jaxb-api-2.2.11.jar:/maven/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1.jar:/maven/org/tukaani/xz/1.0/xz-1.0.jar:/maven/javax/servlet/javax.servlet-api/3.1.0/javax.servlet-api-3.1.0.jar:/maven/org/eclipse/jetty/jetty-util/9.3.19.v20170502/jetty-util-9.3.19.v20170502.jar:/maven/com/sun/jersey/jersey-core/1.19/jersey-core-1.19.jar:/maven/javax/ws/rs/jsr311-api/1.1.1/jsr311-api-1.1.1.jar:/maven/com/sun/jersey/jersey-client/1.19/jersey-client-1.19.jar:/maven/commons-io/commons-io/2.5/commons-io-2.5.jar:/maven/com/google/inject/guice/4.0/guice-4.0.jar:/maven/javax/inject/javax.inject/1/javax.inject-1.jar:/maven/aopalliance/aopalliance/1.0/aopallian
ce-1.0.jar:/maven/com/sun/jersey/jersey-server/1.19/jersey-server-1.19.jar:/maven/com/sun/jersey/jersey-json/1.19/jersey-json-1.19.jar:/maven/org/codehaus/jettison/jettison/1.1/jettison-1.1.jar:/maven/com/sun/xml/bind/jaxb-impl/2.2.3-1/jaxb-impl-2.2.3-1.jar:/maven/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13.jar:/maven/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13.jar:/maven/com/sun/jersey/contribs/jersey-guice/1.19/jersey-guice-1.19.jar:/maven/com/fasterxml/jackson/core/jackson-core/2.7.8/jackson-core-2.7.8.jar:/maven/com/fasterxml/jackson/module/jackson-module-jaxb-annotations/2.7.8/jackson-module-jaxb-annotations-2.7.8.jar:/maven/com/fasterxml/jackson/jaxrs/jackson-jaxrs-json-provider/2.7.8/jackson-jaxrs-json-provider-2.7.8.jar:/maven/com/fasterxml/jackson/jaxrs/jackson-jaxrs-base/2.7.8/jackson-jaxrs-base-2.7.8.jar:/build/source/hadoop-hdfs-project/hadoop-hdfs-client/target/hadoop-hdfs-client-3.1.0.jar:/maven/com/squareup/okhttp/okhttp/2.7.5/okhttp-2.7.5.jar:/maven/com/squareup/okio/okio/1.6.0/okio-1.6.0.jar:/maven/com/fasterxml/jackson/core/jackson-annotations/2.7.8/jackson-annotations-2.7.8.jar:/maven/org/eclipse/jetty/jetty-server/9.3.19.v20170502/jetty-server-9.3.19.v20170502.jar:/maven/org/eclipse/jetty/jetty-http/9.3.19.v20170502/jetty-http-9.3.19.v20170502.jar:/maven/org/eclipse/jetty/jetty-io/9.3.19.v20170502/jetty-io-9.3.19.v20170502.jar:/maven/org/apache/htrace/htrace-core4/4.1.0-incubating/htrace-core4-4.1.0-incubating.jar:/maven/com/fasterxml/jackson/core/jackson-databind/2.7.8/jackson-databind-2.7.8.jar:/maven/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar:/maven/org/apache/avro/avro/1.7.7/avro-1.7.7.jar:/maven/org/codehaus/jackson/jackson-core-asl/1.9.13/jackson-core-asl-1.9.13.jar:/maven/org/codehaus/jackson/jackson-mapper-asl/1.9.13/jackson-mapper-asl-1.9.13.jar:/maven/com/thoughtworks/paranamer/paranamer/2.3/paranamer-2.3.jar:/maven/org/xerial/snappy/snappy-java/1.0.5/snappy-java-1.0.5.jar:/build/sour
ce/hadoop-common-project/hadoop-common/target/hadoop-common-3.1.0.jar:/maven/org/apache/commons/commons-math3/3.1.1/commons-math3-3.1.1.jar:/maven/org/apache/httpcomponents/httpclient/4.5.2/httpclient-4.5.2.jar:/maven/org/apache/httpcomponents/httpcore/4.4.4/httpcore-4.4.4.jar:/maven/commons-net/commons-net/3.6/commons-net-3.6.jar:/maven/org/eclipse/jetty/jetty-servlet/9.3.19.v20170502/jetty-servlet-9.3.19.v20170502.jar:/maven/org/eclipse/jetty/jetty-security/9.3.19.v20170502/jetty-security-9.3.19.v20170502.jar:/maven/org/eclipse/jetty/jetty-webapp/9.3.19.v20170502/jetty-webapp-9.3.19.v20170502.jar:/maven/org/eclipse/jetty/jetty-xml/9.3.19.v20170502/jetty-xml-9.3.19.v20170502.jar:/maven/javax/servlet/jsp/jsp-api/2.1/jsp-api-2.1.jar:/maven/com/sun/jersey/jersey-servlet/1.19/jersey-servlet-1.19.jar:/maven/commons-beanutils/commons-beanutils/1.9.3/commons-beanutils-1.9.3.jar:/maven/org/apache/commons/commons-configuration2/2.1.1/commons-configuration2-2.1.1.jar:/maven/org/apache/commons/commons-lang3/3.4/commons-lang3-3.4.jar:/maven/com/google/re2j/re2j/1.1/re2j-1.1.jar:/maven/com/google/code/gson/gson/2.2.4/gson-2.2.4.jar:/maven/com/jcraft/jsch/0.1.54/jsch-0.1.54.jar:/maven/org/apache/curator/curator-client/2.12.0/curator-client-2.12.0.jar:/maven/org/apache/curator/curator-recipes/2.12.0/curator-recipes-2.12.0.jar:/maven/com/google/code/findbugs/jsr305/3.0.0/jsr305-3.0.0.jar:/maven/org/apache/zookeeper/zookeeper/3.4.9/zookeeper-3.4.9.jar:/maven/org/apache/kerby/kerb-simplekdc/1.0.1/kerb-simplekdc-1.0.1.jar:/maven/org/apache/kerby/kerb-client/1.0.1/kerb-client-1.0.1.jar:/maven/org/apache/kerby/kerby-config/1.0.1/kerby-config-1.0.1.jar:/maven/org/apache/kerby/kerb-core/1.0.1/kerb-core-1.0.1.jar:/maven/org/apache/kerby/kerby-pkix/1.0.1/kerby-pkix-1.0.1.jar:/maven/org/apache/kerby/kerby-asn1/1.0.1/kerby-asn1-1.0.1.jar:/maven/org/apache/kerby/kerby-util/1.0.1/kerby-util-1.0.1.jar:/maven/org/apache/kerby/kerb-common/1.0.1/kerb-common-1.0.1.jar:/maven/org/apache/kerby/kerb-c
rypto/1.0.1/kerb-crypto-1.0.1.jar:/maven/org/apache/kerby/kerb-util/1.0.1/kerb-util-1.0.1.jar:/maven/org/apache/kerby/token-provider/1.0.1/token-provider-1.0.1.jar:/maven/org/apache/kerby/kerb-admin/1.0.1/kerb-admin-1.0.1.jar:/maven/org/apache/kerby/kerb-server/1.0.1/kerb-server-1.0.1.jar:/maven/org/apache/kerby/kerb-identity/1.0.1/kerb-identity-1.0.1.jar:/maven/org/apache/kerby/kerby-xdr/1.0.1/kerby-xdr-1.0.1.jar:/maven/org/codehaus/woodstox/stax2-api/3.1.4/stax2-api-3.1.4.jar:/maven/com/fasterxml/woodstox/woodstox-core/5.0.3/woodstox-core-5.0.3.jar:/maven/org/slf4j/slf4j-api/1.7.25/slf4j-api-1.7.25.jar:/maven/org/slf4j/slf4j-log4j12/1.7.25/slf4j-log4j12-1.7.25.jar:/build/source/hadoop-common-project/hadoop-annotations/target/hadoop-annotations-3.1.0.jar:/usr/lib/jvm/java-8-openjdk-amd64/lib/tools.jar:/maven/com/google/inject/extensions/guice-servlet/4.0/guice-servlet-4.0.jar:/maven/io/netty/netty/3.10.5.Final/netty-3.10.5.Final.jar:/maven/commons-logging/commons-logging/1.1.3/commons-logging-1.1.3.jar:/maven/com/google/guava/guava/11.0.2/guava-11.0.2.jar:/maven/commons-codec/commons-codec/1.11/commons-codec-1.11.jar:/maven/commons-cli/commons-cli/1.2/commons-cli-1.2.jar:/maven/commons-lang/commons-lang/2.6/commons-lang-2.6.jar:/maven/commons-collections/commons-collections/3.2.2/commons-collections-3.2.2.jar:/maven/xerces/xercesImpl/2.11.0/xercesImpl-2.11.0.jar:/maven/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar -sourcepath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java -doclet org.apache.hadoop.classification.tools.IncludePublicAnnotationsJDiffDoclet -docletpath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/hadoop-annotations.jar:/build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/jdiff.jar -apidir 
/build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/site/jdiff/xml -apiname Apache Hadoop MapReduce Core 3.1.0 -->
<package name="org.apache.hadoop.filecache">
<!-- start class org.apache.hadoop.filecache.DistributedCache -->
<class name="DistributedCache" extends="org.apache.hadoop.mapreduce.filecache.DistributedCache"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="DistributedCache"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="addLocalArchives"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="str" type="java.lang.String"/>
<doc>
<![CDATA[Add an archive that has been localized to the conf. Used
by internal DistributedCache code.
@param conf The conf to modify to contain the localized caches
@param str a comma separated list of local archives]]>
</doc>
</method>
<method name="addLocalFiles"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="str" type="java.lang.String"/>
<doc>
<![CDATA[Add a file that has been localized to the conf. Used
by internal DistributedCache code.
@param conf The conf to modify to contain the localized caches
@param str a comma separated list of local files]]>
</doc>
</method>
<method name="createAllSymlink"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Internal to MapReduce framework. Use DistributedCacheManager
instead.">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="jobCacheDir" type="java.io.File"/>
<param name="workDir" type="java.io.File"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method creates symlinks for all files in a given dir in another
directory. Currently symlinks cannot be disabled. This is a NO-OP.
@param conf the configuration
@param jobCacheDir the target directory for creating symlinks
@param workDir the directory in which the symlinks are created
@throws IOException
@deprecated Internal to MapReduce framework. Use DistributedCacheManager
instead.]]>
</doc>
</method>
<method name="getFileStatus" return="org.apache.hadoop.fs.FileStatus"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="cache" type="java.net.URI"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns {@link FileStatus} of a given cache file on hdfs. Internal to
MapReduce.
@param conf configuration
@param cache cache file
@return <code>FileStatus</code> of a given cache file on hdfs
@throws IOException]]>
</doc>
</method>
<method name="getTimestamp" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="cache" type="java.net.URI"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns mtime of a given cache file on hdfs. Internal to MapReduce.
@param conf configuration
@param cache cache file
@return mtime of a given cache file on hdfs
@throws IOException]]>
</doc>
</method>
<method name="setArchiveTimestamps"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="timestamps" type="java.lang.String"/>
<doc>
<![CDATA[This is to check the timestamp of the archives to be localized.
Used by internal MapReduce code.
@param conf Configuration which stores the timestamps
@param timestamps comma separated list of timestamps of archives.
The order should be the same as the order in which the archives are added.]]>
</doc>
</method>
<method name="setFileTimestamps"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="timestamps" type="java.lang.String"/>
<doc>
<![CDATA[This is to check the timestamp of the files to be localized.
Used by internal MapReduce code.
@param conf Configuration which stores the timestamps
@param timestamps comma separated list of timestamps of files.
The order should be the same as the order in which the files are added.]]>
</doc>
</method>
<method name="setLocalArchives"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="str" type="java.lang.String"/>
<doc>
<![CDATA[Set the conf to contain the location for localized archives. Used
by internal DistributedCache code.
@param conf The conf to modify to contain the localized caches
@param str a comma separated list of local archives]]>
</doc>
</method>
<method name="setLocalFiles"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="str" type="java.lang.String"/>
<doc>
<![CDATA[Set the conf to contain the location for localized files. Used
by internal DistributedCache code.
@param conf The conf to modify to contain the localized caches
@param str a comma separated list of local files]]>
</doc>
</method>
<field name="CACHE_FILES_SIZES" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Warning: {@link #CACHE_FILES_SIZES} is not a *public* constant.
The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#CACHE_FILES_SIZES}]]>
</doc>
</field>
<field name="CACHE_ARCHIVES_SIZES" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Warning: {@link #CACHE_ARCHIVES_SIZES} is not a *public* constant.
The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#CACHE_ARCHIVES_SIZES}]]>
</doc>
</field>
<field name="CACHE_ARCHIVES_TIMESTAMPS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Warning: {@link #CACHE_ARCHIVES_TIMESTAMPS} is not a *public* constant.
The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#CACHE_ARCHIVES_TIMESTAMPS}]]>
</doc>
</field>
<field name="CACHE_FILES_TIMESTAMPS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Warning: {@link #CACHE_FILES_TIMESTAMPS} is not a *public* constant.
The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#CACHE_FILE_TIMESTAMPS}]]>
</doc>
</field>
<field name="CACHE_ARCHIVES" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Warning: {@link #CACHE_ARCHIVES} is not a *public* constant.
The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#CACHE_ARCHIVES}]]>
</doc>
</field>
<field name="CACHE_FILES" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Warning: {@link #CACHE_FILES} is not a *public* constant.
The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#CACHE_FILES}]]>
</doc>
</field>
<field name="CACHE_LOCALARCHIVES" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Warning: {@link #CACHE_LOCALARCHIVES} is not a *public* constant.
The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#CACHE_LOCALARCHIVES}]]>
</doc>
</field>
<field name="CACHE_LOCALFILES" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Warning: {@link #CACHE_LOCALFILES} is not a *public* constant.
The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#CACHE_LOCALFILES}]]>
</doc>
</field>
<field name="CACHE_SYMLINK" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Warning: {@link #CACHE_SYMLINK} is not a *public* constant.
The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#CACHE_SYMLINK}]]>
</doc>
</field>
<doc>
<![CDATA[Distribute application-specific large, read-only files efficiently.
<p><code>DistributedCache</code> is a facility provided by the Map-Reduce
framework to cache files (text, archives, jars etc.) needed by applications.
</p>
<p>Applications specify the files, via urls (hdfs:// or http://) to be cached
via the {@link org.apache.hadoop.mapred.JobConf}. The
<code>DistributedCache</code> assumes that the files specified via urls are
already present on the {@link FileSystem} at the path specified by the url
and are accessible by every machine in the cluster.</p>
<p>The framework will copy the necessary files on to the worker node before
any tasks for the job are executed on that node. Its efficiency stems from
the fact that the files are only copied once per job and the ability to
cache archives which are un-archived on the workers.</p>
<p><code>DistributedCache</code> can be used to distribute simple, read-only
data/text files and/or more complex types such as archives, jars etc.
Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes.
Jars may be optionally added to the classpath of the tasks, a rudimentary
software distribution mechanism. Files have execution permissions.
In older versions of Hadoop Map/Reduce, users could optionally ask for symlinks
to be created in the working directory of the child task. In the current
version symlinks are always created. If the URL does not have a fragment,
the name of the file or directory will be used. If multiple files or
directories map to the same link name, the last one added will be used. All
others will not even be downloaded.</p>
<p><code>DistributedCache</code> tracks modification timestamps of the cache
files. Clearly the cache files should not be modified by the application
or externally while the job is executing.</p>
<p>Here is an illustrative example on how to use the
<code>DistributedCache</code>:</p>
<p><blockquote><pre>
// Setting up the cache for the application
1. Copy the requisite files to the <code>FileSystem</code>:
$ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
$ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
$ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
$ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
$ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
$ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
2. Setup the application's <code>JobConf</code>:
JobConf job = new JobConf();
DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
job);
DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
or {@link org.apache.hadoop.mapred.Reducer}:
public static class MapClass extends MapReduceBase
implements Mapper&lt;K, V, K, V&gt; {
private Path[] localArchives;
private Path[] localFiles;
public void configure(JobConf job) {
// Get the cached archives/files
File f = new File("./map.zip/some/file/in/zip.txt");
}
public void map(K key, V value,
OutputCollector&lt;K, V&gt; output, Reporter reporter)
throws IOException {
// Use data from the cached archives/files here
// ...
// ...
output.collect(key, value);
}
}
</pre></blockquote>
It is also very common to use the DistributedCache by using
{@link org.apache.hadoop.util.GenericOptionsParser}.
This class includes methods that should be used by users
(specifically those mentioned in the example above, as well
as {@link DistributedCache#addArchiveToClassPath(Path, Configuration)}),
as well as methods intended for use by the MapReduce framework
(e.g., {@link org.apache.hadoop.mapred.JobClient}).
@see org.apache.hadoop.mapred.JobConf
@see org.apache.hadoop.mapred.JobClient
@see org.apache.hadoop.mapreduce.Job]]>
</doc>
</class>
<!-- end class org.apache.hadoop.filecache.DistributedCache -->
</package>
<package name="org.apache.hadoop.mapred">
<!-- start class org.apache.hadoop.mapred.ClusterStatus -->
<class name="ClusterStatus" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<method name="getTaskTrackers" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of task trackers in the cluster.
@return the number of task trackers in the cluster.]]>
</doc>
</method>
<method name="getActiveTrackerNames" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the names of active task trackers in the cluster.
@return the active task trackers in the cluster.]]>
</doc>
</method>
<method name="getBlacklistedTrackerNames" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the names of blacklisted task trackers in the cluster.
@return the blacklisted task trackers in the cluster.]]>
</doc>
</method>
<method name="getGraylistedTrackerNames" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the names of graylisted task trackers in the cluster.
The gray list of trackers is no longer available on M/R 2.x. The function
is kept to be compatible with M/R 1.x applications.
@return an empty collection of graylisted task trackers in the cluster.]]>
</doc>
</method>
<method name="getGraylistedTrackers" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of graylisted task trackers in the cluster.
The gray list of trackers is no longer available on M/R 2.x. The function
is kept to be compatible with M/R 1.x applications.
@return 0 graylisted task trackers in the cluster.]]>
</doc>
</method>
<method name="getBlacklistedTrackers" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of blacklisted task trackers in the cluster.
@return the number of blacklisted task trackers in the cluster.]]>
</doc>
</method>
<method name="getNumExcludedNodes" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of excluded hosts in the cluster.
@return the number of excluded hosts in the cluster.]]>
</doc>
</method>
<method name="getTTExpiryInterval" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the tasktracker expiry interval for the cluster.
@return the expiry interval in msec]]>
</doc>
</method>
<method name="getMapTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of currently running map tasks in the cluster.
@return the number of currently running map tasks in the cluster.]]>
</doc>
</method>
<method name="getReduceTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of currently running reduce tasks in the cluster.
@return the number of currently running reduce tasks in the cluster.]]>
</doc>
</method>
<method name="getMaxMapTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the maximum capacity for running map tasks in the cluster.
@return the maximum capacity for running map tasks in the cluster.]]>
</doc>
</method>
<method name="getMaxReduceTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the maximum capacity for running reduce tasks in the cluster.
@return the maximum capacity for running reduce tasks in the cluster.]]>
</doc>
</method>
<method name="getJobTrackerStatus" return="org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the JobTracker's status.
@return {@link JobTrackerStatus} of the JobTracker]]>
</doc>
</method>
<method name="getMaxMemory" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns UNINITIALIZED_MEMORY_VALUE (-1)]]>
</doc>
</method>
<method name="getUsedMemory" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns UNINITIALIZED_MEMORY_VALUE (-1)]]>
</doc>
</method>
<method name="getBlackListedTrackersInfo" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Gets the list of blacklisted trackers along with reasons for blacklisting.
@return the collection of {@link BlackListInfo} objects.]]>
</doc>
</method>
<method name="getJobTrackerState" return="org.apache.hadoop.mapred.JobTracker.State"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the current state of the <code>JobTracker</code>,
as {@link JobTracker.State}.
{@link JobTracker.State} should no longer be used on M/R 2.x. The function
is kept to be compatible with M/R 1.x applications.
@return the invalid state of the <code>JobTracker</code>.]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="UNINITIALIZED_MEMORY_VALUE" type="long"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Status information on the current state of the Map-Reduce cluster.
<p><code>ClusterStatus</code> provides clients with information such as:
<ol>
<li>
Size of the cluster.
</li>
<li>
Name of the trackers.
</li>
<li>
Task capacity of the cluster.
</li>
<li>
The number of currently running map and reduce tasks.
</li>
<li>
State of the <code>JobTracker</code>.
</li>
<li>
Details regarding black listed trackers.
</li>
</ol>
<p>Clients can query for the latest <code>ClusterStatus</code>, via
{@link JobClient#getClusterStatus()}.</p>
@see JobClient]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.ClusterStatus -->
<!-- start class org.apache.hadoop.mapred.Counters -->
<class name="Counters" extends="org.apache.hadoop.mapreduce.counters.AbstractCounters"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Counters"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="Counters" type="org.apache.hadoop.mapreduce.Counters"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getGroup" return="org.apache.hadoop.mapred.Counters.Group"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="groupName" type="java.lang.String"/>
</method>
<method name="getGroupNames" return="java.util.Collection"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="makeCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="name" type="java.lang.String"/>
</method>
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #findCounter(String, String)} instead">
<param name="group" type="java.lang.String"/>
<param name="id" type="int"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Find a counter by using strings
@param group the name of the group
@param id the id of the counter within the group (0 to N-1)
@param name the internal name of the counter
@return the counter for that name
@deprecated use {@link #findCounter(String, String)} instead]]>
</doc>
</method>
<method name="incrCounter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<param name="amount" type="long"/>
<doc>
<![CDATA[Increments the specified counter by the specified amount, creating it if
it didn't already exist.
@param key identifies a counter
@param amount amount by which counter is to be incremented]]>
</doc>
</method>
<method name="incrCounter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="counter" type="java.lang.String"/>
<param name="amount" type="long"/>
<doc>
<![CDATA[Increments the specified counter by the specified amount, creating it if
it didn't already exist.
@param group the name of the group
@param counter the internal name of the counter
@param amount amount by which counter is to be incremented]]>
</doc>
</method>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<doc>
<![CDATA[Returns current value of the specified counter, or 0 if the counter
does not exist.
@param key the counter enum to lookup
@return the counter value or 0 if counter not found]]>
</doc>
</method>
<method name="incrAllCounters"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapred.Counters"/>
<doc>
<![CDATA[Increments multiple counters by their amounts in another Counters
instance.
@param other the other Counters instance]]>
</doc>
</method>
<method name="size" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #countCounters()} instead">
<doc>
<![CDATA[@return the total number of counters
@deprecated use {@link #countCounters()} instead]]>
</doc>
</method>
<method name="sum" return="org.apache.hadoop.mapred.Counters"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="a" type="org.apache.hadoop.mapred.Counters"/>
<param name="b" type="org.apache.hadoop.mapred.Counters"/>
<doc>
<![CDATA[Convenience method for computing the sum of two sets of counters.
@param a the first counters
@param b the second counters
@return a new summed counters object]]>
</doc>
</method>
<method name="log"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="log" type="org.slf4j.Logger"/>
<doc>
<![CDATA[Logs the current counter values.
@param log The log to use.]]>
</doc>
</method>
<method name="makeEscapedCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Represent the counter in a textual format that can be converted back to
its object form
@return the string in the following format
{(groupName)(group-displayName)[(counterName)(displayName)(value)][]*}*]]>
</doc>
</method>
<method name="fromEscapedCompactString" return="org.apache.hadoop.mapred.Counters"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="compactString" type="java.lang.String"/>
<exception name="ParseException" type="java.text.ParseException"/>
<doc>
<![CDATA[Convert a stringified (by {@link #makeEscapedCompactString()}) counter
representation into a counter object.
@param compactString to parse
@return a new counters object
@throws ParseException]]>
</doc>
</method>
<field name="MAX_COUNTER_LIMIT" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="MAX_GROUP_LIMIT" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A set of named counters.
<p><code>Counters</code> represent global counters, defined either by the
Map-Reduce framework or applications. Each <code>Counter</code> can be of
any {@link Enum} type.</p>
<p><code>Counters</code> are bunched into {@link Group}s, each comprising
counters from a particular <code>Enum</code> class.</p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.Counters -->
<!-- start class org.apache.hadoop.mapred.Counters.Counter -->
<class name="Counters.Counter" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.Counter"/>
<constructor name="Counter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setDisplayName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="displayName" type="java.lang.String"/>
</method>
<method name="getName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getDisplayName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getValue" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="value" type="long"/>
</method>
<method name="increment"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="incr" type="long"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="makeEscapedCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the compact stringified version of the counter in the format
[(actual-name)(display-name)(value)]
@return the stringified result]]>
</doc>
</method>
<method name="contentEquals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="deprecated, no comment">
<param name="counter" type="org.apache.hadoop.mapred.Counters.Counter"/>
<doc>
<![CDATA[Checks for (content) equality of two (basic) counters
@param counter to compare
@return true if content equals
@deprecated]]>
</doc>
</method>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the value of the counter]]>
</doc>
</method>
<method name="getUnderlyingCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericRight" type="java.lang.Object"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[A counter record, comprising its name and value.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.Counters.Counter -->
<!-- start class org.apache.hadoop.mapred.Counters.Group -->
<class name="Counters.Group" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.counters.CounterGroupBase"/>
<constructor name="Group"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
<doc>
<![CDATA[@param counterName the name of the counter
@return the value of the specified counter, or 0 if the counter does
not exist.]]>
</doc>
</method>
<method name="makeEscapedCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the compact stringified version of the group in the format
{(actual-name)(display-name)[][][]} where [] are compact strings
for the counters within.]]>
</doc>
</method>
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #findCounter(String)} instead">
<param name="id" type="int"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Get the counter for the given id and create it if it doesn't exist.
@param id the numeric id of the counter within the group
@param name the internal counter name
@return the counter
@deprecated use {@link #findCounter(String)} instead]]>
</doc>
</method>
<method name="getCounterForName" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Get the counter for the given name and create it if it doesn't exist.
@param name the internal counter name
@return the counter]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getDisplayName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setDisplayName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="displayName" type="java.lang.String"/>
</method>
<method name="addCounter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counter" type="org.apache.hadoop.mapred.Counters.Counter"/>
</method>
<method name="addCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<param name="displayName" type="java.lang.String"/>
<param name="value" type="long"/>
</method>
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
<param name="displayName" type="java.lang.String"/>
</method>
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
<param name="create" type="boolean"/>
</method>
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
</method>
<method name="size" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="incrAllCounters"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="rightGroup" type="org.apache.hadoop.mapreduce.counters.CounterGroupBase"/>
</method>
<method name="getUnderlyingGroup" return="org.apache.hadoop.mapreduce.counters.CounterGroupBase"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericRight" type="java.lang.Object"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[<code>Group</code> of counters, comprising counters from a particular
counter {@link Enum} class.
<p><code>Group</code> handles localization of the class name and the
counter names.</p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.Counters.Group -->
<!-- start class org.apache.hadoop.mapred.FileAlreadyExistsException -->
<class name="FileAlreadyExistsException" extends="java.io.IOException"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileAlreadyExistsException"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="FileAlreadyExistsException" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[Used when target file already exists for any operation and
is not configured to be overwritten.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FileAlreadyExistsException -->
<!-- start class org.apache.hadoop.mapred.FileInputFormat -->
<class name="FileInputFormat" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputFormat"/>
<constructor name="FileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setMinSplitSize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="minSplitSize" type="long"/>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="filename" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Is the given filename splittable? Usually, true, but if the file is
stream compressed, it will not be.
The default implementation in <code>FileInputFormat</code> always returns
true. Implementations that may deal with non-splittable files <i>must</i>
override this method.
<code>FileInputFormat</code> implementations can override this and return
<code>false</code> to ensure that individual input files are never split-up
so that {@link Mapper}s process entire files.
@param fs the file system that the file is on
@param filename the file name to check
@return is this file splittable?]]>
</doc>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setInputPathFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="filter" type="java.lang.Class"/>
<doc>
<![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job.
@param filter the PathFilter class to use for filtering the input paths.]]>
</doc>
</method>
<method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get a PathFilter instance of the filter set for the input paths.
@return the PathFilter instance set for the job, <code>null</code> if none has been set.]]>
</doc>
</method>
<method name="addInputPathRecursively"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="result" type="java.util.List"/>
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<param name="inputFilter" type="org.apache.hadoop.fs.PathFilter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add files in the input path recursively into the results.
@param result
The List to store all files.
@param fs
The FileSystem.
@param path
The input path.
@param inputFilter
The input filter that can be used to filter files/dirs.
@throws IOException]]>
</doc>
</method>
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[List input directories.
Subclasses may override to, e.g., select only files matching a regular
expression.
@param job the job to list input paths for
@return array of FileStatus objects
@throws IOException if zero items.]]>
</doc>
</method>
<method name="makeSplit" return="org.apache.hadoop.mapred.FileSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="file" type="org.apache.hadoop.fs.Path"/>
<param name="start" type="long"/>
<param name="length" type="long"/>
<param name="hosts" type="java.lang.String[]"/>
<doc>
<![CDATA[A factory that makes the split for this class. It can be overridden
by sub-classes to make sub-types.]]>
</doc>
</method>
<method name="makeSplit" return="org.apache.hadoop.mapred.FileSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="file" type="org.apache.hadoop.fs.Path"/>
<param name="start" type="long"/>
<param name="length" type="long"/>
<param name="hosts" type="java.lang.String[]"/>
<param name="inMemoryHosts" type="java.lang.String[]"/>
<doc>
<![CDATA[A factory that makes the split for this class. It can be overridden
by sub-classes to make sub-types.]]>
</doc>
</method>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Splits files returned by {@link #listStatus(JobConf)} when
they're too big.]]>
</doc>
</method>
<method name="computeSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="goalSize" type="long"/>
<param name="minSize" type="long"/>
<param name="blockSize" type="long"/>
</method>
<method name="getBlockIndex" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
<param name="offset" type="long"/>
</method>
<method name="setInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="commaSeparatedPaths" type="java.lang.String"/>
<doc>
<![CDATA[Sets the given comma separated paths as the list of inputs
for the map-reduce job.
@param conf Configuration of the job
@param commaSeparatedPaths Comma separated paths to be set as
the list of inputs for the map-reduce job.]]>
</doc>
</method>
<method name="addInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="commaSeparatedPaths" type="java.lang.String"/>
<doc>
<![CDATA[Add the given comma separated paths to the list of inputs for
the map-reduce job.
@param conf The configuration of the job
@param commaSeparatedPaths Comma separated paths to be added to
the list of inputs for the map-reduce job.]]>
</doc>
</method>
<method name="setInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/>
<doc>
<![CDATA[Set the array of {@link Path}s as the list of inputs
for the map-reduce job.
@param conf Configuration of the job.
@param inputPaths the {@link Path}s of the input directories/files
for the map-reduce job.]]>
</doc>
</method>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job.
@param conf The configuration of the job
@param path {@link Path} to be added to the list of inputs for
the map-reduce job.]]>
</doc>
</method>
<method name="getInputPaths" return="org.apache.hadoop.fs.Path[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the list of input {@link Path}s for the map-reduce job.
@param conf The configuration of the job
@return the list of input {@link Path}s for the map-reduce job.]]>
</doc>
</method>
<method name="getSplitHosts" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
<param name="offset" type="long"/>
<param name="splitSize" type="long"/>
<param name="clusterMap" type="org.apache.hadoop.net.NetworkTopology"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This function identifies and returns the hosts that contribute
most for a given split. For calculating the contribution, rack
locality is treated on par with host locality, so hosts from racks
that contribute the most are preferred over hosts on racks that
contribute less.
@param blkLocations The list of block locations
@param offset
@param splitSize
@return an array of hosts that contribute most to this split
@throws IOException]]>
</doc>
</method>
<field name="LOG" type="org.slf4j.Logger"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="NUM_INPUT_FILES" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="INPUT_DIR_RECURSIVE" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A base class for file-based {@link InputFormat}.
<p><code>FileInputFormat</code> is the base class for all file-based
<code>InputFormat</code>s. This provides a generic implementation of
{@link #getSplits(JobConf, int)}.
Implementations of <code>FileInputFormat</code> can also override the
{@link #isSplitable(FileSystem, Path)} method to prevent input files
from being split-up in certain situations. Implementations that may
deal with non-splittable files <i>must</i> override this method, since
the default implementation assumes splitting is always possible.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FileInputFormat -->
<!-- start class org.apache.hadoop.mapred.FileOutputCommitter -->
<class name="FileOutputCommitter" extends="org.apache.hadoop.mapred.OutputCommitter"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileOutputCommitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getWorkPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<param name="outputPath" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="commitJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="cleanupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="abortJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<param name="runState" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setupTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="commitTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="abortTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="isRecoverySupported" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="isCommitJobRepeatable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="isRecoverySupported" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="recoverTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="LOG" type="org.slf4j.Logger"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="TEMP_DIR_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Temporary directory name]]>
</doc>
</field>
<field name="SUCCEEDED_FILE_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[An {@link OutputCommitter} that commits files specified
in the job output directory, i.e. ${mapreduce.output.fileoutputformat.outputdir}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FileOutputCommitter -->
<!-- start class org.apache.hadoop.mapred.FileOutputFormat -->
<class name="FileOutputFormat" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
<constructor name="FileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setCompressOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="compress" type="boolean"/>
<doc>
<![CDATA[Set whether the output of the job is compressed.
@param conf the {@link JobConf} to modify
@param compress should the output of the job be compressed?]]>
</doc>
</method>
<method name="getCompressOutput" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Is the job output compressed?
@param conf the {@link JobConf} to look in
@return <code>true</code> if the job output should be compressed,
<code>false</code> otherwise]]>
</doc>
</method>
<method name="setOutputCompressorClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="codecClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs.
@param conf the {@link JobConf} to modify
@param codecClass the {@link CompressionCodec} to be used to
compress the job outputs]]>
</doc>
</method>
<method name="getOutputCompressorClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="defaultValue" type="java.lang.Class"/>
<doc>
<![CDATA[Get the {@link CompressionCodec} for compressing the job outputs.
@param conf the {@link JobConf} to look in
@param defaultValue the {@link CompressionCodec} to return if not set
@return the {@link CompressionCodec} to be used to compress the
job outputs
@throws IllegalArgumentException if the class was specified, but not found]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/>
<exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setOutputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="outputDir" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the {@link Path} of the output directory for the map-reduce job.
@param conf The configuration of the job.
@param outputDir the {@link Path} of the output directory for
the map-reduce job.]]>
</doc>
</method>
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the {@link Path} to the output directory for the map-reduce job.
@return the {@link Path} to the output directory for the map-reduce job.
@see FileOutputFormat#getWorkOutputPath(JobConf)]]>
</doc>
</method>
<method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the {@link Path} to the task's temporary output directory
for the map-reduce job
<b id="SideEffectFiles">Tasks' Side-Effect Files</b>
<p><i>Note:</i> The following is valid only if the {@link OutputCommitter}
is {@link FileOutputCommitter}. If <code>OutputCommitter</code> is not
a <code>FileOutputCommitter</code>, the task's temporary output
directory is same as {@link #getOutputPath(JobConf)} i.e.
<tt>${mapreduce.output.fileoutputformat.outputdir}</tt></p>
<p>Some applications need to create/write-to side-files, which differ from
the actual job-outputs.</p>
<p>In such cases there could be issues with 2 instances of the same TIP
(running simultaneously e.g. speculative tasks) trying to open/write-to the
same file (path) on HDFS. Hence the application-writer will have to pick
unique names per task-attempt (e.g. using the attemptid, say
<tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
<p>To get around this the Map-Reduce framework helps the application-writer
out by maintaining a special
<tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>
sub-directory for each task-attempt on HDFS where the output of the
task-attempt goes. On successful completion of the task-attempt the files
in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only)
are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the
framework discards the sub-directory of unsuccessful task-attempts. This
is completely transparent to the application.</p>
<p>The application-writer can take advantage of this by creating any
side-files required in <tt>${mapreduce.task.output.dir}</tt> during execution
of the reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the
framework will move them out similarly - thus the application-writer doesn't have to pick
unique paths per task-attempt.</p>
<p><i>Note</i>: the value of <tt>${mapreduce.task.output.dir}</tt> during
execution of a particular task-attempt is actually
<tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>, and this value is
set by the map-reduce framework. So, just create any side-files in the
path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce
task to take advantage of this feature.</p>
<p>The entire discussion holds true for maps of jobs with
reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
goes directly to HDFS.</p>
@return the {@link Path} to the task's temporary output directory
for the map-reduce job.]]>
</doc>
</method>
<method name="getTaskOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Helper function to create the task's temporary output directory and
return the path to the task's output file.
@param conf job-configuration
@param name temporary task-output filename
@return path to the task's temporary output file
@throws IOException]]>
</doc>
</method>
<method name="getUniqueName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Helper function to generate a name that is unique for the task.
<p>The generated name can be used to create custom files from within the
different tasks for the job, the names for different tasks will not collide
with each other.</p>
<p>The given name is postfixed with the task type, 'm' for maps, 'r' for
reduces and the task partition number. For example, given a name 'test'
running on the first map of the job the generated name will be
'test-m-00000'.</p>
@param conf the configuration for the job.
@param name the name to make unique.
@return a unique name across all tasks of the job.]]>
</doc>
</method>
<method name="getPathForCustomFile" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Helper function to generate a {@link Path} for a file that is unique for
the task within the job output directory.
<p>The path can be used to create custom files from within the map and
reduce tasks. The path name will be unique for each task. The path parent
will be the job output directory.</p>
<p>This method uses the {@link #getUniqueName} method to make the file name
unique for the task.</p>
@param conf the configuration for the job.
@param name the name for the file.
@return a unique path across all tasks of the job.]]>
</doc>
</method>
<doc>
<![CDATA[A base class for {@link OutputFormat}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FileOutputFormat -->
<!-- start class org.apache.hadoop.mapred.FileSplit -->
<class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputSplitWithLocationInfo"/>
<constructor name="FileSplit"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="deprecated, no comment">
<doc>
<![CDATA[Constructs a split.
@deprecated
@param file the file name
@param start the position of the first byte in the file to process
@param length the number of bytes in the file to process]]>
</doc>
</constructor>
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a split with host information
@param file the file name
@param start the position of the first byte in the file to process
@param length the number of bytes in the file to process
@param hosts the list of hosts containing the block, possibly null]]>
</doc>
</constructor>
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[], java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a split with host information
@param file the file name
@param start the position of the first byte in the file to process
@param length the number of bytes in the file to process
@param hosts the list of hosts containing the block, possibly null
@param inMemoryHosts the list of hosts containing the block in memory]]>
</doc>
</constructor>
<constructor name="FileSplit" type="org.apache.hadoop.mapreduce.lib.input.FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The file containing this split's data.]]>
</doc>
</method>
<method name="getStart" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The position of the first byte in the file to process.]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The number of bytes in the file to process.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getLocationInfo" return="org.apache.hadoop.mapred.SplitLocationInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A section of an input file. Returned by {@link
InputFormat#getSplits(JobConf, int)} and passed to
{@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FileSplit -->
<!-- start class org.apache.hadoop.mapred.FixedLengthInputFormat -->
<class name="FixedLengthInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="FixedLengthInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setRecordLength"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="recordLength" type="int"/>
<doc>
<![CDATA[Set the length of each record
@param conf configuration
@param recordLength the length of a record]]>
</doc>
</method>
<method name="getRecordLength" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get record length value
@param conf configuration
@return the record length, zero means none was set]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<field name="FIXED_RECORD_LENGTH" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[FixedLengthInputFormat is an input format used to read input files
which contain fixed length records. The content of a record need not be
text. It can be arbitrary binary data. Users must configure the record
length property by calling:
FixedLengthInputFormat.setRecordLength(conf, recordLength);<br><br> or
conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, recordLength);
<br><br>
@see FixedLengthRecordReader]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FixedLengthInputFormat -->
<!-- start class org.apache.hadoop.mapred.ID -->
<class name="ID" extends="org.apache.hadoop.mapreduce.ID"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ID" type="int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[constructs an ID object from the given int]]>
</doc>
</constructor>
<constructor name="ID"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[A general identifier, which internally stores the id
as an integer. This is the super class of {@link JobID},
{@link TaskID} and {@link TaskAttemptID}.
@see JobID
@see TaskID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.ID -->
<!-- start interface org.apache.hadoop.mapred.InputFormat -->
<interface name="InputFormat" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Logically split the set of input files for the job.
<p>Each {@link InputSplit} is then assigned to an individual {@link Mapper}
for processing.</p>
<p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
input files are not physically split into chunks. E.g. a split could
be <i>&lt;input-file-path, start, offset&gt;</i> tuple.</p>
@param job job configuration.
@param numSplits the desired number of splits, a hint.
@return an array of {@link InputSplit}s for the job.]]>
</doc>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the {@link RecordReader} for the given {@link InputSplit}.
<p>It is the responsibility of the <code>RecordReader</code> to respect
record boundaries while processing the logical split to present a
record-oriented view to the individual task.</p>
@param split the {@link InputSplit}
@param job the job that this split belongs to
@return a {@link RecordReader}]]>
</doc>
</method>
<doc>
<![CDATA[<code>InputFormat</code> describes the input-specification for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>InputFormat</code> of the
job to:</p>
<ol>
<li>
Validate the input-specification of the job.
</li>
<li>
Split-up the input file(s) into logical {@link InputSplit}s, each of
which is then assigned to an individual {@link Mapper}.
</li>
<li>
Provide the {@link RecordReader} implementation to be used to glean
input records from the logical <code>InputSplit</code> for processing by
the {@link Mapper}.
</li>
</ol>
<p>The default behavior of file-based {@link InputFormat}s, typically
sub-classes of {@link FileInputFormat}, is to split the
input into <i>logical</i> {@link InputSplit}s based on the total size, in
bytes, of the input files. However, the {@link FileSystem} blocksize of
the input files is treated as an upper bound for input splits. A lower bound
on the split size can be set via
<a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.input.fileinputformat.split.minsize">
mapreduce.input.fileinputformat.split.minsize</a>.</p>
<p>Clearly, logical splits based on input-size is insufficient for many
applications since record boundaries are to be respected. In such cases, the
application has to also implement a {@link RecordReader} on whom lies the
responsibility to respect record-boundaries and present a record-oriented
view of the logical <code>InputSplit</code> to the individual task.</p>
@see InputSplit
@see RecordReader
@see JobClient
@see FileInputFormat]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.InputFormat -->
<!-- start interface org.apache.hadoop.mapred.InputSplit -->
<interface name="InputSplit" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<method name="getLength" return="long"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the total number of bytes in the data of the <code>InputSplit</code>.
@return the number of bytes in the input split.
@throws IOException]]>
</doc>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the list of hostnames where the input split is located.
@return list of hostnames where data of the <code>InputSplit</code> is
located as an array of <code>String</code>s.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>InputSplit</code> represents the data to be processed by an
individual {@link Mapper}.
<p>Typically, it presents a byte-oriented view on the input and it is the
responsibility of {@link RecordReader} of the job to process this and present
a record-oriented view.</p>
@see InputFormat
@see RecordReader]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.InputSplit -->
<!-- start interface org.apache.hadoop.mapred.InputSplitWithLocationInfo -->
<interface name="InputSplitWithLocationInfo" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputSplit"/>
<method name="getLocationInfo" return="org.apache.hadoop.mapred.SplitLocationInfo[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets info about which nodes the input split is stored on and how it is
stored at each location.
@return list of <code>SplitLocationInfo</code>s describing how the split
data is stored at each location. A null value indicates that all the
locations have the data stored on disk.
@throws IOException]]>
</doc>
</method>
</interface>
<!-- end interface org.apache.hadoop.mapred.InputSplitWithLocationInfo -->
<!-- start class org.apache.hadoop.mapred.InvalidFileTypeException -->
<class name="InvalidFileTypeException" extends="java.io.IOException"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidFileTypeException"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="InvalidFileTypeException" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[Used when file type differs from the desired file type, like
getting a file when a directory is expected, or a wrong file type.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.InvalidFileTypeException -->
<!-- start class org.apache.hadoop.mapred.InvalidInputException -->
<class name="InvalidInputException" extends="java.io.IOException"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidInputException" type="java.util.List"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create the exception with the given list.
@param probs the list of problems to report. this list is not copied.]]>
</doc>
</constructor>
<method name="getProblems" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the complete list of the problems reported.
@return the list of problems, which must not be modified]]>
</doc>
</method>
<method name="getMessage" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get a summary message of the problems found.
@return the concatenated messages from all of the problems.]]>
</doc>
</method>
<doc>
<![CDATA[This class wraps a list of problems with the input, so that the user
can get a list of problems together instead of finding and fixing them one
by one.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.InvalidInputException -->
<!-- start class org.apache.hadoop.mapred.InvalidJobConfException -->
<class name="InvalidJobConfException" extends="java.io.IOException"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidJobConfException"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="InvalidJobConfException" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="InvalidJobConfException" type="java.lang.String, java.lang.Throwable"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="InvalidJobConfException" type="java.lang.Throwable"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This exception is thrown when jobconf misses some mandatory attributes
or value of some attributes is invalid.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.InvalidJobConfException -->
<!-- start class org.apache.hadoop.mapred.JobClient -->
<class name="JobClient" extends="org.apache.hadoop.mapreduce.tools.CLI"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="java.lang.AutoCloseable"/>
<constructor name="JobClient"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job client.]]>
</doc>
</constructor>
<constructor name="JobClient" type="org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Build a job client with the given {@link JobConf}, and connect to the
default cluster
@param conf the job configuration.
@throws IOException]]>
</doc>
</constructor>
<constructor name="JobClient" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Build a job client with the given {@link Configuration},
and connect to the default cluster
@param conf the configuration.
@throws IOException]]>
</doc>
</constructor>
<constructor name="JobClient" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Build a job client, connect to the indicated job tracker.
@param jobTrackAddr the job tracker to connect to.
@param conf configuration.]]>
</doc>
</constructor>
<method name="init"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Connect to the default cluster
@param conf the job configuration.
@throws IOException]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close the <code>JobClient</code>.]]>
</doc>
</method>
<method name="getFs" return="org.apache.hadoop.fs.FileSystem"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get a filesystem handle. We need this to prepare jobs
for submission to the MapReduce system.
@return the filesystem handle.]]>
</doc>
</method>
<method name="getClusterHandle" return="org.apache.hadoop.mapreduce.Cluster"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get a handle to the Cluster]]>
</doc>
</method>
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobFile" type="java.lang.String"/>
<exception name="FileNotFoundException" type="java.io.FileNotFoundException"/>
<exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Submit a job to the MR system.
This returns a handle to the {@link RunningJob} which can be used to track
the running-job.
@param jobFile the job configuration.
@return a handle to the {@link RunningJob} which can be used to track the
running-job.
@throws FileNotFoundException
@throws InvalidJobConfException
@throws IOException]]>
</doc>
</method>
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="FileNotFoundException" type="java.io.FileNotFoundException"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Submit a job to the MR system.
This returns a handle to the {@link RunningJob} which can be used to track
the running-job.
@param conf the job configuration.
@return a handle to the {@link RunningJob} which can be used to track the
running-job.
@throws FileNotFoundException
@throws IOException]]>
</doc>
</method>
<method name="getJobInner" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get a {@link RunningJob} object to track an ongoing job. Returns
null if the id does not correspond to any known job.
@param jobid the jobid of the job.
@return the {@link RunningJob} handle to track the job, null if the
<code>jobid</code> doesn't correspond to any known job.
@throws IOException]]>
</doc>
</method>
<method name="getJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #getJob(JobID)}.">
<param name="jobid" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Applications should rather use {@link #getJob(JobID)}.]]>
</doc>
</method>
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the information of the current state of the map tasks of a job.
@param jobId the job to query.
@return the list of all of the map tips.
@throws IOException]]>
</doc>
</method>
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #getMapTaskReports(JobID)}">
<param name="jobId" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}]]>
</doc>
</method>
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the information of the current state of the reduce tasks of a job.
@param jobId the job to query.
@return the list of all of the reduce tips.
@throws IOException]]>
</doc>
</method>
<method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the information of the current state of the cleanup tasks of a job.
@param jobId the job to query.
@return the list of all of the cleanup tips.
@throws IOException]]>
</doc>
</method>
<method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the information of the current state of the setup tasks of a job.
@param jobId the job to query.
@return the list of all of the setup tips.
@throws IOException]]>
</doc>
</method>
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #getReduceTaskReports(JobID)}">
<param name="jobId" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}]]>
</doc>
</method>
<method name="displayTasks"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<param name="type" type="java.lang.String"/>
<param name="state" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Display the information about a job's tasks, of a particular type and
in a particular state
@param jobId the ID of the job
@param type the type of the task (map/reduce/setup/cleanup)
@param state the state of the task
(pending/running/completed/failed/killed)
@throws IOException when there is an error communicating with the master
@throws IllegalArgumentException if an invalid type/state is passed]]>
</doc>
</method>
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get status information about the Map-Reduce cluster.
@return the status information about the Map-Reduce cluster as an object
of {@link ClusterStatus}.
@throws IOException]]>
</doc>
</method>
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="detailed" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get status information about the Map-Reduce cluster.
@param detailed if true then get a detailed status including the
tracker names
@return the status information about the Map-Reduce cluster as an object
of {@link ClusterStatus}.
@throws IOException]]>
</doc>
</method>
<method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the jobs that are not completed and not failed.
@return array of {@link JobStatus} for the running/to-be-run jobs.
@throws IOException]]>
</doc>
</method>
<method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the jobs that are submitted.
@return array of {@link JobStatus} for the submitted jobs.
@throws IOException]]>
</doc>
</method>
<method name="runJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Utility that submits a job, then polls for progress until the job is
complete.
@param job the job configuration.
@throws IOException if the job fails]]>
</doc>
</method>
<method name="monitorAndPrintJob" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="job" type="org.apache.hadoop.mapred.RunningJob"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Monitor a job and print status in real-time as progress is made and tasks
fail.
@param conf the job's configuration
@param job the job to track
@return true if the job succeeded
@throws IOException if communication to the JobTracker fails]]>
</doc>
</method>
<method name="setTaskOutputFilter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/>
<doc>
<![CDATA[Sets the output filter for tasks. Only those tasks are printed whose
output matches the filter.
@param newValue task filter.]]>
</doc>
</method>
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the task output filter out of the JobConf.
@param job the JobConf to examine.
@return the filter level.]]>
</doc>
</method>
<method name="setTaskOutputFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/>
<doc>
<![CDATA[Modify the JobConf to set the task output filter.
@param job the JobConf to modify.
@param newValue the value to set.]]>
</doc>
</method>
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns task output filter.
@return task filter.]]>
</doc>
</method>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="cntrs" type="org.apache.hadoop.mapreduce.Counters"/>
<param name="counterGroupName" type="java.lang.String"/>
<param name="counterName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getDefaultMaps" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get status information about the max available Maps in the cluster.
@return the max available Maps in the cluster
@throws IOException]]>
</doc>
</method>
<method name="getDefaultReduces" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get status information about the max available Reduces in the cluster.
@return the max available Reduces in the cluster
@throws IOException]]>
</doc>
</method>
<method name="getSystemDir" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Grab the jobtracker system directory path where job-specific files are to be placed.
@return the system directory where job-specific files are to be placed.]]>
</doc>
</method>
<method name="isJobDirValid" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobDirPath" type="org.apache.hadoop.fs.Path"/>
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Checks if the job directory is clean and has all the required components
for (re) starting the job]]>
</doc>
</method>
<method name="getStagingAreaDir" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Fetch the staging area directory for the application
@return path to staging area directory
@throws IOException]]>
</doc>
</method>
<method name="getRootQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns an array of queue information objects about root level queues
configured
@return the array of root level JobQueueInfo objects
@throws IOException]]>
</doc>
</method>
<method name="getChildQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns an array of queue information objects about immediate children
of queue queueName.
@param queueName name of the queue whose immediate children are returned
@return the array of immediate children JobQueueInfo objects
@throws IOException]]>
</doc>
</method>
<method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return an array of queue information objects about all the Job Queues
configured.
@return Array of JobQueueInfo objects
@throws IOException]]>
</doc>
</method>
<method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets all the jobs which were added to particular Job Queue
@param queueName name of the Job Queue
@return Array of jobs present in the job queue
@throws IOException]]>
</doc>
</method>
<method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the queue information associated to a particular Job Queue
@param queueName name of the job queue.
@return Queue information associated to particular queue.
@throws IOException]]>
</doc>
</method>
<method name="getQueueAclsForCurrentUser" return="org.apache.hadoop.mapred.QueueAclsInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the Queue ACLs for current user
@return array of QueueAclsInfo object for current user.
@throws IOException]]>
</doc>
</method>
<method name="getDelegationToken" return="org.apache.hadoop.security.token.Token"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="renewer" type="org.apache.hadoop.io.Text"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get a delegation token for the user from the JobTracker.
@param renewer the user who can renew the token
@return the new token
@throws IOException]]>
</doc>
</method>
<method name="renewDelegationToken" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link Token#renew} instead">
<param name="token" type="org.apache.hadoop.security.token.Token"/>
<exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Renew a delegation token
@param token the token to renew
@return the new expiration time of the token
@throws InvalidToken
@throws IOException
@deprecated Use {@link Token#renew} instead]]>
</doc>
</method>
<method name="cancelDelegationToken"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link Token#cancel} instead">
<param name="token" type="org.apache.hadoop.security.token.Token"/>
<exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Cancel a delegation token from the JobTracker
@param token the token to cancel
@throws IOException
@deprecated Use {@link Token#cancel} instead]]>
</doc>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="argv" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<field name="MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_KEY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_DEFAULT" type="boolean"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_KEY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_DEFAULT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[<code>JobClient</code> is the primary interface for the user-job to interact
with the cluster.
<code>JobClient</code> provides facilities to submit jobs, track their
progress, access component-tasks' reports/logs, get the Map-Reduce cluster
status information etc.
<p>The job submission process involves:
<ol>
<li>
Checking the input and output specifications of the job.
</li>
<li>
Computing the {@link InputSplit}s for the job.
</li>
<li>
Setting up the requisite accounting information for the {@link DistributedCache}
of the job, if necessary.
</li>
<li>
Copying the job's jar and configuration to the map-reduce system directory
on the distributed file-system.
</li>
<li>
Submitting the job to the cluster and optionally monitoring
its status.
</li>
</ol>
Normally the user creates the application, describes various facets of the
job via {@link JobConf} and then uses the <code>JobClient</code> to submit
the job and monitor its progress.
<p>Here is an example on how to use <code>JobClient</code>:</p>
<p><blockquote><pre>
// Create a new JobConf
JobConf job = new JobConf(new Configuration(), MyJob.class);
// Specify various job-specific parameters
job.setJobName("myjob");
job.setInputPath(new Path("in"));
job.setOutputPath(new Path("out"));
job.setMapperClass(MyJob.MyMapper.class);
job.setReducerClass(MyJob.MyReducer.class);
// Submit the job, then poll for progress until the job is complete
JobClient.runJob(job);
</pre></blockquote>
<b id="JobControl">Job Control</b>
<p>At times clients would chain map-reduce jobs to accomplish complex tasks
which cannot be done via a single map-reduce job. This is fairly easy since
the output of the job, typically, goes to distributed file-system and that
can be used as the input for the next job.</p>
<p>However, this also means that the onus on ensuring jobs are complete
(success/failure) lies squarely on the clients. In such situations the
various job-control options are:
<ol>
<li>
{@link #runJob(JobConf)} : submits the job and returns only after
the job has completed.
</li>
<li>
{@link #submitJob(JobConf)} : only submits the job; the client then polls the
returned handle to the {@link RunningJob} to query status and make
scheduling decisions.
</li>
<li>
{@link JobConf#setJobEndNotificationURI(String)} : sets up a notification
on job-completion, thus avoiding polling.
</li>
</ol>
@see JobConf
@see ClusterStatus
@see Tool
@see DistributedCache]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobClient -->
<!-- start class org.apache.hadoop.mapred.JobConf -->
<class name="JobConf" extends="org.apache.hadoop.conf.Configuration"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce job configuration.]]>
</doc>
</constructor>
<constructor name="JobConf" type="java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce job configuration.
@param exampleClass a class whose containing jar is used as the job's jar.]]>
</doc>
</constructor>
<constructor name="JobConf" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce job configuration.
@param conf a Configuration whose settings will be inherited.]]>
</doc>
</constructor>
<constructor name="JobConf" type="org.apache.hadoop.conf.Configuration, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce job configuration.
@param conf a Configuration whose settings will be inherited.
@param exampleClass a class whose containing jar is used as the job's jar.]]>
</doc>
</constructor>
<constructor name="JobConf" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce configuration.
@param config a Configuration-format XML job description file.]]>
</doc>
</constructor>
<constructor name="JobConf" type="org.apache.hadoop.fs.Path"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce configuration.
@param config a Configuration-format XML job description file.]]>
</doc>
</constructor>
<constructor name="JobConf" type="boolean"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[A new map/reduce configuration where the behavior of reading from the
default resources can be turned off.
<p>
If the parameter {@code loadDefaults} is false, the new instance
will not load resources from the default files.
@param loadDefaults specifies whether to load from the default files]]>
</doc>
</constructor>
<method name="getCredentials" return="org.apache.hadoop.security.Credentials"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get credentials for the job.
@return credentials for the job]]>
</doc>
</method>
<method name="getJar" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user jar for the map-reduce job.
@return the user jar for the map-reduce job.]]>
</doc>
</method>
<method name="setJar"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jar" type="java.lang.String"/>
<doc>
<![CDATA[Set the user jar for the map-reduce job.
@param jar the user jar for the map-reduce job.]]>
</doc>
</method>
<method name="getJarUnpackPattern" return="java.util.regex.Pattern"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the pattern for jar contents to unpack on the tasktracker]]>
</doc>
</method>
<method name="setJarByClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<doc>
<![CDATA[Set the job's jar file by finding an example class location.
@param cls the example class.]]>
</doc>
</method>
<method name="getLocalDirs" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="deleteLocalFiles"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Use MRAsyncDiskService.moveAndDeleteAllVolumes instead.]]>
</doc>
</method>
<method name="deleteLocalFiles"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="subdir" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getLocalPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="pathString" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Constructs a local file name. Files are distributed among configured
local directories.]]>
</doc>
</method>
<method name="getUser" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the reported username for this job.
@return the username]]>
</doc>
</method>
<method name="setUser"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="user" type="java.lang.String"/>
<doc>
<![CDATA[Set the reported username for this job.
@param user the username for this job.]]>
</doc>
</method>
<method name="setKeepFailedTaskFiles"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="keep" type="boolean"/>
<doc>
<![CDATA[Set whether the framework should keep the intermediate files for
failed tasks.
@param keep <code>true</code> if framework should keep the intermediate files
for failed tasks, <code>false</code> otherwise.]]>
</doc>
</method>
<method name="getKeepFailedTaskFiles" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should the temporary files for failed tasks be kept?
@return should the files be kept?]]>
</doc>
</method>
<method name="setKeepTaskFilesPattern"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="pattern" type="java.lang.String"/>
<doc>
<![CDATA[Set a regular expression for task names that should be kept.
The regular expression ".*_m_000123_0" would keep the files
for the first instance of map 123 that ran.
@param pattern the java.util.regex.Pattern to match against the
task names.]]>
</doc>
</method>
<method name="getKeepTaskFilesPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the regular expression that is matched against the task names
to see if we need to keep the files.
@return the pattern as a string, if it was set, otherwise null.]]>
</doc>
</method>
<method name="setWorkingDirectory"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="dir" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the current working directory for the default file system.
@param dir the new current working directory.]]>
</doc>
</method>
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the current working directory for the default file system.
@return the directory name.]]>
</doc>
</method>
<method name="setNumTasksToExecutePerJvm"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="numTasks" type="int"/>
<doc>
<![CDATA[Sets the number of tasks that a spawned task JVM should run
before it exits
@param numTasks the number of tasks to execute; defaults to 1;
-1 signifies no limit]]>
</doc>
</method>
<method name="getNumTasksToExecutePerJvm" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of tasks that a spawned JVM should execute]]>
</doc>
</method>
<method name="getInputFormat" return="org.apache.hadoop.mapred.InputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link InputFormat} implementation for the map-reduce job,
defaults to {@link TextInputFormat} if not specified explicitly.
@return the {@link InputFormat} implementation for the map-reduce job.]]>
</doc>
</method>
<method name="setInputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link InputFormat} implementation for the map-reduce job.
@param theClass the {@link InputFormat} implementation for the map-reduce
job.]]>
</doc>
</method>
<method name="getOutputFormat" return="org.apache.hadoop.mapred.OutputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link OutputFormat} implementation for the map-reduce job,
defaults to {@link TextOutputFormat} if not specified explicitly.
@return the {@link OutputFormat} implementation for the map-reduce job.]]>
</doc>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapred.OutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link OutputCommitter} implementation for the map-reduce job,
defaults to {@link FileOutputCommitter} if not specified explicitly.
@return the {@link OutputCommitter} implementation for the map-reduce job.]]>
</doc>
</method>
<method name="setOutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link OutputCommitter} implementation for the map-reduce job.
@param theClass the {@link OutputCommitter} implementation for the map-reduce
job.]]>
</doc>
</method>
<method name="setOutputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link OutputFormat} implementation for the map-reduce job.
@param theClass the {@link OutputFormat} implementation for the map-reduce
job.]]>
</doc>
</method>
<method name="setCompressMapOutput"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="compress" type="boolean"/>
<doc>
<![CDATA[Should the map outputs be compressed before transfer?
@param compress should the map outputs be compressed?]]>
</doc>
</method>
<method name="getCompressMapOutput" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Are the outputs of the maps to be compressed?
@return <code>true</code> if the outputs of the maps are to be compressed,
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setMapOutputCompressorClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="codecClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the given class as the {@link CompressionCodec} for the map outputs.
@param codecClass the {@link CompressionCodec} class that will compress
the map outputs.]]>
</doc>
</method>
<method name="getMapOutputCompressorClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="defaultValue" type="java.lang.Class"/>
<doc>
<![CDATA[Get the {@link CompressionCodec} for compressing the map outputs.
@param defaultValue the {@link CompressionCodec} to return if not set
@return the {@link CompressionCodec} class that should be used to compress the
map outputs.
@throws IllegalArgumentException if the class was specified, but not found]]>
</doc>
</method>
<method name="getMapOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the key class for the map output data. If it is not set, use the
(final) output key class. This allows the map output key class to be
different than the final output key class.
@return the map output key class.]]>
</doc>
</method>
<method name="setMapOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the key class for the map output data. This allows the user to
specify the map output key class to be different than the final output
key class.
@param theClass the map output key class.]]>
</doc>
</method>
<method name="getMapOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the value class for the map output data. If it is not set, use the
(final) output value class. This allows the map output value class to be
different than the final output value class.
@return the map output value class.]]>
</doc>
</method>
<method name="setMapOutputValueClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the value class for the map output data. This allows the user to
specify the map output value class to be different than the final output
value class.
@param theClass the map output value class.]]>
</doc>
</method>
<method name="getOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the key class for the job output data.
@return the key class for the job output data.]]>
</doc>
</method>
<method name="setOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the key class for the job output data.
@param theClass the key class for the job output data.]]>
</doc>
</method>
<method name="getOutputKeyComparator" return="org.apache.hadoop.io.RawComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link RawComparator} comparator used to compare keys.
@return the {@link RawComparator} comparator used to compare keys.]]>
</doc>
</method>
<method name="setOutputKeyComparatorClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link RawComparator} comparator used to compare keys.
@param theClass the {@link RawComparator} comparator used to
compare keys.
@see #setOutputValueGroupingComparator(Class)]]>
</doc>
</method>
<method name="setKeyFieldComparatorOptions"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="keySpec" type="java.lang.String"/>
<doc>
<![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys.
@param keySpec the key specification of the form -k pos1[,pos2], where,
pos is of the form f[.c][opts], where f is the number
of the key field to use, and c is the number of the first character from
the beginning of the field. Fields and character positions are numbered
starting with 1; a character position of zero in pos2 indicates the
field's last character. If '.c' is omitted from pos1, it defaults to 1
(the beginning of the field); if omitted from pos2, it defaults to 0
(the end of the field). opts are ordering options. The supported options
are:
-n, (Sort numerically)
-r, (Reverse the result of comparison)]]>
</doc>
</method>
<method name="getKeyFieldComparatorOption" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link KeyFieldBasedComparator} options]]>
</doc>
</method>
<method name="setKeyFieldPartitionerOptions"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="keySpec" type="java.lang.String"/>
<doc>
<![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for
{@link Partitioner}
@param keySpec the key specification of the form -k pos1[,pos2], where,
pos is of the form f[.c][opts], where f is the number
of the key field to use, and c is the number of the first character from
the beginning of the field. Fields and character positions are numbered
starting with 1; a character position of zero in pos2 indicates the
field's last character. If '.c' is omitted from pos1, it defaults to 1
(the beginning of the field); if omitted from pos2, it defaults to 0
(the end of the field).]]>
</doc>
</method>
<method name="getKeyFieldPartitionerOption" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]>
</doc>
</method>
<method name="getCombinerKeyGroupingComparator" return="org.apache.hadoop.io.RawComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user defined {@link WritableComparable} comparator for
grouping keys of inputs to the combiner.
@return comparator set by the user for grouping values.
@see #setCombinerKeyGroupingComparator(Class) for details.]]>
</doc>
</method>
<method name="getOutputValueGroupingComparator" return="org.apache.hadoop.io.RawComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user defined {@link WritableComparable} comparator for
grouping keys of inputs to the reduce.
@return comparator set by the user for grouping values.
@see #setOutputValueGroupingComparator(Class) for details.]]>
</doc>
</method>
<method name="setCombinerKeyGroupingComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the user defined {@link RawComparator} comparator for
grouping keys in the input to the combiner.
<p>This comparator should be provided if the equivalence rules for keys
for sorting the intermediates are different from those for grouping keys
before each call to
{@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p>
<p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
in a single call to the reduce function if K1 and K2 compare as equal.</p>
<p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
how keys are sorted, this can be used in conjunction to simulate
<i>secondary sort on values</i>.</p>
<p><i>Note</i>: This is not a guarantee of the combiner sort being
<i>stable</i> in any sense. (In any case, with the order of available
map-outputs to the combiner being non-deterministic, it wouldn't make
that much sense.)</p>
@param theClass the comparator class to be used for grouping keys for the
combiner. It should implement <code>RawComparator</code>.
@see #setOutputKeyComparatorClass(Class)]]>
</doc>
</method>
<method name="setOutputValueGroupingComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the user defined {@link RawComparator} comparator for
grouping keys in the input to the reduce.
<p>This comparator should be provided if the equivalence rules for keys
for sorting the intermediates are different from those for grouping keys
before each call to
{@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p>
<p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
in a single call to the reduce function if K1 and K2 compare as equal.</p>
<p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
how keys are sorted, this can be used in conjunction to simulate
<i>secondary sort on values</i>.</p>
<p><i>Note</i>: This is not a guarantee of the reduce sort being
<i>stable</i> in any sense. (In any case, with the order of available
map-outputs to the reduce being non-deterministic, it wouldn't make
that much sense.)</p>
@param theClass the comparator class to be used for grouping keys.
It should implement <code>RawComparator</code>.
@see #setOutputKeyComparatorClass(Class)
@see #setCombinerKeyGroupingComparator(Class)]]>
</doc>
</method>
<method name="getUseNewMapper" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should the framework use the new context-object code for running
the mapper?
@return true, if the new api should be used]]>
</doc>
</method>
<method name="setUseNewMapper"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="flag" type="boolean"/>
<doc>
<![CDATA[Set whether the framework should use the new api for the mapper.
This is the default for jobs submitted with the new Job api.
@param flag true, if the new api should be used]]>
</doc>
</method>
<method name="getUseNewReducer" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should the framework use the new context-object code for running
the reducer?
@return true, if the new api should be used]]>
</doc>
</method>
<method name="setUseNewReducer"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="flag" type="boolean"/>
<doc>
<![CDATA[Set whether the framework should use the new api for the reducer.
This is the default for jobs submitted with the new Job api.
@param flag true, if the new api should be used]]>
</doc>
</method>
<method name="getOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the value class for job outputs.
@return the value class for job outputs.]]>
</doc>
</method>
<method name="setOutputValueClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the value class for job outputs.
@param theClass the value class for job outputs.]]>
</doc>
</method>
<method name="getMapperClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link Mapper} class for the job.
@return the {@link Mapper} class for the job.]]>
</doc>
</method>
<method name="setMapperClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link Mapper} class for the job.
@param theClass the {@link Mapper} class for the job.]]>
</doc>
</method>
<method name="getMapRunnerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link MapRunnable} class for the job.
@return the {@link MapRunnable} class for the job.]]>
</doc>
</method>
<method name="setMapRunnerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Expert: Set the {@link MapRunnable} class for the job.
Typically used to exert greater control on {@link Mapper}s.
@param theClass the {@link MapRunnable} class for the job.]]>
</doc>
</method>
<method name="getPartitionerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link Partitioner} used to partition {@link Mapper}-outputs
to be sent to the {@link Reducer}s.
@return the {@link Partitioner} used to partition map-outputs.]]>
</doc>
</method>
<method name="setPartitionerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link Partitioner} class used to partition
{@link Mapper}-outputs to be sent to the {@link Reducer}s.
@param theClass the {@link Partitioner} used to partition map-outputs.]]>
</doc>
</method>
<method name="getReducerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link Reducer} class for the job.
@return the {@link Reducer} class for the job.]]>
</doc>
</method>
<method name="setReducerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link Reducer} class for the job.
@param theClass the {@link Reducer} class for the job.]]>
</doc>
</method>
<method name="getCombinerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user-defined <i>combiner</i> class used to combine map-outputs
before being sent to the reducers. Typically the combiner is the same as the
{@link Reducer} for the job i.e. {@link #getReducerClass()}.
@return the user-defined combiner class used to combine map-outputs.]]>
</doc>
</method>
<method name="setCombinerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the user-defined <i>combiner</i> class used to combine map-outputs
before being sent to the reducers.
<p>The combiner is an application-specified aggregation operation, which
can help cut down the amount of data transferred between the
{@link Mapper} and the {@link Reducer}, leading to better performance.</p>
<p>The framework may invoke the combiner 0, 1, or multiple times, in both
the mapper and reducer tasks. In general, the combiner is called as the
sort/merge result is written to disk. The combiner must:</p>
<ul>
<li> be side-effect free</li>
<li> have the same input and output key types and the same input and
output value types</li>
</ul>
<p>Typically the combiner is the same as the <code>Reducer</code> for the
job i.e. {@link #setReducerClass(Class)}.</p>
@param theClass the user-defined combiner class used to combine
map-outputs.]]>
</doc>
</method>
<method name="getSpeculativeExecution" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should speculative execution be used for this job?
Defaults to <code>true</code>.
@return <code>true</code> if speculative execution should be used for this job,
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setSpeculativeExecution"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="speculativeExecution" type="boolean"/>
<doc>
<![CDATA[Turn speculative execution on or off for this job.
@param speculativeExecution <code>true</code> if speculative execution
should be turned on, else <code>false</code>.]]>
</doc>
</method>
<method name="getMapSpeculativeExecution" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should speculative execution be used for this job for map tasks?
Defaults to <code>true</code>.
@return <code>true</code> if speculative execution should be
used for this job for map tasks,
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setMapSpeculativeExecution"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="speculativeExecution" type="boolean"/>
<doc>
<![CDATA[Turn speculative execution on or off for this job for map tasks.
@param speculativeExecution <code>true</code> if speculative execution
should be turned on for map tasks,
else <code>false</code>.]]>
</doc>
</method>
<method name="getReduceSpeculativeExecution" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should speculative execution be used for this job for reduce tasks?
Defaults to <code>true</code>.
@return <code>true</code> if speculative execution should be used
for reduce tasks for this job,
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setReduceSpeculativeExecution"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="speculativeExecution" type="boolean"/>
<doc>
<![CDATA[Turn speculative execution on or off for this job for reduce tasks.
@param speculativeExecution <code>true</code> if speculative execution
should be turned on for reduce tasks,
else <code>false</code>.]]>
</doc>
</method>
<method name="getNumMapTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of map tasks for this job.
Defaults to <code>1</code>.
@return the number of map tasks for this job.]]>
</doc>
</method>
<method name="setNumMapTasks"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[Set the number of map tasks for this job.
<p><i>Note</i>: This is only a <i>hint</i> to the framework. The actual
number of spawned map tasks depends on the number of {@link InputSplit}s
generated by the job's {@link InputFormat#getSplits(JobConf, int)}.
A custom {@link InputFormat} is typically used to accurately control
the number of map tasks for the job.</p>
<b id="NoOfMaps">How many maps?</b>
<p>The number of maps is usually driven by the total size of the inputs
i.e. total number of blocks of the input files.</p>
<p>The right level of parallelism for maps seems to be around 10-100 maps
per-node, although it has been set up to 300 or so for very cpu-light map
tasks. Task setup takes a while, so it is best if the maps take at least a
minute to execute.</p>
<p>The default behavior of file-based {@link InputFormat}s is to split the
input into <i>logical</i> {@link InputSplit}s based on the total size, in
bytes, of input files. However, the {@link FileSystem} blocksize of the
input files is treated as an upper bound for input splits. A lower bound
on the split size can be set via
<a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.input.fileinputformat.split.minsize">
mapreduce.input.fileinputformat.split.minsize</a>.</p>
<p>Thus, if you expect 10TB of input data and have a blocksize of 128MB,
you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is
used to set it even higher.</p>
@param n the number of map tasks for this job.
@see InputFormat#getSplits(JobConf, int)
@see FileInputFormat
@see FileSystem#getDefaultBlockSize()
@see FileStatus#getBlockSize()]]>
</doc>
</method>
<method name="getNumReduceTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of reduce tasks for this job. Defaults to
<code>1</code>.
@return the number of reduce tasks for this job.]]>
</doc>
</method>
<method name="setNumReduceTasks"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[Set the requisite number of reduce tasks for this job.
<b id="NoOfReduces">How many reduces?</b>
<p>The right number of reduces seems to be <code>0.95</code> or
<code>1.75</code> multiplied by (
<i>available memory for reduce tasks</i>
(The value of this should be smaller than
numNodes * yarn.nodemanager.resource.memory-mb
since the resource of memory is shared by map tasks and other
applications) /
<a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.reduce.memory.mb">
mapreduce.reduce.memory.mb</a>).
</p>
<p>With <code>0.95</code> all of the reduces can launch immediately and
start transferring map outputs as the maps finish. With <code>1.75</code>
the faster nodes will finish their first round of reduces and launch a
second wave of reduces doing a much better job of load balancing.</p>
<p>Increasing the number of reduces increases the framework overhead, but
increases load balancing and lowers the cost of failures.</p>
<p>The scaling factors above are slightly less than whole numbers to
reserve a few reduce slots in the framework for speculative-tasks, failures
etc.</p>
<b id="ReducerNone">Reducer NONE</b>
<p>It is legal to set the number of reduce-tasks to <code>zero</code>.</p>
<p>In this case the output of the map-tasks goes directly to the distributed
file-system, to the path set by
{@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the
framework doesn't sort the map-outputs before writing them out to HDFS.</p>
@param n the number of reduce tasks for this job.]]>
</doc>
</method>
<method name="getMaxMapAttempts" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of maximum attempts that will be made to run a
map task, as specified by the <code>mapreduce.map.maxattempts</code>
property. If this property is not already set, the default is 4 attempts.
@return the max number of attempts per map task.]]>
</doc>
</method>
<method name="setMaxMapAttempts"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
map task.
@param n the number of attempts per map task.]]>
</doc>
</method>
<method name="getMaxReduceAttempts" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of maximum attempts that will be made to run a
reduce task, as specified by the <code>mapreduce.reduce.maxattempts</code>
property. If this property is not already set, the default is 4 attempts.
@return the max number of attempts per reduce task.]]>
</doc>
</method>
<method name="setMaxReduceAttempts"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
reduce task.
@param n the number of attempts per reduce task.]]>
</doc>
</method>
<method name="getJobName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user-specified job name. This is only used to identify the
job to the user.
@return the job's name, defaulting to "".]]>
</doc>
</method>
<method name="setJobName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Set the user-specified job name.
@param name the job's new name.]]>
</doc>
</method>
<method name="getSessionId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user-specified session identifier. The default is the empty string.
The session identifier is used to tag metric data that is reported to some
performance metrics system via the org.apache.hadoop.metrics API. The
session identifier is intended, in particular, for use by Hadoop-On-Demand
(HOD) which allocates a virtual Hadoop cluster dynamically and transiently.
HOD will set the session identifier by modifying the mapred-site.xml file
before starting the cluster.
When not running under HOD, this identifier is expected to remain set to
the empty string.
@return the session identifier, defaulting to "".]]>
</doc>
</method>
<method name="setSessionId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="sessionId" type="java.lang.String"/>
<doc>
<![CDATA[Set the user-specified session identifier.
@param sessionId the new session id.]]>
</doc>
</method>
<method name="setMaxTaskFailuresPerTracker"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="noFailures" type="int"/>
<doc>
<![CDATA[Set the maximum no. of failures of a given job per tasktracker.
If the no. of task failures exceeds <code>noFailures</code>, the
tasktracker is <i>blacklisted</i> for this job.
@param noFailures maximum no. of failures of a given job per tasktracker.]]>
</doc>
</method>
<method name="getMaxTaskFailuresPerTracker" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Expert: Get the maximum no. of failures of a given job per tasktracker.
If the no. of task failures exceeds this, the tasktracker is
<i>blacklisted</i> for this job.
@return the maximum no. of failures of a given job per tasktracker.]]>
</doc>
</method>
<method name="getMaxMapTaskFailuresPercent" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the maximum percentage of map tasks that can fail without
the job being aborted.
Each map task is executed a minimum of {@link #getMaxMapAttempts()}
attempts before being declared as <i>failed</i>.
Defaults to <code>zero</code>, i.e. <i>any</i> failed map-task results in
the job being declared as {@link JobStatus#FAILED}.
@return the maximum percentage of map tasks that can fail without
the job being aborted.]]>
</doc>
</method>
<method name="setMaxMapTaskFailuresPercent"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="percent" type="int"/>
<doc>
<![CDATA[Expert: Set the maximum percentage of map tasks that can fail without the
job being aborted.
Each map task is executed a minimum of {@link #getMaxMapAttempts} attempts
before being declared as <i>failed</i>.
@param percent the maximum percentage of map tasks that can fail without
the job being aborted.]]>
</doc>
</method>
<method name="getMaxReduceTaskFailuresPercent" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the maximum percentage of reduce tasks that can fail without
the job being aborted.
Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()}
attempts before being declared as <i>failed</i>.
Defaults to <code>zero</code>, i.e. <i>any</i> failed reduce-task results
in the job being declared as {@link JobStatus#FAILED}.
@return the maximum percentage of reduce tasks that can fail without
the job being aborted.]]>
</doc>
</method>
<method name="setMaxReduceTaskFailuresPercent"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="percent" type="int"/>
<doc>
<![CDATA[Set the maximum percentage of reduce tasks that can fail without the job
being aborted.
Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()}
attempts before being declared as <i>failed</i>.
@param percent the maximum percentage of reduce tasks that can fail without
the job being aborted.]]>
</doc>
</method>
<method name="setJobPriority"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="prio" type="org.apache.hadoop.mapred.JobPriority"/>
<doc>
<![CDATA[Set {@link JobPriority} for this job.
@param prio the {@link JobPriority} for this job.]]>
</doc>
</method>
<method name="setJobPriorityAsInteger"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="prio" type="int"/>
<doc>
<![CDATA[Set {@link JobPriority} for this job.
@param prio the {@link JobPriority} for this job.]]>
</doc>
</method>
<method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link JobPriority} for this job.
@return the {@link JobPriority} for this job.]]>
</doc>
</method>
<method name="getJobPriorityAsInteger" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the priority for this job.
@return the priority for this job.]]>
</doc>
</method>
<method name="getProfileEnabled" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get whether the task profiling is enabled.
@return true if some tasks will be profiled]]>
</doc>
</method>
<method name="setProfileEnabled"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newValue" type="boolean"/>
<doc>
<![CDATA[Set whether the system should collect profiler information for some of
the tasks in this job. The information is stored in the user log
directory.
@param newValue true means it should be gathered]]>
</doc>
</method>
<method name="getProfileParams" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the profiler configuration arguments.
The default value for this property is
"-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s"
@return the parameters to pass to the task child to configure profiling]]>
</doc>
</method>
<method name="setProfileParams"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="value" type="java.lang.String"/>
<doc>
<![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it
will be replaced with the name of the profiling output file when the task
runs.
This value is passed to the task child JVM on the command line.
@param value the configuration string]]>
</doc>
</method>
<method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="isMap" type="boolean"/>
<doc>
<![CDATA[Get the range of maps or reduces to profile.
@param isMap is the task a map?
@return the task ranges]]>
</doc>
</method>
<method name="setProfileTaskRange"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="isMap" type="boolean"/>
<param name="newValue" type="java.lang.String"/>
<doc>
<![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true)
must also be called.
@param isMap is the task a map?
@param newValue a set of integer ranges of the map or reduce ids]]>
</doc>
</method>
<method name="setMapDebugScript"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="mDbgScript" type="java.lang.String"/>
<doc>
<![CDATA[Set the debug script to run when the map tasks fail.
<p>The debug script can aid debugging of failed map tasks. The script is
given task's stdout, stderr, syslog, jobconf files as arguments.</p>
<p>The debug command, run on the node where the map failed, is:</p>
<p><blockquote><pre>
$script $stdout $stderr $syslog $jobconf.
</pre></blockquote>
<p> The script file is distributed through {@link DistributedCache}
APIs. The script needs to be symlinked. </p>
<p>Here is an example on how to submit a script:</p>
<p><blockquote><pre>
job.setMapDebugScript("./myscript");
DistributedCache.createSymlink(job);
DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
</pre></blockquote>
@param mDbgScript the script name]]>
</doc>
</method>
<method name="getMapDebugScript" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the map task's debug script.
@return the debug Script for the mapred job for failed map tasks.
@see #setMapDebugScript(String)]]>
</doc>
</method>
<method name="setReduceDebugScript"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="rDbgScript" type="java.lang.String"/>
<doc>
<![CDATA[Set the debug script to run when the reduce tasks fail.
<p>The debug script can aid debugging of failed reduce tasks. The script
is given task's stdout, stderr, syslog, jobconf files as arguments.</p>
<p>The debug command, run on the node where the reduce failed, is:</p>
<p><blockquote><pre>
$script $stdout $stderr $syslog $jobconf.
</pre></blockquote>
<p> The script file is distributed through {@link DistributedCache}
APIs. The script file needs to be symlinked. </p>
<p>Here is an example on how to submit a script:</p>
<p><blockquote><pre>
job.setReduceDebugScript("./myscript");
DistributedCache.createSymlink(job);
DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
</pre></blockquote>
@param rDbgScript the script name]]>
</doc>
</method>
<method name="getReduceDebugScript" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the reduce task's debug script.
@return the debug script for the mapred job for failed reduce tasks.
@see #setReduceDebugScript(String)]]>
</doc>
</method>
<method name="getJobEndNotificationURI" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the uri to be invoked in order to send a notification after the job
has completed (success/failure).
@return the job end notification uri, <code>null</code> if it hasn't
been set.
@see #setJobEndNotificationURI(String)]]>
</doc>
</method>
<method name="setJobEndNotificationURI"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="uri" type="java.lang.String"/>
<doc>
<![CDATA[Set the uri to be invoked in order to send a notification after the job
has completed (success/failure).
<p>The uri can contain 2 special parameters: <tt>$jobId</tt> and
<tt>$jobStatus</tt>. Those, if present, are replaced by the job's
identifier and completion-status respectively.</p>
<p>This is typically used by application-writers to implement chaining of
Map-Reduce jobs in an <i>asynchronous manner</i>.</p>
@param uri the job end notification uri
@see JobStatus]]>
</doc>
</method>
<method name="getJobLocalDir" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get job-specific shared directory for use as scratch space
<p>
When a job starts, a shared directory is created at location
<code>
${mapreduce.cluster.local.dir}/taskTracker/$user/jobcache/$jobid/work/ </code>.
This directory is exposed to the users through
<code>mapreduce.job.local.dir </code>.
So, the tasks can use this space
as scratch space and share files among them. </p>
This value is also available as a System property.
@return The localized job specific shared directory]]>
</doc>
</method>
<method name="getMemoryForMapTask" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get memory required to run a map task of the job, in MB.
If a value is specified in the configuration, it is returned.
Else, it returns {@link JobContext#DEFAULT_MAP_MEMORY_MB}.
<p>
For backward compatibility, if the job configuration sets the
key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different
from {@link #DISABLED_MEMORY_LIMIT}, that value will be used
after converting it from bytes to MB.
@return memory required to run a map task of the job, in MB.]]>
</doc>
</method>
<method name="setMemoryForMapTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="mem" type="long"/>
</method>
<method name="getMemoryForReduceTask" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get memory required to run a reduce task of the job, in MB.
If a value is specified in the configuration, it is returned.
Else, it returns {@link JobContext#DEFAULT_REDUCE_MEMORY_MB}.
<p>
For backward compatibility, if the job configuration sets the
key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different
from {@link #DISABLED_MEMORY_LIMIT}, that value will be used
after converting it from bytes to MB.
@return memory required to run a reduce task of the job, in MB.]]>
</doc>
</method>
<method name="setMemoryForReduceTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="mem" type="long"/>
</method>
<method name="getQueueName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the name of the queue to which this job is submitted.
Defaults to 'default'.
@return name of the queue]]>
</doc>
</method>
<method name="setQueueName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<doc>
<![CDATA[Set the name of the queue to which this job should be submitted.
@param queueName Name of the queue]]>
</doc>
</method>
<method name="normalizeMemoryConfigValue" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="long"/>
<doc>
<![CDATA[Normalize the negative values in configuration.
@param val the configuration value to normalize
@return normalized value]]>
</doc>
</method>
<method name="findContainingJar" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="my_class" type="java.lang.Class"/>
<doc>
<![CDATA[Find a jar that contains a class of the same name, if any.
It will return a jar file, even if that is not the first thing
on the class path that has a class with the same name.
@param my_class the class to find.
@return a jar file that contains the class, or null.]]>
</doc>
</method>
<method name="getMaxVirtualMemoryForTask" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link #getMemoryForMapTask()} and
{@link #getMemoryForReduceTask()}">
<doc>
<![CDATA[Get the memory required to run a task of this job, in bytes. See
{@link #MAPRED_TASK_MAXVMEM_PROPERTY}
<p>
This method is deprecated. Now, different memory limits can be
set for map and reduce tasks of a job, in MB.
<p>
For backward compatibility, if the job configuration sets the
key {@link #MAPRED_TASK_MAXVMEM_PROPERTY}, that value is returned.
Otherwise, this method will return the larger of the values returned by
{@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()}
after converting them into bytes.
@return Memory required to run a task of this job, in bytes.
@see #setMaxVirtualMemoryForTask(long)
@deprecated Use {@link #getMemoryForMapTask()} and
{@link #getMemoryForReduceTask()}]]>
</doc>
</method>
<method name="setMaxVirtualMemoryForTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link #setMemoryForMapTask(long mem)} and
{@link #setMemoryForReduceTask(long mem)}">
<param name="vmem" type="long"/>
<doc>
<![CDATA[Set the maximum amount of memory any task of this job can use. See
{@link #MAPRED_TASK_MAXVMEM_PROPERTY}
<p>
mapred.task.maxvmem is split into
mapreduce.map.memory.mb
and mapreduce.reduce.memory.mb;
each of the new keys is set to
mapred.task.maxvmem converted from bytes to MB,
as the new values are in MB.
@param vmem Maximum amount of virtual memory in bytes any task of this job
can use.
@see #getMaxVirtualMemoryForTask()
@deprecated
Use {@link #setMemoryForMapTask(long mem)} and
{@link #setMemoryForReduceTask(long mem)}]]>
</doc>
</method>
<method name="getMaxPhysicalMemoryForTask" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="this variable is deprecated and no longer in use.">
<doc>
<![CDATA[@deprecated this variable is deprecated and no longer in use.]]>
</doc>
</method>
<method name="setMaxPhysicalMemoryForTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="mem" type="long"/>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<field name="MAPRED_TASK_MAXVMEM_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="Use {@link #MAPREDUCE_JOB_MAP_MEMORY_MB_PROPERTY} and
{@link #MAPREDUCE_JOB_REDUCE_MEMORY_MB_PROPERTY}">
<doc>
<![CDATA[@deprecated Use {@link #MAPREDUCE_JOB_MAP_MEMORY_MB_PROPERTY} and
{@link #MAPREDUCE_JOB_REDUCE_MEMORY_MB_PROPERTY}]]>
</doc>
</field>
<field name="UPPER_LIMIT_ON_TASK_VMEM_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="deprecated, no comment">
<doc>
<![CDATA[@deprecated]]>
</doc>
</field>
<field name="MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="deprecated, no comment">
<doc>
<![CDATA[@deprecated]]>
</doc>
</field>
<field name="MAPRED_TASK_MAXPMEM_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="deprecated, no comment">
<doc>
<![CDATA[@deprecated]]>
</doc>
</field>
<field name="DISABLED_MEMORY_LIMIT" type="long"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[A value which if set for memory related configuration options,
indicates that the options are turned off.
Deprecated because it makes no sense in the context of MR2.]]>
</doc>
</field>
<field name="MAPRED_LOCAL_DIR_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Property name for the configuration property mapreduce.cluster.local.dir]]>
</doc>
</field>
<field name="DEFAULT_QUEUE_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Name of the queue to which jobs will be submitted, if no queue
name is mentioned.]]>
</doc>
</field>
<field name="MAPRED_JOB_MAP_MEMORY_MB_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The variable is kept for M/R 1.x applications, while M/R 2.x applications
should use {@link #MAPREDUCE_JOB_MAP_MEMORY_MB_PROPERTY}]]>
</doc>
</field>
<field name="MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The variable is kept for M/R 1.x applications, while M/R 2.x applications
should use {@link #MAPREDUCE_JOB_REDUCE_MEMORY_MB_PROPERTY}]]>
</doc>
</field>
<field name="UNPACK_JAR_PATTERN_DEFAULT" type="java.util.regex.Pattern"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Pattern for the default unpacking behavior for job jars]]>
</doc>
</field>
<field name="MAPRED_TASK_JAVA_OPTS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="Use {@link #MAPRED_MAP_TASK_JAVA_OPTS} or
{@link #MAPRED_REDUCE_TASK_JAVA_OPTS}">
<doc>
<![CDATA[Configuration key to set the java command line options for the child
map and reduce tasks.
Java opts for the task tracker child processes.
The following symbol, if present, will be interpolated: @taskid@.
It is replaced by current TaskID. Any other occurrences of '@' will go
unchanged.
For example, to enable verbose gc logging to a file named for the taskid in
/tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
-Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
The configuration variable {@link #MAPRED_TASK_ENV} can be used to pass
other environment variables to the child processes.
@deprecated Use {@link #MAPRED_MAP_TASK_JAVA_OPTS} or
{@link #MAPRED_REDUCE_TASK_JAVA_OPTS}]]>
</doc>
</field>
<field name="MAPRED_MAP_TASK_JAVA_OPTS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Configuration key to set the java command line options for the map tasks.
Java opts for the task tracker child map processes.
The following symbol, if present, will be interpolated: @taskid@.
It is replaced by current TaskID. Any other occurrences of '@' will go
unchanged.
For example, to enable verbose gc logging to a file named for the taskid in
/tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
-Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
The configuration variable {@link #MAPRED_MAP_TASK_ENV} can be used to pass
other environment variables to the map processes.]]>
</doc>
</field>
<field name="MAPRED_REDUCE_TASK_JAVA_OPTS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Configuration key to set the java command line options for the reduce tasks.
Java opts for the task tracker child reduce processes.
The following symbol, if present, will be interpolated: @taskid@.
It is replaced by current TaskID. Any other occurrences of '@' will go
unchanged.
For example, to enable verbose gc logging to a file named for the taskid in
/tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
-Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
The configuration variable {@link #MAPRED_REDUCE_TASK_ENV} can be used to
pass process environment variables to the reduce processes.]]>
</doc>
</field>
<field name="DEFAULT_MAPRED_TASK_JAVA_OPTS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="MAPRED_TASK_ULIMIT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="Configuration key to set the maximum virtual memory available to the child
map and reduce tasks (in kilo-bytes). This has been deprecated and will no
longer have any effect.">
<doc>
<![CDATA[@deprecated
Configuration key to set the maximum virtual memory available to the child
map and reduce tasks (in kilo-bytes). This has been deprecated and will no
longer have any effect.]]>
</doc>
</field>
<field name="MAPRED_MAP_TASK_ULIMIT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="Configuration key to set the maximum virtual memory available to the
map tasks (in kilo-bytes). This has been deprecated and will no
longer have any effect.">
<doc>
<![CDATA[@deprecated
Configuration key to set the maximum virtual memory available to the
map tasks (in kilo-bytes). This has been deprecated and will no
longer have any effect.]]>
</doc>
</field>
<field name="MAPRED_REDUCE_TASK_ULIMIT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="Configuration key to set the maximum virtual memory available to the
reduce tasks (in kilo-bytes). This has been deprecated and will no
longer have any effect.">
<doc>
<![CDATA[@deprecated
Configuration key to set the maximum virtual memory available to the
reduce tasks (in kilo-bytes). This has been deprecated and will no
longer have any effect.]]>
</doc>
</field>
<field name="MAPRED_TASK_ENV" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="Use {@link #MAPRED_MAP_TASK_ENV} or
{@link #MAPRED_REDUCE_TASK_ENV}">
<doc>
<![CDATA[Configuration key to set the environment of the child map/reduce tasks.
The format of the value is <code>k1=v1,k2=v2</code>. Further it can
reference existing environment variables via <code>$key</code> on
Linux or <code>%key%</code> on Windows.
Example:
<ul>
<li> A=foo - This will set the env variable A to foo. </li>
</ul>
@deprecated Use {@link #MAPRED_MAP_TASK_ENV} or
{@link #MAPRED_REDUCE_TASK_ENV}]]>
</doc>
</field>
<field name="MAPRED_MAP_TASK_ENV" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Configuration key to set the environment of the child map tasks.
The format of the value is <code>k1=v1,k2=v2</code>. Further it can
reference existing environment variables via <code>$key</code> on
Linux or <code>%key%</code> on Windows.
Example:
<ul>
<li> A=foo - This will set the env variable A to foo. </li>
</ul>]]>
</doc>
</field>
<field name="MAPRED_REDUCE_TASK_ENV" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Configuration key to set the environment of the child reduce tasks.
The format of the value is <code>k1=v1,k2=v2</code>. Further it can
reference existing environment variables via <code>$key</code> on
Linux or <code>%key%</code> on Windows.
Example:
<ul>
<li> A=foo - This will set the env variable A to foo. </li>
</ul>]]>
</doc>
</field>
<field name="MAPRED_MAP_TASK_LOG_LEVEL" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Configuration key to set the logging level for the map task.
The allowed logging levels are:
OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and ALL.]]>
</doc>
</field>
<field name="MAPRED_REDUCE_TASK_LOG_LEVEL" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Configuration key to set the logging level for the reduce task.
The allowed logging levels are:
OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and ALL.]]>
</doc>
</field>
<field name="DEFAULT_LOG_LEVEL" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default logging level for map/reduce tasks.]]>
</doc>
</field>
<field name="WORKFLOW_ID" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#WORKFLOW_ID} instead]]>
</doc>
</field>
<field name="WORKFLOW_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#WORKFLOW_NAME} instead]]>
</doc>
</field>
<field name="WORKFLOW_NODE_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#WORKFLOW_NODE_NAME} instead]]>
</doc>
</field>
<field name="WORKFLOW_ADJACENCY_PREFIX_STRING" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#WORKFLOW_ADJACENCY_PREFIX_STRING} instead]]>
</doc>
</field>
<field name="WORKFLOW_ADJACENCY_PREFIX_PATTERN" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#WORKFLOW_ADJACENCY_PREFIX_PATTERN} instead]]>
</doc>
</field>
<field name="WORKFLOW_TAGS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should
use {@link MRJobConfig#WORKFLOW_TAGS} instead]]>
</doc>
</field>
<field name="MAPREDUCE_RECOVER_JOB" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should
not use it]]>
</doc>
</field>
<field name="DEFAULT_MAPREDUCE_RECOVER_JOB" type="boolean"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should
not use it]]>
</doc>
</field>
<doc>
<![CDATA[A map/reduce job configuration.
<p><code>JobConf</code> is the primary interface for a user to describe a
map-reduce job to the Hadoop framework for execution. The framework tries to
faithfully execute the job as described by <code>JobConf</code>, however:
<ol>
<li>
Some configuration parameters might have been marked as
<a href="{@docRoot}/org/apache/hadoop/conf/Configuration.html#FinalParams">
final</a> by administrators and hence cannot be altered.
</li>
<li>
While some job parameters are straight-forward to set
(e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly
with the rest of the framework and/or job-configuration and are relatively
more complex for the user to control finely
(e.g. {@link #setNumMapTasks(int)}).
</li>
</ol>
<p><code>JobConf</code> typically specifies the {@link Mapper}, combiner
(if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and
{@link OutputFormat} implementations to be used etc.
<p>Optionally <code>JobConf</code> is used to specify other advanced facets
of the job such as <code>Comparator</code>s to be used, files to be put in
the {@link DistributedCache}, whether or not intermediate and/or job outputs
are to be compressed (and how), debuggability via user-provided scripts
({@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)})
for doing post-processing on task logs, task's stdout, stderr, syslog,
etc.</p>
<p>Here is an example on how to configure a job via <code>JobConf</code>:</p>
<p><blockquote><pre>
// Create a new JobConf
JobConf job = new JobConf(new Configuration(), MyJob.class);
// Specify various job-specific parameters
job.setJobName("myjob");
FileInputFormat.setInputPaths(job, new Path("in"));
FileOutputFormat.setOutputPath(job, new Path("out"));
job.setMapperClass(MyJob.MyMapper.class);
job.setCombinerClass(MyJob.MyReducer.class);
job.setReducerClass(MyJob.MyReducer.class);
job.setInputFormat(SequenceFileInputFormat.class);
job.setOutputFormat(SequenceFileOutputFormat.class);
</pre></blockquote>
@see JobClient
@see ClusterStatus
@see Tool
@see DistributedCache]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobConf -->
<!-- start interface org.apache.hadoop.mapred.JobConfigurable -->
<interface name="JobConfigurable" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="configure"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Initializes a new instance from a {@link JobConf}.
@param job the configuration]]>
</doc>
</method>
<doc>
<![CDATA[Something that may be configured.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.JobConfigurable -->
<!-- start interface org.apache.hadoop.mapred.JobContext -->
<interface name="JobContext" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.JobContext"/>
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the job Configuration
@return JobConf]]>
</doc>
</method>
<method name="getProgressible" return="org.apache.hadoop.util.Progressable"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the progress mechanism for reporting progress.
@return progress mechanism]]>
</doc>
</method>
</interface>
<!-- end interface org.apache.hadoop.mapred.JobContext -->
<!-- start class org.apache.hadoop.mapred.JobID -->
<class name="JobID" extends="org.apache.hadoop.mapreduce.JobID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobID" type="java.lang.String, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a JobID object
@param jtIdentifier jobTracker identifier
@param id job number]]>
</doc>
</constructor>
<constructor name="JobID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="downgrade" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="old" type="org.apache.hadoop.mapreduce.JobID"/>
<doc>
<![CDATA[Downgrade a new JobID to an old one
@param old a new or old JobID
@return either old or a new JobID built to match old]]>
</doc>
</method>
<method name="read" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="forName" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<doc>
<![CDATA[Construct a JobID object from the given string.
@param str the string form of the JobID to parse
@return constructed JobID object or null if the given String is null
@throws IllegalArgumentException if the given string is malformed]]>
</doc>
</method>
<method name="getJobIDsPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jtIdentifier" type="java.lang.String"/>
<param name="jobId" type="java.lang.Integer"/>
<doc>
<![CDATA[Returns a regex pattern which matches job IDs. Arguments can
be given null, in which case that part of the regex will be generic.
For example to obtain a regex matching <i>any job</i>
run on the jobtracker started at <i>200707121733</i>, we would use :
<pre>
JobID.getJobIDsPattern("200707121733", null);
</pre>
which will return :
<pre> "job_200707121733_[0-9]*" </pre>
@param jtIdentifier jobTracker identifier, or null
@param jobId job number, or null
@return a regex pattern matching JobIDs]]>
</doc>
</method>
<doc>
<![CDATA[JobID represents the immutable and unique identifier for
the job. JobID consists of two parts. First part
represents the jobtracker identifier, so that jobID to jobtracker map
is defined. For cluster setup this string is the jobtracker
start time, for local setting, it is "local".
Second part of the JobID is the job number. <br>
An example JobID is :
<code>job_200707121733_0003</code> , which represents the third job
running at the jobtracker started at <code>200707121733</code>.
<p>
Applications should never construct or parse JobID strings, but rather
use appropriate constructors or {@link #forName(String)} method.
@see TaskID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobID -->
<!-- start class org.apache.hadoop.mapred.JobPriority -->
<class name="JobPriority" extends="java.lang.Enum"
abstract="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.JobPriority[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.JobPriority"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<doc>
<![CDATA[Used to describe the priority of the running job.
DEFAULT : While submitting a job, if the user is not specifying priority,
YARN has the capability to pick the default priority as per its config.
Hence MapReduce can indicate such cases with this new enum.
UNDEFINED_PRIORITY : YARN supports priority as an integer. Hence other than
the five defined enums, YARN can consider other integers also. To generalize
such cases, this specific enum is used.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobPriority -->
<!-- start class org.apache.hadoop.mapred.JobQueueInfo -->
<class name="JobQueueInfo" extends="org.apache.hadoop.mapreduce.QueueInfo"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobQueueInfo"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default constructor for Job Queue Info.]]>
</doc>
</constructor>
<constructor name="JobQueueInfo" type="java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a new JobQueueInfo object using the queue name and the
scheduling information passed.
@param queueName Name of the job queue
@param schedulingInfo Scheduling Information associated with the job
queue]]>
</doc>
</constructor>
<method name="getQueueState" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Use getState() instead]]>
</doc>
</method>
<method name="getChildren" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[Class that contains the information regarding the Job Queues which are
maintained by the Hadoop Map/Reduce framework.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobQueueInfo -->
<!-- start class org.apache.hadoop.mapred.JobStatus -->
<class name="JobStatus" extends="org.apache.hadoop.mapreduce.JobStatus"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobStatus"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param runState The current state of the job]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, org.apache.hadoop.mapred.JobPriority"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on cleanup
@param runState The current state of the job
@param jp Priority of the job.]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on cleanup
@param runState The current state of the job
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, int, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param runState The current state of the job
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.
@param isUber Whether job running in uber mode]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.
@param isUber Whether job running in uber mode
@param historyFile history file]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param queue job queue name.
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param queue job queue name.
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.
@param isUber Whether job running in uber mode]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param queue job queue name.
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.
@param isUber Whether job running in uber mode
@param historyFile history file]]>
</doc>
</constructor>
<method name="getJobRunState" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="state" type="int"/>
<doc>
<![CDATA[Helper method to get human-readable state of the job.
@param state job state
@return human-readable state of the job]]>
</doc>
</method>
<method name="downgrade" return="org.apache.hadoop.mapred.JobStatus"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="stat" type="org.apache.hadoop.mapreduce.JobStatus"/>
</method>
<method name="getJobId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use getJobID instead">
<doc>
<![CDATA[@deprecated use getJobID instead]]>
</doc>
</method>
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return The jobid of the Job]]>
</doc>
</method>
<method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the priority of the job
@return job priority]]>
</doc>
</method>
<method name="setMapProgress"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="p" type="float"/>
<doc>
<![CDATA[Sets the map progress of this job
@param p The value of map progress to set to]]>
</doc>
</method>
<method name="setCleanupProgress"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="p" type="float"/>
<doc>
<![CDATA[Sets the cleanup progress of this job
@param p The value of cleanup progress to set to]]>
</doc>
</method>
<method name="setSetupProgress"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="p" type="float"/>
<doc>
<![CDATA[Sets the setup progress of this job
@param p The value of setup progress to set to]]>
</doc>
</method>
<method name="setReduceProgress"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="p" type="float"/>
<doc>
<![CDATA[Sets the reduce progress of this Job
@param p The value of reduce progress to set to]]>
</doc>
</method>
<method name="setFinishTime"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="finishTime" type="long"/>
<doc>
<![CDATA[Set the finish time of the job
@param finishTime The finishTime of the job]]>
</doc>
</method>
<method name="setHistoryFile"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="historyFile" type="java.lang.String"/>
<doc>
<![CDATA[Set the job history file url for a completed job]]>
</doc>
</method>
<method name="setTrackingUrl"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="trackingUrl" type="java.lang.String"/>
<doc>
<![CDATA[Set the link to the web-ui for details of the job.]]>
</doc>
</method>
<method name="setRetired"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Set the job retire flag to true.]]>
</doc>
</method>
<method name="getRunState" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return running state of the job]]>
</doc>
</method>
<method name="setStartTime"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="startTime" type="long"/>
<doc>
<![CDATA[Set the start time of the job
@param startTime The startTime of the job]]>
</doc>
</method>
<method name="setUsername"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="userName" type="java.lang.String"/>
<doc>
<![CDATA[@param userName The username of the job]]>
</doc>
</method>
<method name="setJobACLs"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="acls" type="java.util.Map"/>
</method>
<method name="setFailureInfo"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="failureInfo" type="java.lang.String"/>
</method>
<method name="setJobPriority"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jp" type="org.apache.hadoop.mapred.JobPriority"/>
<doc>
<![CDATA[Set the priority of the job, defaulting to NORMAL.
@param jp new job priority]]>
</doc>
</method>
<method name="mapProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in maps]]>
</doc>
</method>
<method name="cleanupProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in cleanup]]>
</doc>
</method>
<method name="setupProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in setup]]>
</doc>
</method>
<method name="reduceProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in reduce]]>
</doc>
</method>
<field name="RUNNING" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SUCCEEDED" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FAILED" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="PREP" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="KILLED" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Describes the current status of a job. This is
not intended to be a comprehensive piece of data.
For that, look at JobProfile.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobStatus -->
<!-- start class org.apache.hadoop.mapred.KeyValueLineRecordReader -->
<class name="KeyValueLineRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="getKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createKey" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="findSeparator" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="utf" type="byte[]"/>
<param name="start" type="int"/>
<param name="length" type="int"/>
<param name="sep" type="byte"/>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read key/value pair in a line.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class treats a line in the input as a key/value pair separated by a
separator character. The separator can be specified in config file
under the attribute name mapreduce.input.keyvaluelinerecordreader.key.value.separator. The default
separator is the tab character ('\t').]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.KeyValueLineRecordReader -->
<!-- start class org.apache.hadoop.mapred.KeyValueTextInputFormat -->
<class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="KeyValueTextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
Either a linefeed or a carriage-return is used to signal end of line. Each line
is divided into key and value parts by a separator byte. If no such byte
exists, the key will be the entire line and value will be empty.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.KeyValueTextInputFormat -->
<!-- start class org.apache.hadoop.mapred.MapFileOutputFormat -->
<class name="MapFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MapFileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="dir" type="org.apache.hadoop.fs.Path"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Open the output generated by this format.]]>
</doc>
</method>
<method name="getEntry" return="org.apache.hadoop.io.Writable"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/>
<param name="partitioner" type="org.apache.hadoop.mapred.Partitioner"/>
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get an entry from output generated by this class.]]>
</doc>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes {@link MapFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MapFileOutputFormat -->
<!-- start interface org.apache.hadoop.mapred.Mapper -->
<interface name="Mapper" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<implements name="org.apache.hadoop.io.Closeable"/>
<method name="map"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K1"/>
<param name="value" type="V1"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Maps a single input key/value pair into an intermediate key/value pair.
<p>Output pairs need not be of the same types as input pairs. A given
input pair may map to zero or many output pairs. Output pairs are
collected with calls to
{@link OutputCollector#collect(Object,Object)}.</p>
<p>Applications can use the {@link Reporter} provided to report progress
or just indicate that they are alive. In scenarios where the application
takes a significant amount of time to process individual key/value
pairs, this is crucial since the framework might assume that the task has
timed-out and kill that task. The other way of avoiding this is to set
<a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.task.timeout">
mapreduce.task.timeout</a> to a high-enough value (or even zero for no
time-outs).</p>
@param key the input key.
@param value the input value.
@param output collects mapped keys and values.
@param reporter facility to report progress.]]>
</doc>
</method>
<doc>
<![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs.
<p>Maps are the individual tasks which transform input records into
intermediate records. The transformed intermediate records need not be of
the same type as the input records. A given input pair may map to zero or
many output pairs.</p>
<p>The Hadoop Map-Reduce framework spawns one map task for each
{@link InputSplit} generated by the {@link InputFormat} for the job.
<code>Mapper</code> implementations can access the {@link JobConf} for the
job via the {@link JobConfigurable#configure(JobConf)} and initialize
themselves. Similarly they can use the {@link Closeable#close()} method for
de-initialization.</p>
<p>The framework then calls
{@link #map(Object, Object, OutputCollector, Reporter)}
for each key/value pair in the <code>InputSplit</code> for that task.</p>
<p>All intermediate values associated with a given output key are
subsequently grouped by the framework, and passed to a {@link Reducer} to
determine the final output. Users can control the grouping by specifying
a <code>Comparator</code> via
{@link JobConf#setOutputKeyComparatorClass(Class)}.</p>
<p>The grouped <code>Mapper</code> outputs are partitioned per
<code>Reducer</code>. Users can control which keys (and hence records) go to
which <code>Reducer</code> by implementing a custom {@link Partitioner}.
<p>Users can optionally specify a <code>combiner</code>, via
{@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the
intermediate outputs, which helps to cut down the amount of data transferred
from the <code>Mapper</code> to the <code>Reducer</code>.
<p>The intermediate, grouped outputs are always stored in
{@link SequenceFile}s. Applications can specify if and how the intermediate
outputs are to be compressed and which {@link CompressionCodec}s are to be
used via the <code>JobConf</code>.</p>
<p>If the job has
<a href="{@docRoot}/org/apache/hadoop/mapred/JobConf.html#ReducerNone">zero
reduces</a> then the output of the <code>Mapper</code> is directly written
to the {@link FileSystem} without grouping by keys.</p>
<p>Example:</p>
<p><blockquote><pre>
public class MyMapper&lt;K extends WritableComparable, V extends Writable&gt;
extends MapReduceBase implements Mapper&lt;K, V, K, V&gt; {
static enum MyCounters { NUM_RECORDS }
private String mapTaskId;
private String inputFile;
private int noRecords = 0;
public void configure(JobConf job) {
mapTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
inputFile = job.get(JobContext.MAP_INPUT_FILE);
}
public void map(K key, V val,
OutputCollector&lt;K, V&gt; output, Reporter reporter)
throws IOException {
// Process the &lt;key, value&gt; pair (assume this takes a while)
// ...
// ...
// Let the framework know that we are alive, and kicking!
// reporter.progress();
// Process some more
// ...
// ...
// Increment the no. of &lt;key, value&gt; pairs processed
++noRecords;
// Increment counters
reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
// Every 100 records update application-level status
if ((noRecords%100) == 0) {
reporter.setStatus(mapTaskId + " processed " + noRecords +
" from input-file: " + inputFile);
}
// Output the result
output.collect(key, val);
}
}
</pre></blockquote>
<p>Applications may write a custom {@link MapRunnable} to exert greater
control on map processing e.g. multi-threaded <code>Mapper</code>s etc.</p>
@see JobConf
@see InputFormat
@see Partitioner
@see Reducer
@see MapReduceBase
@see MapRunnable
@see SequenceFile]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.Mapper -->
<!-- start class org.apache.hadoop.mapred.MapReduceBase -->
<class name="MapReduceBase" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Closeable"/>
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="MapReduceBase"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Default implementation that does nothing.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Default implementation that does nothing.]]>
</doc>
</method>
<doc>
<![CDATA[Base class for {@link Mapper} and {@link Reducer} implementations.
<p>Provides default no-op implementations for a few methods; most non-trivial
applications need to override some of them.</p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MapReduceBase -->
<!-- start interface org.apache.hadoop.mapred.MapRunnable -->
<interface name="MapRunnable" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<method name="run"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Start mapping input <tt>&lt;key, value&gt;</tt> pairs.
<p>Mapping of input records to output records is complete when this method
returns.</p>
@param input the {@link RecordReader} to read the input records.
@param output the {@link OutputCollector} to collect the output records.
@param reporter {@link Reporter} to report progress, status-updates etc.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[Expert: Generic interface for {@link Mapper}s.
<p>Custom implementations of <code>MapRunnable</code> can exert greater
control on map processing e.g. multi-threaded, asynchronous mappers etc.</p>
@see Mapper]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.MapRunnable -->
<!-- start class org.apache.hadoop.mapred.MapRunner -->
<class name="MapRunner" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.MapRunnable"/>
<constructor name="MapRunner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getMapper" return="org.apache.hadoop.mapred.Mapper"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[Default {@link MapRunnable} implementation.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MapRunner -->
<!-- start class org.apache.hadoop.mapred.MultiFileInputFormat -->
<class name="MultiFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultiFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An abstract {@link InputFormat} that returns {@link MultiFileSplit}s
from its {@link #getSplits(JobConf, int)} method. Splits are constructed from
the files under the input paths. Each split returned contains <i>nearly</i>
equal content length. <br>
Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)}
to construct <code>RecordReader</code>s for <code>MultiFileSplit</code>s.
@see MultiFileSplit]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MultiFileInputFormat -->
<!-- start class org.apache.hadoop.mapred.MultiFileSplit -->
<class name="MultiFileSplit" extends="org.apache.hadoop.mapred.lib.CombineFileSplit"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultiFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[A sub-collection of input files. Unlike {@link FileSplit}, MultiFileSplit
class does not represent a split of a file, but a split of input files
into smaller sets. The atomic unit of split is a file. <br>
MultiFileSplit can be used to implement {@link RecordReader}s that
read one record per file.
@see FileSplit
@see MultiFileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MultiFileSplit -->
<!-- start interface org.apache.hadoop.mapred.OutputCollector -->
<interface name="OutputCollector" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="collect"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Adds a key/value pair to the output.
@param key the key to collect.
@param value the value to collect.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[Collects the <code>&lt;key, value&gt;</code> pairs output by {@link Mapper}s
and {@link Reducer}s.
<p><code>OutputCollector</code> is the generalization of the facility
provided by the Map-Reduce framework to collect data output by either the
<code>Mapper</code> or the <code>Reducer</code> i.e. intermediate outputs
or the output of the job.</p>]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.OutputCollector -->
<!-- start class org.apache.hadoop.mapred.OutputCommitter -->
<class name="OutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="OutputCommitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setupJob"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For the framework to setup the job output during initialization. This is
called from the application master process for the entire job. This will be
called multiple times, once per job attempt.
@param jobContext Context of the job whose output is being written.
@throws IOException if temporary output could not be created]]>
</doc>
</method>
<method name="cleanupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link #commitJob(JobContext)} or
{@link #abortJob(JobContext, int)} instead.">
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For cleaning up the job's output after job completion. This is called
from the application master process for the entire job. This may be called
multiple times.
@param jobContext Context of the job whose output is being written.
@throws IOException
@deprecated Use {@link #commitJob(JobContext)} or
{@link #abortJob(JobContext, int)} instead.]]>
</doc>
</method>
<method name="commitJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For committing job's output after successful job completion. Note that this
is invoked for jobs with final runstate as SUCCESSFUL. This is called
from the application master process for the entire job. This is guaranteed
to only be called once. If it throws an exception the entire job will
fail.
@param jobContext Context of the job whose output is being written.
@throws IOException]]>
</doc>
</method>
<method name="abortJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
<param name="status" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For aborting an unsuccessful job's output. Note that this is invoked for
jobs with final runstate as {@link JobStatus#FAILED} or
{@link JobStatus#KILLED}. This is called from the application
master process for the entire job. This may be called multiple times.
@param jobContext Context of the job whose output is being written.
@param status final runstate of the job
@throws IOException]]>
</doc>
</method>
<method name="setupTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Sets up output for the task. This is called from each individual task's
process that will output to HDFS, and it is called just for that task. This
may be called multiple times for the same task, but for different task
attempts.
@param taskContext Context of the task whose output is being written.
@throws IOException]]>
</doc>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check whether task needs a commit. This is called from each individual
task's process that will output to HDFS, and it is called just for that
task.
@param taskContext
@return true/false
@throws IOException]]>
</doc>
</method>
<method name="commitTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[To promote the task's temporary output to final output location.
If {@link #needsTaskCommit(TaskAttemptContext)} returns true and this
task is the task that the AM determines finished first, this method
is called to commit an individual task's output. This is to mark
that task's output as complete, as {@link #commitJob(JobContext)} will
also be called later on if the entire job finished successfully. This
is called from a task's process. This may be called multiple times for the
same task, but different task attempts. It should be very rare for this to
be called multiple times and requires odd networking failures to make this
happen. In the future the Hadoop framework may eliminate this race.
@param taskContext Context of the task whose output is being written.
@throws IOException if commit is not successful]]>
</doc>
</method>
<method name="abortTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Discard the task output. This is called from a task's process to clean
up a single task's output that has not yet been committed. This may be
called multiple times for the same task, but for different task attempts.
@param taskContext
@throws IOException]]>
</doc>
</method>
<method name="isRecoverySupported" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link #isRecoverySupported(JobContext)} instead.">
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this is
a bridge between the two.
@deprecated Use {@link #isRecoverySupported(JobContext)} instead.]]>
</doc>
</method>
<method name="isRecoverySupported" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Is task output recovery supported for restarting jobs?
If task output recovery is supported, job restart can be done more
efficiently.
@param jobContext
Context of the job whose output is being written.
@return <code>true</code> if task output recovery is supported,
<code>false</code> otherwise
@throws IOException
@see #recoverTask(TaskAttemptContext)]]>
</doc>
</method>
<method name="isCommitJobRepeatable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns true if an in-progress job commit can be retried. If the MR AM is
re-run then it will check this value to determine if it can retry an
in-progress commit that was started by a previous version.
Note that in rare scenarios, the previous AM version might still be running
at that time, due to system anomalies. Hence if this method returns true
then the retry commit operation should be able to run concurrently with
the previous operation.
If repeatable job commit is supported, job restart can tolerate previous
AM failures during job commit.
By default, it is not supported. Extended classes (like:
FileOutputCommitter) should explicitly override it if they provide support.
@param jobContext
Context of the job whose output is being written.
@return <code>true</code> if repeatable job commit is supported,
<code>false</code> otherwise
@throws IOException]]>
</doc>
</method>
<method name="isCommitJobRepeatable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="recoverTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Recover the task output.
The retry-count for the job will be passed via the
{@link MRConstants#APPLICATION_ATTEMPT_ID} key in
{@link TaskAttemptContext#getConfiguration()} for the
<code>OutputCommitter</code>. This is called from the application master
process, but it is called individually for each task.
If an exception is thrown the task will be attempted again.
@param taskContext Context of the task whose output is being recovered
@throws IOException]]>
</doc>
</method>
<method name="setupJob"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="cleanupJob"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="Use {@link #commitJob(org.apache.hadoop.mapreduce.JobContext)}
or {@link #abortJob(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.mapreduce.JobStatus.State)}
instead.">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.
@deprecated Use {@link #commitJob(org.apache.hadoop.mapreduce.JobContext)}
or {@link #abortJob(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.mapreduce.JobStatus.State)}
instead.]]>
</doc>
</method>
<method name="commitJob"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="abortJob"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="runState" type="org.apache.hadoop.mapreduce.JobStatus.State"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="setupTask"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="commitTask"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="abortTask"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="recoverTask"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="isRecoverySupported" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this is
a bridge between the two.]]>
</doc>
</method>
<doc>
<![CDATA[<code>OutputCommitter</code> describes the commit of task output for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
the job to:</p>
<ol>
<li>
Setup the job during initialization. For example, create the temporary
output directory for the job during the initialization of the job.
</li>
<li>
Cleanup the job after the job completion. For example, remove the
temporary output directory after the job completion.
</li>
<li>
Setup the task temporary output.
</li>
<li>
Check whether a task needs a commit. This is to avoid the commit
procedure if a task does not need commit.
</li>
<li>
Commit of the task output.
</li>
<li>
Discard the task commit.
</li>
</ol>
The methods in this class can be called from several different processes and
from several different contexts. It is important to know which process and
which context each is called from. Each method should be marked accordingly
in its documentation. It is also important to note that not all methods are
guaranteed to be called once and only once. If a method is not guaranteed to
have this property the output committer needs to handle this appropriately.
Also note it will only be in rare situations where they may be called
multiple times for the same task.
@see FileOutputCommitter
@see JobContext
@see TaskAttemptContext]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.OutputCommitter -->
<!-- start interface org.apache.hadoop.mapred.OutputFormat -->
<interface name="OutputFormat" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the {@link RecordWriter} for the given job.
@param ignored
@param job configuration for the job whose output is being written.
@param name the unique name for this part of the output.
@param progress mechanism for reporting progress while writing to file.
@return a {@link RecordWriter} to write the output for the job.
@throws IOException]]>
</doc>
</method>
<method name="checkOutputSpecs"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check for validity of the output-specification for the job.
<p>This is to validate the output specification for the job when
the job is submitted. Typically checks that it does not already exist,
throwing an exception when it already exists, so that output is not
overwritten.</p>
@param ignored
@param job job configuration.
@throws IOException when output should not be attempted]]>
</doc>
</method>
<doc>
<![CDATA[<code>OutputFormat</code> describes the output-specification for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the
job to:</p>
<ol>
<li>
Validate the output-specification of the job. For example, check that the
output directory doesn't already exist.
</li>
<li>
Provide the {@link RecordWriter} implementation to be used to write out
the output files of the job. Output files are stored in a
{@link FileSystem}.
</li>
</ol>
@see RecordWriter
@see JobConf]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.OutputFormat -->
<!-- start class org.apache.hadoop.mapred.OutputLogFilter -->
<class name="OutputLogFilter" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.fs.PathFilter"/>
<constructor name="OutputLogFilter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="accept" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="path" type="org.apache.hadoop.fs.Path"/>
</method>
<doc>
<![CDATA[This class filters log files from the given directory.
It doesn't accept paths containing _logs.
This can be used to list paths of output directory as follows:
Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir,
new OutputLogFilter()));]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.OutputLogFilter -->
<!-- start interface org.apache.hadoop.mapred.Partitioner -->
<interface name="Partitioner" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<method name="getPartition" return="int"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K2"/>
<param name="value" type="V2"/>
<param name="numPartitions" type="int"/>
<doc>
<![CDATA[Get the partition number for a given key (hence record) given the total
number of partitions i.e. number of reduce-tasks for the job.
<p>Typically a hash function on all or a subset of the key.</p>
@param key the key to be partitioned.
@param value the entry value.
@param numPartitions the total number of partitions.
@return the partition number for the <code>key</code>.]]>
</doc>
</method>
<doc>
<![CDATA[Partitions the key space.
<p><code>Partitioner</code> controls the partitioning of the keys of the
intermediate map-outputs. The key (or a subset of the key) is used to derive
the partition, typically by a hash function. The total number of partitions
is the same as the number of reduce tasks for the job. Hence this controls
which of the <code>m</code> reduce tasks the intermediate key (and hence the
record) is sent for reduction.</p>
<p>Note: A <code>Partitioner</code> is created only when there are multiple
reducers.</p>
@see Reducer]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.Partitioner -->
<!-- start interface org.apache.hadoop.mapred.RecordReader -->
<interface name="RecordReader" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="java.io.Closeable"/>
<method name="next" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Reads the next key/value pair from the input for processing.
@param key the key to read data into
@param value the value to read data into
@return true iff a key/value was read, false if at EOF]]>
</doc>
</method>
<method name="createKey" return="K"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create an object of the appropriate type to be used as a key.
@return a new key object.]]>
</doc>
</method>
<method name="createValue" return="V"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create an object of the appropriate type to be used as a value.
@return a new value object.]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns the current position in the input.
@return the current position in the input.
@throws IOException]]>
</doc>
</method>
<method name="close"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close this {@link RecordReader} to future operations.
@throws IOException]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[How much of the input has the {@link RecordReader} consumed, i.e.
how much has been processed so far?
@return progress from <code>0.0</code> to <code>1.0</code>.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>RecordReader</code> reads &lt;key, value&gt; pairs from an
{@link InputSplit}.
<p><code>RecordReader</code>, typically, converts the byte-oriented view of
the input, provided by the <code>InputSplit</code>, and presents a
record-oriented view for the {@link Mapper} and {@link Reducer} tasks for
processing. It thus assumes the responsibility of processing record
boundaries and presenting the tasks with keys and values.</p>
@see InputSplit
@see InputFormat]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.RecordReader -->
<!-- start interface org.apache.hadoop.mapred.RecordWriter -->
<interface name="RecordWriter" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="write"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Writes a key/value pair.
@param key the key to write.
@param value the value to write.
@throws IOException]]>
</doc>
</method>
<method name="close"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close this <code>RecordWriter</code> to future operations.
@param reporter facility to report progress.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>RecordWriter</code> writes the output &lt;key, value&gt; pairs
to an output file.
<p><code>RecordWriter</code> implementations write the job outputs to the
{@link FileSystem}.</p>
@see OutputFormat]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.RecordWriter -->
<!-- start interface org.apache.hadoop.mapred.Reducer -->
<interface name="Reducer" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<implements name="org.apache.hadoop.io.Closeable"/>
<method name="reduce"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K2"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[<i>Reduces</i> values for a given key.
<p>The framework calls this method for each
<code>&lt;key, (list of values)&gt;</code> pair in the grouped inputs.
Output values must be of the same type as input values. Input keys must
not be altered. The framework will <b>reuse</b> the key and value objects
that are passed into the reduce, therefore the application should clone
the objects they want to keep a copy of. In many cases, all values are
combined into zero or one value.
</p>
<p>Output pairs are collected with calls to
{@link OutputCollector#collect(Object,Object)}.</p>
<p>Applications can use the {@link Reporter} provided to report progress
or just indicate that they are alive. In scenarios where the application
takes a significant amount of time to process individual key/value
pairs, this is crucial since the framework might assume that the task has
timed-out and kill that task. The other way of avoiding this is to set
<a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.task.timeout">
mapreduce.task.timeout</a> to a high-enough value (or even zero for no
time-outs).</p>
@param key the key.
@param values the list of values to reduce.
@param output to collect keys and combined values.
@param reporter facility to report progress.]]>
</doc>
</method>
<doc>
<![CDATA[Reduces a set of intermediate values which share a key to a smaller set of
values.
<p>The number of <code>Reducer</code>s for the job is set by the user via
{@link JobConf#setNumReduceTasks(int)}. <code>Reducer</code> implementations
can access the {@link JobConf} for the job via the
{@link JobConfigurable#configure(JobConf)} method and initialize themselves.
Similarly they can use the {@link Closeable#close()} method for
de-initialization.</p>
<p><code>Reducer</code> has 3 primary phases:</p>
<ol>
<li>
<b id="Shuffle">Shuffle</b>
<p><code>Reducer</code> is input the grouped output of a {@link Mapper}.
In this phase the framework, for each <code>Reducer</code>, fetches the
relevant partition of the output of all the <code>Mapper</code>s, via HTTP.
</p>
</li>
<li>
<b id="Sort">Sort</b>
<p>The framework groups <code>Reducer</code> inputs by <code>key</code>s
(since different <code>Mapper</code>s may have output the same key) in this
stage.</p>
<p>The shuffle and sort phases occur simultaneously i.e. while outputs are
being fetched they are merged.</p>
<b id="SecondarySort">SecondarySort</b>
<p>If equivalence rules for keys while grouping the intermediates are
different from those for grouping keys before reduction, then one may
specify a <code>Comparator</code> via
{@link JobConf#setOutputValueGroupingComparator(Class)}. Since
{@link JobConf#setOutputKeyComparatorClass(Class)} can be used to
control how intermediate keys are grouped, these can be used in conjunction
to simulate <i>secondary sort on values</i>.</p>
For example, say that you want to find duplicate web pages and tag them
all with the url of the "best" known example. You would set up the job
like:
<ul>
<li>Map Input Key: url</li>
<li>Map Input Value: document</li>
<li>Map Output Key: document checksum, url pagerank</li>
<li>Map Output Value: url</li>
<li>Partitioner: by checksum</li>
<li>OutputKeyComparator: by checksum and then decreasing pagerank</li>
<li>OutputValueGroupingComparator: by checksum</li>
</ul>
</li>
<li>
<b id="Reduce">Reduce</b>
<p>In this phase the
{@link #reduce(Object, Iterator, OutputCollector, Reporter)}
method is called for each <code>&lt;key, (list of values)&gt;</code> pair in
the grouped inputs.</p>
<p>The output of the reduce task is typically written to the
{@link FileSystem} via
{@link OutputCollector#collect(Object, Object)}.</p>
</li>
</ol>
<p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p>
<p>Example:</p>
<p><blockquote><pre>
public class MyReducer&lt;K extends WritableComparable, V extends Writable&gt;
extends MapReduceBase implements Reducer&lt;K, V, K, V&gt; {
static enum MyCounters { NUM_RECORDS }
private String reduceTaskId;
private int noKeys = 0;
public void configure(JobConf job) {
reduceTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
}
public void reduce(K key, Iterator&lt;V&gt; values,
OutputCollector&lt;K, V&gt; output,
Reporter reporter)
throws IOException {
// Process
int noValues = 0;
while (values.hasNext()) {
V value = values.next();
// Increment the no. of values for this key
++noValues;
// Process the &lt;key, value&gt; pair (assume this takes a while)
// ...
// ...
// Let the framework know that we are alive, and kicking!
if ((noValues%10) == 0) {
reporter.progress();
}
// Process some more
// ...
// ...
// Output the &lt;key, value&gt;
output.collect(key, value);
}
// Increment the no. of &lt;key, list of values&gt; pairs processed
++noKeys;
// Increment counters
reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
// Every 100 keys update application-level status
if ((noKeys%100) == 0) {
reporter.setStatus(reduceTaskId + " processed " + noKeys);
}
}
}
</pre></blockquote>
@see Mapper
@see Partitioner
@see Reporter
@see MapReduceBase]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.Reducer -->
<!-- start interface org.apache.hadoop.mapred.Reporter -->
<interface name="Reporter" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Progressable"/>
<method name="setStatus"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="status" type="java.lang.String"/>
<doc>
<![CDATA[Set the status description for the task.
@param status brief description of the current status.]]>
</doc>
</method>
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.Enum"/>
<doc>
<![CDATA[Get the {@link Counter} of the given group with the given name.
@param name counter name
@return the <code>Counter</code> of the given group/name.]]>
</doc>
</method>
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Get the {@link Counter} of the given group with the given name.
@param group counter group
@param name counter name
@return the <code>Counter</code> of the given group/name.]]>
</doc>
</method>
<method name="incrCounter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<param name="amount" type="long"/>
<doc>
<![CDATA[Increments the counter identified by the key, which can be of
any {@link Enum} type, by the specified amount.
@param key key to identify the counter to be incremented. The key can
be any <code>Enum</code>.
@param amount A non-negative amount by which the counter is to
be incremented.]]>
</doc>
</method>
<method name="incrCounter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="counter" type="java.lang.String"/>
<param name="amount" type="long"/>
<doc>
<![CDATA[Increments the counter identified by the group and counter name
by the specified amount.
@param group name to identify the group of the counter to be incremented.
@param counter name to identify the counter within the group.
@param amount A non-negative amount by which the counter is to
be incremented.]]>
</doc>
</method>
<method name="getInputSplit" return="org.apache.hadoop.mapred.InputSplit"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="UnsupportedOperationException" type="java.lang.UnsupportedOperationException"/>
<doc>
<![CDATA[Get the {@link InputSplit} object for a map.
@return the <code>InputSplit</code> that the map is reading from.
@throws UnsupportedOperationException if called outside a mapper]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the progress of the task. Progress is represented as a number between
0 and 1 (inclusive).]]>
</doc>
</method>
<field name="NULL" type="org.apache.hadoop.mapred.Reporter"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[A constant of Reporter type that does nothing.]]>
</doc>
</field>
<doc>
<![CDATA[A facility for Map-Reduce applications to report progress and update
counters, status information etc.
<p>{@link Mapper} and {@link Reducer} can use the <code>Reporter</code>
provided to report progress or just indicate that they are alive. In
scenarios where the application takes a significant amount of time to
process individual key/value pairs, this is crucial since the framework
might assume that the task has timed-out and kill that task.</p>
<p>Applications can also update {@link Counters} via the provided
<code>Reporter</code>.</p>
@see Progressable
@see Counters]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.Reporter -->
<!-- start interface org.apache.hadoop.mapred.RunningJob -->
<interface name="RunningJob" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="getConfiguration" return="org.apache.hadoop.conf.Configuration"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the underlying job configuration
@return the configuration of the job.]]>
</doc>
</method>
<method name="getID" return="org.apache.hadoop.mapred.JobID"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the job identifier.
@return the job identifier.]]>
</doc>
</method>
<method name="getJobID" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="This method is deprecated and will be removed. Applications should
rather use {@link #getID()}.">
<doc>
<![CDATA[@deprecated This method is deprecated and will be removed. Applications should
rather use {@link #getID()}.]]>
</doc>
</method>
<method name="getJobName" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the name of the job.
@return the name of the job.]]>
</doc>
</method>
<method name="getJobFile" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the path of the submitted job configuration.
@return the path of the submitted job configuration.]]>
</doc>
</method>
<method name="getTrackingURL" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the URL where some job progress information will be displayed.
@return the URL where some job progress information will be displayed.]]>
</doc>
</method>
<method name="mapProgress" return="float"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0
and 1.0. When all map tasks have completed, the function returns 1.0.
@return the progress of the job's map-tasks.
@throws IOException]]>
</doc>
</method>
<method name="reduceProgress" return="float"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0
and 1.0. When all reduce tasks have completed, the function returns 1.0.
@return the progress of the job's reduce-tasks.
@throws IOException]]>
</doc>
</method>
<method name="cleanupProgress" return="float"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0
and 1.0. When all cleanup tasks have completed, the function returns 1.0.
@return the progress of the job's cleanup-tasks.
@throws IOException]]>
</doc>
</method>
<method name="setupProgress" return="float"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0
and 1.0. When all setup tasks have completed, the function returns 1.0.
@return the progress of the job's setup-tasks.
@throws IOException]]>
</doc>
</method>
<method name="isComplete" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check if the job is finished or not.
This is a non-blocking call.
@return <code>true</code> if the job is complete, else <code>false</code>.
@throws IOException]]>
</doc>
</method>
<method name="isSuccessful" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check if the job completed successfully.
@return <code>true</code> if the job succeeded, else <code>false</code>.
@throws IOException]]>
</doc>
</method>
<method name="waitForCompletion"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Blocks until the job is complete.
@throws IOException]]>
</doc>
</method>
<method name="getJobState" return="int"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns the current state of the Job.
See {@link JobStatus}.
@throws IOException]]>
</doc>
</method>
<method name="getJobStatus" return="org.apache.hadoop.mapred.JobStatus"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns a snapshot of the current status, {@link JobStatus}, of the Job.
Need to call again for latest information.
@throws IOException]]>
</doc>
</method>
<method name="killJob"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Kill the running job. Blocks until all job tasks have been killed as well.
If the job is no longer running, it simply returns.
@throws IOException]]>
</doc>
</method>
<method name="setJobPriority"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="priority" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Set the priority of a running job.
@param priority the new priority for the job.
@throws IOException]]>
</doc>
</method>
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="startFrom" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get events indicating completion (success/failure) of component tasks.
@param startFrom index to start fetching events from
@return an array of {@link TaskCompletionEvent}s
@throws IOException]]>
</doc>
</method>
<method name="killTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="shouldFail" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Kill indicated task attempt.
@param taskId the id of the task to be terminated.
@param shouldFail if true the task is failed and added to failed tasks
list, otherwise it is just killed, w/o affecting
job failure status.
@throws IOException]]>
</doc>
</method>
<method name="killTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #killTask(TaskAttemptID, boolean)}">
<param name="taskId" type="java.lang.String"/>
<param name="shouldFail" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Applications should rather use {@link #killTask(TaskAttemptID, boolean)}]]>
</doc>
</method>
<method name="getCounters" return="org.apache.hadoop.mapred.Counters"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the counters for this job.
@return the counters for this job or null if the job has been retired.
@throws IOException]]>
</doc>
</method>
<method name="getTaskDiagnostics" return="java.lang.String[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the diagnostic messages for a given task attempt.
@param taskid the id of the task attempt.
@return the list of diagnostic messages for the task
@throws IOException]]>
</doc>
</method>
<method name="getHistoryUrl" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the url where the history file is archived. Returns an empty string if
the history file is not available yet.
@return the url where the history file is archived
@throws IOException]]>
</doc>
</method>
<method name="isRetired" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check whether the job has been removed from JobTracker memory and retired.
On retire, the job history file is copied to a location known by
{@link #getHistoryUrl()}
@return <code>true</code> if the job retired, else <code>false</code>.
@throws IOException]]>
</doc>
</method>
<method name="getFailureInfo" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get failure info for the job.
@return the failure info for the job.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>RunningJob</code> is the user-interface to query for details on a
running Map-Reduce job.
<p>Clients can get hold of <code>RunningJob</code> via the {@link JobClient}
and then query the running-job for details such as name, configuration,
progress etc.</p>
@see JobClient]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.RunningJob -->
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat -->
<class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileAsBinaryInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw)
format.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat -->
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat -->
<class name="SequenceFileAsBinaryOutputFormat" extends="org.apache.hadoop.mapred.SequenceFileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileAsBinaryOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setSequenceFileOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the key class for the {@link SequenceFile}
<p>This allows the user to specify the key class to be different
from the actual class ({@link BytesWritable}) used for writing </p>
@param conf the {@link JobConf} to modify
@param theClass the SequenceFile output key class.]]>
</doc>
</method>
<method name="setSequenceFileOutputValueClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the value class for the {@link SequenceFile}
<p>This allows the user to specify the value class to be different
from the actual class ({@link BytesWritable}) used for writing </p>
@param conf the {@link JobConf} to modify
@param theClass the SequenceFile output value class.]]>
</doc>
</method>
<method name="getSequenceFileOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the key class for the {@link SequenceFile}
@return the key class of the {@link SequenceFile}]]>
</doc>
</method>
<method name="getSequenceFileOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the value class for the {@link SequenceFile}
@return the value class of the {@link SequenceFile}]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes keys, values to
{@link SequenceFile}s in binary (raw) format.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat -->
<!-- start class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat -->
<class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileAsTextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class is similar to SequenceFileInputFormat,
except it generates SequenceFileAsTextRecordReader
which converts the input keys and values to their
String forms by calling toString() method.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat -->
<!-- start class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader -->
<class name="SequenceFileAsTextRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="SequenceFileAsTextRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="createKey" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read key/value pair in a line.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class converts the input keys and values to their String forms by calling their
toString() method. This class is to SequenceFileAsTextInputFormat what LineRecordReader
is to TextInputFormat.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader -->
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter -->
<class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileInputFilter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a record reader for the given split
@param split file split
@param job job configuration
@param reporter reporter who sends report to task tracker
@return RecordReader]]>
</doc>
</method>
<method name="setFilterClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="filterClass" type="java.lang.Class"/>
<doc>
<![CDATA[set the filter class
@param conf application configuration
@param filterClass filter class]]>
</doc>
</method>
<doc>
<![CDATA[A class that allows a map/reduce job to work on a sample of sequence files.
The sample is decided by the filter class set by the job.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter -->
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFormat -->
<class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for {@link SequenceFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFormat -->
<!-- start class org.apache.hadoop.mapred.SequenceFileOutputFormat -->
<class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getReaders" return="org.apache.hadoop.io.SequenceFile.Reader[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="dir" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Open the output generated by this format.]]>
</doc>
</method>
<method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}.
@param conf the {@link JobConf}
@return the {@link CompressionType} for the output {@link SequenceFile},
defaulting to {@link CompressionType#RECORD}]]>
</doc>
</method>
<method name="setOutputCompressionType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/>
<doc>
<![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}.
@param conf the {@link JobConf} to modify
@param style the {@link CompressionType} for the output
{@link SequenceFile}]]>
</doc>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileOutputFormat -->
<!-- start class org.apache.hadoop.mapred.SequenceFileRecordReader -->
<class name="SequenceFileRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="SequenceFileRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="getKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The class of key that must be passed to {@link
#next(Object, Object)}.]]>
</doc>
</method>
<method name="getValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The class of value that must be passed to {@link
#next(Object, Object)}.]]>
</doc>
</method>
<method name="createKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getCurrentValue"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return the progress within the input split
@return 0.0 to 1.0 of the input byte range]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="seek"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="pos" type="long"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="conf" type="org.apache.hadoop.conf.Configuration"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A {@link RecordReader} for {@link SequenceFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileRecordReader -->
<!-- start class org.apache.hadoop.mapred.SkipBadRecords -->
<class name="SkipBadRecords" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SkipBadRecords"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getAttemptsToStartSkipping" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the number of Task attempts AFTER which skip mode
will be kicked off. When skip mode is kicked off, the
task reports the range of records which it will process
next to the TaskTracker, so that on failures the TT knows which
ones are possibly the bad records. On further executions,
those are skipped.
Default value is 2.
@param conf the configuration
@return attemptsToStartSkipping number of task attempts]]>
</doc>
</method>
<method name="setAttemptsToStartSkipping"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="attemptsToStartSkipping" type="int"/>
<doc>
<![CDATA[Set the number of Task attempts AFTER which skip mode
will be kicked off. When skip mode is kicked off, the
task reports the range of records which it will process
next to the TaskTracker, so that on failures the TT knows which
ones are possibly the bad records. On further executions,
those are skipped.
Default value is 2.
@param conf the configuration
@param attemptsToStartSkipping number of task attempts]]>
</doc>
</method>
<method name="getAutoIncrMapperProcCount" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the flag which if set to true,
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented
by MapRunner after invoking the map function. This value must be set to
false for applications which process the records asynchronously
or buffer the input records. For example streaming.
In such cases applications should increment this counter on their own.
Default value is true.
@param conf the configuration
@return <code>true</code> if auto increment
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setAutoIncrMapperProcCount"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="autoIncr" type="boolean"/>
<doc>
<![CDATA[Set the flag which if set to true,
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented
by MapRunner after invoking the map function. This value must be set to
false for applications which process the records asynchronously
or buffer the input records. For example streaming.
In such cases applications should increment this counter on their own.
Default value is true.
@param conf the configuration
@param autoIncr whether to auto increment
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.]]>
</doc>
</method>
<method name="getAutoIncrReducerProcCount" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the flag which if set to true,
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented
by framework after invoking the reduce function. This value must be set to
false for applications which process the records asynchronously
or buffer the input records. For example streaming.
In such cases applications should increment this counter on their own.
Default value is true.
@param conf the configuration
@return <code>true</code> if auto increment
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setAutoIncrReducerProcCount"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="autoIncr" type="boolean"/>
<doc>
<![CDATA[Set the flag which if set to true,
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented
by framework after invoking the reduce function. This value must be set to
false for applications which process the records asynchronously
or buffer the input records. For example streaming.
In such cases applications should increment this counter on their own.
Default value is true.
@param conf the configuration
@param autoIncr whether to auto increment
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.]]>
</doc>
</method>
<method name="getSkipOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the directory to which skipped records are written. By default it is
the sub directory of the output _logs directory.
User can stop writing skipped records by setting the value to null.
@param conf the configuration.
@return path skip output directory. Null is returned if this is not set
and output directory is also not set.]]>
</doc>
</method>
<method name="setSkipOutputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the directory to which skipped records are written. By default it is
the sub directory of the output _logs directory.
User can stop writing skipped records by setting the value to null.
@param conf the configuration.
@param path skip output directory path]]>
</doc>
</method>
<method name="getMapperMaxSkipRecords" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the number of acceptable skip records surrounding the bad record PER
bad record in mapper. The number includes the bad record as well.
To turn the feature of detection/skipping of bad records off, set the
value to 0.
The framework tries to narrow down the skipped range by retrying
until this threshold is met OR all attempts get exhausted for this task.
Set the value to Long.MAX_VALUE to indicate that the framework need not try to
narrow down. Whatever records (depending on the application) get skipped are
acceptable.
Default value is 0.
@param conf the configuration
@return maxSkipRecs acceptable skip records.]]>
</doc>
</method>
<method name="setMapperMaxSkipRecords"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="maxSkipRecs" type="long"/>
<doc>
<![CDATA[Set the number of acceptable skip records surrounding the bad record PER
bad record in mapper. The number includes the bad record as well.
To turn the feature of detection/skipping of bad records off, set the
value to 0.
The framework tries to narrow down the skipped range by retrying
until this threshold is met OR all attempts get exhausted for this task.
Set the value to Long.MAX_VALUE to indicate that the framework need not try to
narrow down. Whatever records (depending on the application) get skipped are
acceptable.
Default value is 0.
@param conf the configuration
@param maxSkipRecs acceptable skip records.]]>
</doc>
</method>
<method name="getReducerMaxSkipGroups" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the number of acceptable skip groups surrounding the bad group PER
bad group in reducer. The number includes the bad group as well.
To turn the feature of detection/skipping of bad groups off, set the
value to 0.
The framework tries to narrow down the skipped range by retrying
until this threshold is met OR all attempts get exhausted for this task.
Set the value to Long.MAX_VALUE to indicate that the framework need not try to
narrow down. Whatever groups (depending on the application) get skipped are
acceptable.
Default value is 0.
@param conf the configuration
@return maxSkipGrps acceptable skip groups.]]>
</doc>
</method>
<method name="setReducerMaxSkipGroups"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="maxSkipGrps" type="long"/>
<doc>
<![CDATA[Set the number of acceptable skip groups surrounding the bad group PER
bad group in reducer. The number includes the bad group as well.
To turn the feature of detection/skipping of bad groups off, set the
value to 0.
The framework tries to narrow down the skipped range by retrying
until this threshold is met OR all attempts get exhausted for this task.
Set the value to Long.MAX_VALUE to indicate that the framework need not try to
narrow down. Whatever groups (depending on the application) get skipped are
acceptable.
Default value is 0.
@param conf the configuration
@param maxSkipGrps acceptable skip groups.]]>
</doc>
</method>
<field name="COUNTER_GROUP" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Special counters which are written by the application and are
used by the framework for detecting bad records. For detecting bad records
these counters must be incremented by the application.]]>
</doc>
</field>
<field name="COUNTER_MAP_PROCESSED_RECORDS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Number of processed map records.
@see SkipBadRecords#getAutoIncrMapperProcCount(Configuration)]]>
</doc>
</field>
<field name="COUNTER_REDUCE_PROCESSED_GROUPS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Number of processed reduce groups.
@see SkipBadRecords#getAutoIncrReducerProcCount(Configuration)]]>
</doc>
</field>
<doc>
<![CDATA[Utility class for skip bad records functionality. It contains various
settings related to skipping of bad records.
<p>Hadoop provides an optional mode of execution in which the bad records
are detected and skipped in further attempts.</p>
<p>This feature can be used when map/reduce tasks crash deterministically on
certain input. This happens due to bugs in the map/reduce function. The usual
course would be to fix these bugs. But sometimes this is not possible;
perhaps the bug is in third party libraries for which the source code is
not available. Due to this, the task never reaches completion even with
multiple attempts and complete data for that task is lost.</p>
<p>With this feature, only a small portion of data is lost surrounding
the bad record, which may be acceptable for some user applications.
See {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}.</p>
<p>The skipping mode gets kicked off after a certain number of failures;
see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}.</p>
<p>In the skipping mode, the map/reduce task maintains the record range which
is getting processed at all times. Before giving the input to the
map/reduce function, it sends this record range to the Task tracker.
If the task crashes, the Task tracker knows which one was the last reported
range. On further attempts that range gets skipped.</p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SkipBadRecords -->
<!-- start class org.apache.hadoop.mapred.SplitLocationInfo -->
<class name="SplitLocationInfo" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SplitLocationInfo" type="java.lang.String, boolean"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="isOnDisk" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="isInMemory" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getLocation" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapred.SplitLocationInfo -->
<!-- start interface org.apache.hadoop.mapred.TaskAttemptContext -->
<interface name="TaskAttemptContext" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<method name="getTaskAttemptID" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getProgressible" return="org.apache.hadoop.util.Progressable"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</interface>
<!-- end interface org.apache.hadoop.mapred.TaskAttemptContext -->
<!-- start class org.apache.hadoop.mapred.TaskAttemptID -->
<class name="TaskAttemptID" extends="org.apache.hadoop.mapreduce.TaskAttemptID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskAttemptID" type="org.apache.hadoop.mapred.TaskID, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}.
@param taskId TaskID that this task belongs to
@param id the task attempt number]]>
</doc>
</constructor>
<constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int"
static="false" final="false" visibility="public"
deprecated="Use {@link #TaskAttemptID(String, int, TaskType, int, int)}.">
<doc>
<![CDATA[Constructs a TaskAttemptID object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param isMap whether the tip is a map
@param taskId taskId number
@param id the task attempt number
@deprecated Use {@link #TaskAttemptID(String, int, TaskType, int, int)}.]]>
</doc>
</constructor>
<constructor name="TaskAttemptID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskAttemptID object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param type the TaskType
@param taskId taskId number
@param id the task attempt number]]>
</doc>
</constructor>
<constructor name="TaskAttemptID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="downgrade" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="old" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
<doc>
<![CDATA[Downgrade a new TaskAttemptID to an old one
@param old the new id
@return either old or a new TaskAttemptID constructed to match old]]>
</doc>
</method>
<method name="getTaskID" return="org.apache.hadoop.mapred.TaskID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="read" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="forName" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<doc>
<![CDATA[Construct a TaskAttemptID object from given string
@return constructed TaskAttemptID object or null if the given String is null
@throws IllegalArgumentException if the given string is malformed]]>
</doc>
</method>
<method name="getTaskAttemptIDsPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jtIdentifier" type="java.lang.String"/>
<param name="jobId" type="java.lang.Integer"/>
<param name="isMap" type="java.lang.Boolean"/>
<param name="taskId" type="java.lang.Integer"/>
<param name="attemptId" type="java.lang.Integer"/>
<doc>
<![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can
be given null, in which case that part of the regex will be generic.
For example to obtain a regex matching <i>all task attempt IDs</i>
of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first
map task</i>, we would use :
<pre>
TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
</pre>
which will return :
<pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre>
@param jtIdentifier jobTracker identifier, or null
@param jobId job number, or null
@param isMap whether the tip is a map, or null
@param taskId taskId number, or null
@param attemptId the task attempt number, or null
@return a regex pattern matching TaskAttemptIDs]]>
</doc>
</method>
<method name="getTaskAttemptIDsPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jtIdentifier" type="java.lang.String"/>
<param name="jobId" type="java.lang.Integer"/>
<param name="type" type="org.apache.hadoop.mapreduce.TaskType"/>
<param name="taskId" type="java.lang.Integer"/>
<param name="attemptId" type="java.lang.Integer"/>
<doc>
<![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can
be given null, in which case that part of the regex will be generic.
For example to obtain a regex matching <i>all task attempt IDs</i>
of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first
map task</i>, we would use :
<pre>
TaskAttemptID.getTaskAttemptIDsPattern(null, null, TaskType.MAP, 1, null);
</pre>
which will return :
<pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre>
@param jtIdentifier jobTracker identifier, or null
@param jobId job number, or null
@param type the {@link TaskType}
@param taskId taskId number, or null
@param attemptId the task attempt number, or null
@return a regex pattern matching TaskAttemptIDs]]>
</doc>
</method>
<doc>
<![CDATA[TaskAttemptID represents the immutable and unique identifier for
a task attempt. Each task attempt is one particular instance of a Map or
Reduce Task identified by its TaskID.
TaskAttemptID consists of 2 parts. First part is the
{@link TaskID}, that this TaskAttemptID belongs to.
Second part is the task attempt number. <br>
An example TaskAttemptID is :
<code>attempt_200707121733_0003_m_000005_0</code>, which represents the
zeroth task attempt for the fifth map task in the third job
running at the jobtracker started at <code>200707121733</code>.
<p>
Applications should never construct or parse TaskAttemptID strings,
but rather use appropriate constructors or the {@link #forName(String)}
method.
@see JobID
@see TaskID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskAttemptID -->
<!-- start class org.apache.hadoop.mapred.TaskCompletionEvent -->
<class name="TaskCompletionEvent" extends="org.apache.hadoop.mapreduce.TaskCompletionEvent"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskCompletionEvent"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default constructor for Writable.]]>
</doc>
</constructor>
<constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapred.TaskAttemptID, int, boolean, org.apache.hadoop.mapred.TaskCompletionEvent.Status, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructor. eventId should be created externally and incremented
per event for each job.
@param eventId event id, event id should be unique and assigned
incrementally, starting from 0.
@param taskId task id
@param status task's status
@param taskTrackerHttp task tracker's host:port for http.]]>
</doc>
</constructor>
<method name="getTaskId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #getTaskAttemptId()} instead.">
<doc>
<![CDATA[Returns task id.
@return task id
@deprecated use {@link #getTaskAttemptId()} instead.]]>
</doc>
</method>
<method name="getTaskAttemptId" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns task attempt id.
@return task attempt id]]>
</doc>
</method>
<method name="getTaskStatus" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns {@link Status}
@return task completion status]]>
</doc>
</method>
<method name="setTaskId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #setTaskAttemptId(TaskAttemptID)} instead.">
<param name="taskId" type="java.lang.String"/>
<doc>
<![CDATA[Sets task id.
@param taskId
@deprecated use {@link #setTaskAttemptId(TaskAttemptID)} instead.]]>
</doc>
</method>
<method name="setTaskID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #setTaskAttemptId(TaskAttemptID)} instead.">
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<doc>
<![CDATA[Sets task id.
@param taskId
@deprecated use {@link #setTaskAttemptId(TaskAttemptID)} instead.]]>
</doc>
</method>
<method name="setTaskAttemptId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<doc>
<![CDATA[Sets task id.
@param taskId]]>
</doc>
</method>
<field name="EMPTY_ARRAY" type="org.apache.hadoop.mapred.TaskCompletionEvent[]"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This is used to track task completion events on
job tracker.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskCompletionEvent -->
<!-- start class org.apache.hadoop.mapred.TaskCompletionEvent.Status -->
<class name="TaskCompletionEvent.Status" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapred.TaskCompletionEvent.Status -->
<!-- start class org.apache.hadoop.mapred.TaskID -->
<class name="TaskID" extends="org.apache.hadoop.mapreduce.TaskID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int"
static="false" final="false" visibility="public"
deprecated="Use {@link #TaskID(String, int, TaskType, int)}">
<doc>
<![CDATA[Constructs a TaskID object from given {@link JobID}.
@param jobId JobID that this tip belongs to
@param isMap whether the tip is a map
@param id the tip number
@deprecated Use {@link #TaskID(String, int, TaskType, int)}]]>
</doc>
</constructor>
<constructor name="TaskID" type="java.lang.String, int, boolean, int"
static="false" final="false" visibility="public"
deprecated="Use {@link #TaskID(org.apache.hadoop.mapreduce.JobID, TaskType,
int)}">
<doc>
<![CDATA[Constructs a TaskInProgressId object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param isMap whether the tip is a map
@param id the tip number
@deprecated Use {@link #TaskID(org.apache.hadoop.mapreduce.JobID, TaskType,
int)}]]>
</doc>
</constructor>
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, org.apache.hadoop.mapreduce.TaskType, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskID object from given {@link JobID}.
@param jobId JobID that this tip belongs to
@param type the {@link TaskType}
@param id the tip number]]>
</doc>
</constructor>
<constructor name="TaskID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskInProgressId object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param type the {@link TaskType}
@param id the tip number]]>
</doc>
</constructor>
<constructor name="TaskID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="downgrade" return="org.apache.hadoop.mapred.TaskID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="old" type="org.apache.hadoop.mapreduce.TaskID"/>
<doc>
<![CDATA[Downgrade a new TaskID to an old one
@param old a new or old TaskID
@return either old or a new TaskID built to match old]]>
</doc>
</method>
<method name="read" return="org.apache.hadoop.mapred.TaskID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getTaskIDsPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType,
Integer)}">
<param name="jtIdentifier" type="java.lang.String"/>
<param name="jobId" type="java.lang.Integer"/>
<param name="isMap" type="java.lang.Boolean"/>
<param name="taskId" type="java.lang.Integer"/>
<doc>
<![CDATA[Returns a regex pattern which matches task IDs. Arguments can
be given null, in which case that part of the regex will be generic.
For example to obtain a regex matching <i>the first map task</i>
of <i>any jobtracker</i>, of <i>any job</i>, we would use :
<pre>
TaskID.getTaskIDsPattern(null, null, true, 1);
</pre>
which will return :
<pre> "task_[^_]*_[0-9]*_m_000001*" </pre>
@param jtIdentifier jobTracker identifier, or null
@param jobId job number, or null
@param isMap whether the tip is a map, or null
@param taskId taskId number, or null
@return a regex pattern matching TaskIDs
@deprecated Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType,
Integer)}]]>
</doc>
</method>
<method name="getTaskIDsPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jtIdentifier" type="java.lang.String"/>
<param name="jobId" type="java.lang.Integer"/>
<param name="type" type="org.apache.hadoop.mapreduce.TaskType"/>
<param name="taskId" type="java.lang.Integer"/>
<doc>
<![CDATA[Returns a regex pattern which matches task IDs. Arguments can
be given null, in which case that part of the regex will be generic.
For example to obtain a regex matching <i>the first map task</i>
of <i>any jobtracker</i>, of <i>any job</i>, we would use :
<pre>
TaskID.getTaskIDsPattern(null, null, TaskType.MAP, 1);
</pre>
which will return :
<pre> "task_[^_]*_[0-9]*_m_000001*" </pre>
@param jtIdentifier jobTracker identifier, or null
@param jobId job number, or null
@param type the {@link TaskType}, or null
@param taskId taskId number, or null
@return a regex pattern matching TaskIDs]]>
</doc>
</method>
<method name="forName" return="org.apache.hadoop.mapred.TaskID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
</method>
<doc>
<![CDATA[TaskID represents the immutable and unique identifier for
a Map or Reduce Task. Each TaskID encompasses multiple attempts made to
execute the Map or Reduce Task, each of which is uniquely identified by
their TaskAttemptID.
TaskID consists of 3 parts. First part is the {@link JobID}, that this
TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r'
representing whether the task is a map task or a reduce task.
And the third part is the task number. <br>
An example TaskID is :
<code>task_200707121733_0003_m_000005</code> , which represents the
fifth map task in the third job running at the jobtracker
started at <code>200707121733</code>.
<p>
Applications should never construct or parse TaskID strings,
but rather use the appropriate constructors or the {@link #forName(String)}
method.
@see JobID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskID -->
<!-- start class org.apache.hadoop.mapred.TaskReport -->
<class name="TaskReport" extends="org.apache.hadoop.mapreduce.TaskReport"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskReport"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getTaskId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The string of the task id.]]>
</doc>
</method>
<method name="getTaskID" return="org.apache.hadoop.mapred.TaskID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The id of the task.]]>
</doc>
</method>
<method name="getCounters" return="org.apache.hadoop.mapred.Counters"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setSuccessfulAttempt"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="t" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<doc>
<![CDATA[set successful attempt ID of the task.]]>
</doc>
</method>
<method name="getSuccessfulTaskAttempt" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the attempt ID that took this task to completion]]>
</doc>
</method>
<method name="setRunningTaskAttempts"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="runningAttempts" type="java.util.Collection"/>
<doc>
<![CDATA[set running attempt(s) of the task.]]>
</doc>
</method>
<method name="getRunningTaskAttempts" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the running task attempt IDs for this task]]>
</doc>
</method>
<method name="setFinishTime"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="finishTime" type="long"/>
<doc>
<![CDATA[set finish time of task.
@param finishTime finish time of task.]]>
</doc>
</method>
<method name="setStartTime"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="startTime" type="long"/>
<doc>
<![CDATA[set start time of the task.]]>
</doc>
</method>
<doc>
<![CDATA[A report on the state of a task.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskReport -->
<!-- start class org.apache.hadoop.mapred.TextInputFormat -->
<class name="TextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="TextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
Either linefeed or carriage-return is used to signal end of line. Keys are
the position in the file, and values are the line of text.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TextInputFormat -->
<!-- start class org.apache.hadoop.mapred.TextOutputFormat -->
<class name="TextOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TextOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes plain text files.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TextOutputFormat -->
<!-- start class org.apache.hadoop.mapred.Utils -->
<class name="Utils" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Utils"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[A utility class. It provides
a path filter utility to filter out output/part files in the output dir.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.Utils -->
</package>
<package name="org.apache.hadoop.mapred.jobcontrol">
<!-- start class org.apache.hadoop.mapred.jobcontrol.Job -->
<class name="Job" extends="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Job" type="org.apache.hadoop.mapred.JobConf, java.util.ArrayList"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Construct a job.
@param jobConf a mapred job configuration representing a job to be executed.
@param dependingJobs an array of jobs the current job depends on]]>
</doc>
</constructor>
<constructor name="Job" type="org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="getAssignedJobID" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the mapred ID of this job as assigned by the mapred framework.]]>
</doc>
</method>
<method name="setAssignedJobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="setAssignedJobID should not be called.
JOBID is set by the framework.">
<param name="mapredJobID" type="org.apache.hadoop.mapred.JobID"/>
<doc>
<![CDATA[@deprecated setAssignedJobID should not be called.
JOBID is set by the framework.]]>
</doc>
</method>
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the mapred job conf of this job]]>
</doc>
</method>
<method name="setJobConf"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Set the mapred job conf for this job.
@param jobConf the mapred job conf for this job.]]>
</doc>
</method>
<method name="getState" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the state of this job]]>
</doc>
</method>
<method name="setState"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="state" type="int"/>
<doc>
<![CDATA[This is a no-op function. It's a behavior change from 1.x: we can no longer
change the state from the job.
@param state
the new state for this job.]]>
</doc>
</method>
<method name="addDependingJob" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="dependingJob" type="org.apache.hadoop.mapred.jobcontrol.Job"/>
<doc>
<![CDATA[Add a job to this job's dependency list.
Dependent jobs can only be added while a Job
is waiting to run, not during or afterwards.
@param dependingJob Job that this Job depends on.
@return <tt>true</tt> if the Job was added.]]>
</doc>
</method>
<method name="getJobClient" return="org.apache.hadoop.mapred.JobClient"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the job client of this job]]>
</doc>
</method>
<method name="getDependingJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the depending jobs of this job]]>
</doc>
</method>
<method name="getMapredJobID" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the mapred ID of this job as assigned by the mapred framework.]]>
</doc>
</method>
<method name="setMapredJobID"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="mapredJobID" type="java.lang.String"/>
<doc>
<![CDATA[This is a no-op method for backward compatibility. It's a behavior change
from 1.x; we cannot change job ids from the job.
@param mapredJobID
the mapred job ID for this job.]]>
</doc>
</method>
<field name="SUCCESS" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="WAITING" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="RUNNING" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="READY" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FAILED" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DEPENDENT_FAILED" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapred.jobcontrol.Job -->
<!-- start class org.apache.hadoop.mapred.jobcontrol.JobControl -->
<class name="JobControl" extends="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobControl" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a job control for a group of jobs.
@param groupName a name identifying this group]]>
</doc>
</constructor>
<method name="getWaitingJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the waiting state]]>
</doc>
</method>
<method name="getRunningJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the running state]]>
</doc>
</method>
<method name="getReadyJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the ready state]]>
</doc>
</method>
<method name="getSuccessfulJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the success state]]>
</doc>
</method>
<method name="getFailedJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="addJobs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobs" type="java.util.Collection"/>
<doc>
<![CDATA[Add a collection of jobs
@param jobs]]>
</doc>
</method>
<method name="getState" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the thread state]]>
</doc>
</method>
</class>
<!-- end class org.apache.hadoop.mapred.jobcontrol.JobControl -->
</package>
<package name="org.apache.hadoop.mapred.join">
<!-- start class org.apache.hadoop.mapred.join.ArrayListBackedIterator -->
<class name="ArrayListBackedIterator" extends="org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
<constructor name="ArrayListBackedIterator"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="ArrayListBackedIterator" type="java.util.ArrayList"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This class provides an implementation of ResetableIterator. The
implementation uses an {@link java.util.ArrayList} to store elements
added to it, replaying them as requested.
Prefer {@link StreamBackedIterator}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.ArrayListBackedIterator -->
<!-- start interface org.apache.hadoop.mapred.join.ComposableInputFormat -->
<interface name="ComposableInputFormat" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputFormat"/>
<method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Refinement of InputFormat requiring implementors to provide
ComposableRecordReader instead of RecordReader.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.join.ComposableInputFormat -->
<!-- start interface org.apache.hadoop.mapred.join.ComposableRecordReader -->
<interface name="ComposableRecordReader" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<implements name="java.lang.Comparable"/>
<method name="id" return="int"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the position in the collector this class occupies.]]>
</doc>
</method>
<method name="key" return="K"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the key this RecordReader would supply on a call to next(K,V)]]>
</doc>
</method>
<method name="key"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Clone the key at the head of this RecordReader into the object provided.]]>
</doc>
</method>
<method name="hasNext" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns true if the stream is not empty, but provides no guarantee that
a call to next(K,V) will succeed.]]>
</doc>
</method>
<method name="skip"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]>
</doc>
</method>
<method name="accept"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[While key-value pairs from this RecordReader match the given key, register
them with the JoinCollector provided.]]>
</doc>
</method>
<doc>
<![CDATA[Additional operations required of a RecordReader to participate in a join.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.join.ComposableRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.CompositeInputFormat -->
<class name="CompositeInputFormat" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/>
<constructor name="CompositeInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Interpret a given string as a composite expression.
{@code
func ::= <ident>([<func>,]*<func>)
func ::= tbl(<class>,"<path>")
class ::= @see java.lang.Class#forName(java.lang.String)
path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String)
}
Reads expression from the <tt>mapred.join.expr</tt> property and
user-supplied join types from <tt>mapred.join.define.&lt;ident&gt;</tt>
types. Paths supplied to <tt>tbl</tt> are given as input paths to the
InputFormat class listed.
@see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]>
</doc>
</method>
<method name="addDefaults"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Adds the default set of identifiers to the parser.]]>
</doc>
</method>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the
ith split from each child to the ith composite split.]]>
</doc>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Construct a CompositeRecordReader for the children of this InputFormat
as defined in the init expression.
The outermost join need only be composable, not necessarily a composite.
Mandating TupleWritable isn't strictly correct.]]>
</doc>
</method>
<method name="compose" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="inf" type="java.lang.Class"/>
<param name="path" type="java.lang.String"/>
<doc>
<![CDATA[Convenience method for constructing composite formats.
Given InputFormat class (inf), path (p) return:
{@code tbl(<inf>, <p>) }]]>
</doc>
</method>
<method name="compose" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="op" type="java.lang.String"/>
<param name="inf" type="java.lang.Class"/>
<param name="path" type="java.lang.String[]"/>
<doc>
<![CDATA[Convenience method for constructing composite formats.
Given operation (op), Object class (inf), set of paths (p) return:
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
</doc>
</method>
<method name="compose" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="op" type="java.lang.String"/>
<param name="inf" type="java.lang.Class"/>
<param name="path" type="org.apache.hadoop.fs.Path[]"/>
<doc>
<![CDATA[Convenience method for constructing composite formats.
Given operation (op), Object class (inf), set of paths (p) return:
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
</doc>
</method>
<doc>
<![CDATA[An InputFormat capable of performing joins over a set of data sources sorted
and partitioned the same way.
A user may define new join types by setting the property
<tt>mapred.join.define.&lt;ident&gt;</tt> to a classname. In the expression
<tt>mapred.join.expr</tt>, the identifier will be assumed to be a
ComposableRecordReader.
<tt>mapred.join.keycomparator</tt> can be a classname used to compare keys
in the join.
@see #setFormat
@see JoinRecordReader
@see MultiFilterRecordReader]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.CompositeInputFormat -->
<!-- start class org.apache.hadoop.mapred.join.CompositeInputSplit -->
<class name="CompositeInputSplit" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputSplit"/>
<constructor name="CompositeInputSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CompositeInputSplit" type="int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="s" type="org.apache.hadoop.mapred.InputSplit"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add an InputSplit to this collection.
@throws IOException If capacity was not specified during construction
or if capacity has been reached.]]>
</doc>
</method>
<method name="get" return="org.apache.hadoop.mapred.InputSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Get ith child InputSplit.]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return the aggregate length of all child InputSplits currently added.]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the length of ith child InputSplit.]]>
</doc>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Collect a set of hosts from all child InputSplits.]]>
</doc>
</method>
<method name="getLocation" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[getLocations from ith InputSplit.]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Write splits in the following format.
{@code
<count><class1><class2>...<classn><split1><split2>...<splitn>
}]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}
@throws IOException If the child InputSplit cannot be read, typically
for failing access checks.]]>
</doc>
</method>
<doc>
<![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted
into this collection must have a public default constructor.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.CompositeInputSplit -->
<!-- start class org.apache.hadoop.mapred.join.CompositeRecordReader -->
<class name="CompositeRecordReader" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<constructor name="CompositeRecordReader" type="int, int, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a RecordReader with <tt>capacity</tt> children to position
<tt>id</tt> in the parent reader.
The id of a root CompositeRecordReader is -1 by convention, but relying
on this is not recommended.]]>
</doc>
</constructor>
<method name="combine" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/>
</method>
<method name="id" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the position in the collector this class occupies.]]>
</doc>
</method>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getRecordReaderQueue" return="java.util.PriorityQueue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return sorted list of RecordReaders for this composite.]]>
</doc>
</method>
<method name="getComparator" return="org.apache.hadoop.io.WritableComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return comparator defining the ordering for RecordReaders in this
composite.]]>
</doc>
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="rr" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add a RecordReader to this collection.
The id() of a RecordReader determines where in the Tuple its
entry will appear. Adding RecordReaders with the same id has
undefined behavior.]]>
</doc>
</method>
<method name="key" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the key for the current join or the value at the top of the
RecordReader heap.]]>
</doc>
</method>
<method name="key"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Clone the key at the top of this RR into the given object.]]>
</doc>
</method>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return true if it is possible that this could emit more values.]]>
</doc>
</method>
<method name="skip"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Pass skip key to child RRs.]]>
</doc>
</method>
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Obtain an iterator over the child RRs apropos of the value type
ultimately emitted from this join.]]>
</doc>
</method>
<method name="accept"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[If key provided matches that of this Composite, give JoinCollector
iterator over values it may emit.]]>
</doc>
</method>
<method name="fillJoinCollector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="iterkey" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For all child RRs offering the key provided, obtain an iterator
at that position in the JoinCollector.]]>
</doc>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<doc>
<![CDATA[Implement Comparable contract (compare key of join or head of heap
with that of another).]]>
</doc>
</method>
<method name="createKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a new key value common to all child RRs.
@throws ClassCastException if key classes differ.]]>
</doc>
</method>
<method name="createInternalValue" return="org.apache.hadoop.mapred.join.TupleWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Create a value to be used internally for joins.]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Unsupported (returns zero in all cases).]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close all child RRs.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Report progress as the minimum of all child RR progress.]]>
</doc>
</method>
<field name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"
transient="false" volatile="false"
static="false" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="kids" type="org.apache.hadoop.mapred.join.ComposableRecordReader[]"
transient="false" volatile="false"
static="false" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key
type and partitioning.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.CompositeRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.InnerJoinRecordReader -->
<class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="combine" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<doc>
<![CDATA[Return true iff the tuple is full (all data sources contain this key).]]>
</doc>
</method>
<doc>
<![CDATA[Full inner join.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.InnerJoinRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.JoinRecordReader -->
<class name="JoinRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<constructor name="JoinRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Emit the next set of key, value pairs as defined by the child
RecordReaders and operation associated with this composite RR.]]>
</doc>
</method>
<method name="createValue" return="org.apache.hadoop.mapred.join.TupleWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return an iterator wrapping the JoinCollector.]]>
</doc>
</method>
<doc>
<![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.JoinRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader -->
<class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<constructor name="MultiFilterRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="emit" return="V"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For each tuple emitted, return a value (typically one of the values
in the tuple).
Modifying the Writables in the tuple is permitted and unlikely to affect
join behavior in most cases, but it is not recommended. It's safer to
clone first.]]>
</doc>
</method>
<method name="combine" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<doc>
<![CDATA[Default implementation offers {@link #emit} every Tuple from the
collector (the outer join of child RRs).]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="createValue" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return an iterator returning a single value from the tuple.
@see MultiFilterDelegationIterator]]>
</doc>
</method>
<doc>
<![CDATA[Base class for Composite join returning values derived from multiple
sources, but generally not tuples.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.OuterJoinRecordReader -->
<class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="combine" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<doc>
<![CDATA[Emit everything from the collector.]]>
</doc>
</method>
<doc>
<![CDATA[Full outer join.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.OuterJoinRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.OverrideRecordReader -->
<class name="OverrideRecordReader" extends="org.apache.hadoop.mapred.join.MultiFilterRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="emit" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<doc>
<![CDATA[Emit the value with the highest position in the tuple.]]>
</doc>
</method>
<method name="fillJoinCollector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="iterkey" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Instead of filling the JoinCollector with iterators from all
data sources, fill only the rightmost for this key.
This not only saves space by discarding the other sources, but
it also emits the number of key-value pairs in the preferred
RecordReader instead of repeating that stream n times, where
n is the cardinality of the cross product of the discarded
streams for the given key.]]>
</doc>
</method>
<doc>
<![CDATA[Prefer the &quot;rightmost&quot; data source for this key.
For example, <tt>override(S1,S2,S3)</tt> will prefer values
from S3 over S2, and values from S2 over S1 for all keys
emitted from all sources.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.OverrideRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.Parser -->
<class name="Parser" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Parser"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[Very simple shift-reduce parser for join expressions.
This should be sufficient for the user extension permitted now, but ought to
be replaced with a parser generator if more complex grammars are supported.
In particular, this &quot;shift-reduce&quot; parser has no states. Each set
of formals requires a different internal node type, which is responsible for
interpreting the list of tokens it receives. This is sufficient for the
current grammar, but it has several annoying properties that might inhibit
extension. In particular, parentheses are always function calls; an
algebraic or filter grammar would not only require a node type, but must
also work around the internals of this parser.
For most other cases, adding classes to the hierarchy- particularly by
extending JoinRecordReader and MultiFilterRecordReader- is fairly
straightforward. One need only override the relevant method(s) (usually only
{@link CompositeRecordReader#combine}) and include a property to map its
value to an identifier in the parser.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser -->
<!-- start class org.apache.hadoop.mapred.join.Parser.Node -->
<class name="Parser.Node" extends="java.lang.Object"
abstract="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/>
<constructor name="Node" type="java.lang.String"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="addIdentifier"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<param name="ident" type="java.lang.String"/>
<param name="mcstrSig" type="java.lang.Class[]"/>
<param name="nodetype" type="java.lang.Class"/>
<param name="cl" type="java.lang.Class"/>
<exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/>
<doc>
<![CDATA[For a given identifier, add a mapping to the nodetype for the parse
tree and to the ComposableRecordReader to be created, including the
formals required to invoke the constructor.
The nodetype and constructor signature should be filled in from the
child node.]]>
</doc>
</method>
<method name="setID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="id" type="int"/>
</method>
<method name="setKeyComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="cmpcl" type="java.lang.Class"/>
</method>
<field name="rrCstrMap" type="java.util.Map"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="id" type="int"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="ident" type="java.lang.String"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="cmpcl" type="java.lang.Class"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.Node -->
<!-- start class org.apache.hadoop.mapred.join.Parser.NodeToken -->
<class name="Parser.NodeToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.NodeToken -->
<!-- start class org.apache.hadoop.mapred.join.Parser.NumToken -->
<class name="Parser.NumToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="NumToken" type="double"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getNum" return="double"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.NumToken -->
<!-- start class org.apache.hadoop.mapred.join.Parser.StrToken -->
<class name="Parser.StrToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="StrToken" type="org.apache.hadoop.mapred.join.Parser.TType, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getStr" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.StrToken -->
<!-- start class org.apache.hadoop.mapred.join.Parser.Token -->
<class name="Parser.Token" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<method name="getType" return="org.apache.hadoop.mapred.join.Parser.TType"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getNum" return="double"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getStr" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Tagged-union type for tokens from the join expression.
@see Parser.TType]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.Token -->
<!-- start class org.apache.hadoop.mapred.join.Parser.TType -->
<class name="Parser.TType" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.join.Parser.TType[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.join.Parser.TType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.TType -->
<!-- start interface org.apache.hadoop.mapred.join.ResetableIterator -->
<interface name="ResetableIterator" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/>
<doc>
<![CDATA[This defines an interface to a stateful Iterator that can replay elements
added to it directly.
Note that this does not extend {@link java.util.Iterator}.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.join.ResetableIterator -->
<!-- start class org.apache.hadoop.mapred.join.StreamBackedIterator -->
<class name="StreamBackedIterator" extends="org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
<constructor name="StreamBackedIterator"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This class provides an implementation of ResetableIterator. This
implementation uses a byte array to store elements added to it.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.StreamBackedIterator -->
<!-- start class org.apache.hadoop.mapred.join.TupleWritable -->
<class name="TupleWritable" extends="org.apache.hadoop.mapreduce.lib.join.TupleWritable"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TupleWritable"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create an empty tuple with no allocated storage for writables.]]>
</doc>
</constructor>
<constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Initialize tuple with storage; unknown whether any of them contain
&quot;written&quot; values.]]>
</doc>
</constructor>
<doc>
<![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s.
This is *not* a general-purpose tuple type. In almost all cases, users are
encouraged to implement their own serializable types, which can perform
better validation and provide more efficient encodings than this class is
capable of. TupleWritable relies on the join framework for type safety and
assumes its instances will rarely be persisted, assumptions not only
incompatible with, but contrary to the general case.
@see org.apache.hadoop.io.Writable]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.TupleWritable -->
<!-- start class org.apache.hadoop.mapred.join.WrappedRecordReader -->
<class name="WrappedRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<implements name="org.apache.hadoop.conf.Configurable"/>
<method name="id" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="key" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the key at the head of this RR.]]>
</doc>
</method>
<method name="key"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="qkey" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Clone the key at the head of this RR into the object supplied.]]>
</doc>
</method>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return true if the RR- including the k,v pair stored in this object-
is exhausted.]]>
</doc>
</method>
<method name="skip"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read the next k,v pair into the head of this object; return true iff
the RR and this are exhausted.]]>
</doc>
</method>
<method name="accept"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add an iterator to the collector at the position occupied by this
RecordReader over the values in this stream paired with the key
provided (ie register a stream of values from this source matching K
with a collector).]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="U"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Write key-value pair at the head of this stream to the objects provided;
get next key-value pair from proxied RR.]]>
</doc>
</method>
<method name="createKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Request new key from proxied RR.]]>
</doc>
</method>
<method name="createValue" return="U"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Request new value from proxied RR.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Request progress from proxied RR.]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Request position from proxied RR.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Forward close request to proxied RR.]]>
</doc>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<doc>
<![CDATA[Implement Comparable contract (compare key at head of proxied RR
with that of another).]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="java.lang.Object"/>
<doc>
<![CDATA[Return true iff compareTo(other) returns 0.]]>
</doc>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[Proxy class for a RecordReader participating in the join framework.
This class keeps track of the &quot;head&quot; key-value pair for the
provided RecordReader and keeps a store of values matching a key when
this source is participating in a join.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.WrappedRecordReader -->
</package>
<package name="org.apache.hadoop.mapred.lib">
<!-- start class org.apache.hadoop.mapred.lib.BinaryPartitioner -->
<class name="BinaryPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Partitioner"/>
<constructor name="BinaryPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<doc>
<![CDATA[Partition {@link BinaryComparable} keys using a configurable part of
the bytes array returned by {@link BinaryComparable#getBytes()}.
@see org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.BinaryPartitioner -->
<!-- start class org.apache.hadoop.mapred.lib.ChainMapper -->
<class name="ChainMapper" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="ChainMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructor.]]>
</doc>
</constructor>
<method name="addMapper"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="klass" type="java.lang.Class"/>
<param name="inputKeyClass" type="java.lang.Class"/>
<param name="inputValueClass" type="java.lang.Class"/>
<param name="outputKeyClass" type="java.lang.Class"/>
<param name="outputValueClass" type="java.lang.Class"/>
<param name="byValue" type="boolean"/>
<param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Adds a Mapper class to the chain job's JobConf.
<p>
It has to be specified how key and values are passed from one element of
the chain to the next, by value or by reference. If a Mapper leverages the
assumed semantics that the key and values are not modified by the collector
'by value' must be used. If the Mapper does not expect this semantics, as
an optimization to avoid serialization and deserialization 'by reference'
can be used.
<p>
For the added Mapper the configuration given for it,
<code>mapperConf</code>, has precedence over the job's JobConf. This
precedence is in effect when the task is running.
<p>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainMapper, this is done by the addMapper for the last mapper in the chain.
<p>
@param job job's JobConf to add the Mapper class.
@param klass the Mapper class to add.
@param inputKeyClass mapper input key class.
@param inputValueClass mapper input value class.
@param outputKeyClass mapper output key class.
@param outputValueClass mapper output value class.
@param byValue indicates if key/values should be passed by value
to the next Mapper in the chain, if any.
@param mapperConf a JobConf with the configuration for the Mapper
class. It is recommended to use a JobConf without default values using the
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Configures the ChainMapper and all the Mappers in the chain.
<p>
If this method is overridden <code>super.configure(...)</code> should be
invoked at the beginning of the overriding method.]]>
</doc>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Chains the <code>map(...)</code> methods of the Mappers in the chain.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Closes the ChainMapper and all the Mappers in the chain.
<p>
If this method is overridden <code>super.close()</code> should be
invoked at the end of the overriding method.]]>
</doc>
</method>
<doc>
<![CDATA[The ChainMapper class allows the use of multiple Mapper classes within a single
Map task.
<p>
The Mapper classes are invoked in a chained (or piped) fashion, the output of
the first becomes the input of the second, and so on until the last Mapper,
the output of the last Mapper will be written to the task's output.
<p>
The key functionality of this feature is that the Mappers in the chain do not
need to be aware that they are executed in a chain. This enables having
reusable specialized Mappers that can be combined to perform composite
operations within a single task.
<p>
Special care has to be taken when creating chains that the key/values output
by a Mapper are valid for the following Mapper in the chain. It is assumed
all Mappers and the Reduce in the chain use matching output and input key and
value classes as no conversion is done by the chaining code.
<p>
Using the ChainMapper and the ChainReducer classes it is possible to compose
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
immediate benefit of this pattern is a dramatic reduction in disk IO.
<p>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainMapper, this is done by the addMapper for the last mapper in the chain.
<p>
ChainMapper usage pattern:
<p>
<pre>
...
conf.setJobName("chain");
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
JobConf mapAConf = new JobConf(false);
...
ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
Text.class, Text.class, true, mapAConf);
JobConf mapBConf = new JobConf(false);
...
ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
LongWritable.class, Text.class, false, mapBConf);
JobConf reduceConf = new JobConf(false);
...
ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
Text.class, Text.class, true, reduceConf);
ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
LongWritable.class, Text.class, false, null);
ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
LongWritable.class, LongWritable.class, true, null);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
...
JobClient jc = new JobClient(conf);
RunningJob job = jc.submitJob(conf);
...
</pre>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.ChainMapper -->
<!-- start class org.apache.hadoop.mapred.lib.ChainReducer -->
<class name="ChainReducer" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Reducer"/>
<constructor name="ChainReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructor.]]>
</doc>
</constructor>
<method name="setReducer"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="klass" type="java.lang.Class"/>
<param name="inputKeyClass" type="java.lang.Class"/>
<param name="inputValueClass" type="java.lang.Class"/>
<param name="outputKeyClass" type="java.lang.Class"/>
<param name="outputValueClass" type="java.lang.Class"/>
<param name="byValue" type="boolean"/>
<param name="reducerConf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Sets the Reducer class to the chain job's JobConf.
<p>
It has to be specified how key and values are passed from one element of
the chain to the next, by value or by reference. If a Reducer leverages the
assumed semantics that the key and values are not modified by the collector
'by value' must be used. If the Reducer does not expect this semantics, as
an optimization to avoid serialization and deserialization 'by reference'
can be used.
<p>
For the added Reducer the configuration given for it,
<code>reducerConf</code>, has precedence over the job's JobConf. This
precedence is in effect when the task is running.
<p>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainReducer, this is done by the setReducer or the addMapper for the last
element in the chain.
@param job job's JobConf to add the Reducer class.
@param klass the Reducer class to add.
@param inputKeyClass reducer input key class.
@param inputValueClass reducer input value class.
@param outputKeyClass reducer output key class.
@param outputValueClass reducer output value class.
@param byValue indicates if key/values should be passed by value
to the next Mapper in the chain, if any.
@param reducerConf a JobConf with the configuration for the Reducer
class. It is recommended to use a JobConf without default values using the
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
</doc>
</method>
<method name="addMapper"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="klass" type="java.lang.Class"/>
<param name="inputKeyClass" type="java.lang.Class"/>
<param name="inputValueClass" type="java.lang.Class"/>
<param name="outputKeyClass" type="java.lang.Class"/>
<param name="outputValueClass" type="java.lang.Class"/>
<param name="byValue" type="boolean"/>
<param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Adds a Mapper class to the chain job's JobConf.
<p>
It has to be specified how key and values are passed from one element of
the chain to the next, by value or by reference. If a Mapper leverages the
assumed semantics that the key and values are not modified by the collector
'by value' must be used. If the Mapper does not expect this semantics, as
an optimization to avoid serialization and deserialization 'by reference'
can be used.
<p>
For the added Mapper the configuration given for it,
<code>mapperConf</code>, has precedence over the job's JobConf. This
precedence is in effect when the task is running.
<p>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainMapper, this is done by the addMapper for the last mapper in the chain.
@param job chain job's JobConf to add the Mapper class.
@param klass the Mapper class to add.
@param inputKeyClass mapper input key class.
@param inputValueClass mapper input value class.
@param outputKeyClass mapper output key class.
@param outputValueClass mapper output value class.
@param byValue indicates if key/values should be passed by value
to the next Mapper in the chain, if any.
@param mapperConf a JobConf with the configuration for the Mapper
class. It is recommended to use a JobConf without default values using the
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Configures the ChainReducer, the Reducer and all the Mappers in the chain.
<p>
If this method is overridden <code>super.configure(...)</code> should be
invoked at the beginning of the overriding method.]]>
</doc>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Chains the <code>reduce(...)</code> method of the Reducer with the
<code>map(...) </code> methods of the Mappers in the chain.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Closes the ChainReducer, the Reducer and all the Mappers in the chain.
<p>
If this method is overridden <code>super.close()</code> should be
invoked at the end of the overriding method.]]>
</doc>
</method>
<doc>
<![CDATA[The ChainReducer class allows chaining multiple Mapper classes after a
Reducer within the Reducer task.
<p>
For each record output by the Reducer, the Mapper classes are invoked in a
chained (or piped) fashion, the output of the first becomes the input of the
second, and so on until the last Mapper, the output of the last Mapper will
be written to the task's output.
<p>
The key functionality of this feature is that the Mappers in the chain do not
need to be aware that they are executed after the Reducer or in a chain.
This enables having reusable specialized Mappers that can be combined to
perform composite operations within a single task.
<p>
Special care has to be taken when creating chains that the key/values output
by a Mapper are valid for the following Mapper in the chain. It is assumed
all Mappers and the Reduce in the chain use matching output and input key and
value classes as no conversion is done by the chaining code.
<p>
Using the ChainMapper and the ChainReducer classes it is possible to compose
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
immediate benefit of this pattern is a dramatic reduction in disk IO.
<p>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainReducer, this is done by the setReducer or the addMapper for the last
element in the chain.
<p>
ChainReducer usage pattern:
<p>
<pre>
...
conf.setJobName("chain");
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
JobConf mapAConf = new JobConf(false);
...
ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
Text.class, Text.class, true, mapAConf);
JobConf mapBConf = new JobConf(false);
...
ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
LongWritable.class, Text.class, false, mapBConf);
JobConf reduceConf = new JobConf(false);
...
ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
Text.class, Text.class, true, reduceConf);
ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
LongWritable.class, Text.class, false, null);
ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
LongWritable.class, LongWritable.class, true, null);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
...
JobClient jc = new JobClient(conf);
RunningJob job = jc.submitJob(conf);
...
</pre>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.ChainReducer -->
<!-- start class org.apache.hadoop.mapred.lib.CombineFileInputFormat -->
<class name="CombineFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputFormat"/>
<constructor name="CombineFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[default constructor]]>
</doc>
</constructor>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createPool"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="Use {@link #createPool(List)}.">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="filters" type="java.util.List"/>
<doc>
<![CDATA[Create a new pool and add the filters to it.
A split cannot have files from different pools.
@deprecated Use {@link #createPool(List)}.]]>
</doc>
</method>
<method name="createPool"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="Use {@link #createPool(PathFilter...)}.">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/>
<doc>
<![CDATA[Create a new pool and add the filters to it.
A pathname can satisfy any one of the specified filters.
A split cannot have files from different pools.
@deprecated Use {@link #createPool(PathFilter...)}.]]>
</doc>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This is not implemented yet.]]>
</doc>
</method>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[List input directories.
Subclasses may override to, e.g., select only files matching a regular
expression.
@param job the job to list input paths for
@return array of FileStatus objects
@throws IOException if zero items.]]>
</doc>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<doc>
<![CDATA[An abstract {@link org.apache.hadoop.mapred.InputFormat} that returns {@link CombineFileSplit}'s
in {@link org.apache.hadoop.mapred.InputFormat#getSplits(JobConf, int)} method.
Splits are constructed from the files under the input paths.
A split cannot have files from different pools.
Each split returned may contain blocks from different files.
If a maxSplitSize is specified, then blocks on the same node are
combined to form a single split. Blocks that are left over are
then combined with other blocks in the same rack.
If maxSplitSize is not specified, then blocks from the same rack
are combined in a single split; no attempt is made to create
node-local splits.
If the maxSplitSize is equal to the block size, then this class
is similar to the default splitting behaviour in Hadoop: each
block is a locally processed split.
Subclasses implement {@link org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit, JobConf, Reporter)}
to construct <code>RecordReader</code>'s for <code>CombineFileSplit</code>'s.
@see CombineFileSplit]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.CombineFileInputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.CombineFileRecordReader -->
<class name="CombineFileRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.lib.CombineFileSplit, org.apache.hadoop.mapred.Reporter, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[A generic RecordReader that can hand out different recordReaders
for each chunk in the CombineFileSplit.]]>
</doc>
</constructor>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[return the amount of data processed]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[return progress based on the amount of data processed so far.]]>
</doc>
</method>
<method name="initNextRecordReader" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]>
</doc>
</method>
<field name="split" type="org.apache.hadoop.mapred.lib.CombineFileSplit"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="jc" type="org.apache.hadoop.mapred.JobConf"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="reporter" type="org.apache.hadoop.mapred.Reporter"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="rrConstructor" type="java.lang.reflect.Constructor"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="idx" type="int"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="progress" type="long"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="curReader" type="org.apache.hadoop.mapred.RecordReader"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A generic RecordReader that can hand out different recordReaders
for each chunk in a {@link CombineFileSplit}.
A CombineFileSplit can combine data chunks from multiple files.
This class allows using different RecordReaders for processing
these data chunks from different files.
@see CombineFileSplit]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.CombineFileRecordReader -->
<!-- start class org.apache.hadoop.mapred.lib.CombineFileRecordReaderWrapper -->
<class name="CombineFileRecordReaderWrapper" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="CombineFileRecordReaderWrapper" type="org.apache.hadoop.mapred.FileInputFormat, org.apache.hadoop.mapred.lib.CombineFileSplit, org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.Reporter, java.lang.Integer"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A wrapper class for a record reader that handles a single file split. It
delegates most of the methods to the wrapped instance. A concrete subclass
needs to provide a constructor that calls this parent constructor with the
appropriate input format. The subclass constructor must satisfy the specific
constructor signature that is required by
<code>CombineFileRecordReader</code>.
Subclassing is needed to get a concrete record reader wrapper because of the
constructor requirement.
@see CombineFileRecordReader
@see CombineFileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.CombineFileRecordReaderWrapper -->
<!-- start class org.apache.hadoop.mapred.lib.CombineFileSplit -->
<class name="CombineFileSplit" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputSplit"/>
<constructor name="CombineFileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.lib.CombineFileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Copy constructor]]>
</doc>
</constructor>
<method name="getJob" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapred.lib.CombineFileSplit -->
<!-- start class org.apache.hadoop.mapred.lib.CombineSequenceFileInputFormat -->
<class name="CombineSequenceFileInputFormat" extends="org.apache.hadoop.mapred.lib.CombineFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="CombineSequenceFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Input format that is a <code>CombineFileInputFormat</code>-equivalent for
<code>SequenceFileInputFormat</code>.
@see CombineFileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.CombineSequenceFileInputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.CombineTextInputFormat -->
<class name="CombineTextInputFormat" extends="org.apache.hadoop.mapred.lib.CombineFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="CombineTextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Input format that is a <code>CombineFileInputFormat</code>-equivalent for
<code>TextInputFormat</code>.
@see CombineFileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.CombineTextInputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce -->
<class name="FieldSelectionMapReduce" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<implements name="org.apache.hadoop.mapred.Reducer"/>
<constructor name="FieldSelectionMapReduce"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="val" type="V"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[The identity function. Input key/value pair is written directly to output.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="LOG" type="org.slf4j.Logger"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class implements a mapper/reducer class that can be used to perform
field selections in a manner similar to unix cut. The input data is treated
as fields separated by a user specified separator (the default value is
"\t"). The user can specify a list of fields that form the map output keys,
and a list of fields that form the map output values. If the inputformat is
TextInputFormat, the mapper will ignore the key to the map function, and the
fields are from the value only. Otherwise, the fields are the union of those
from the key and those from the value.
The field separator is under attribute "mapreduce.fieldsel.data.field.separator"
The map output field list spec is under attribute
"mapreduce.fieldsel.map.output.key.value.fields.spec".
The value is expected to be like "keyFieldsSpec:valueFieldsSpec"
key/valueFieldsSpec are comma (,) separated field spec: fieldSpec,fieldSpec,fieldSpec ...
Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range
(like 2-5) to specify a range of fields, or an open range (like 3-) specifying all
the fields starting from field 3. The open range field spec applies to value fields only.
They have no effect on the key fields.
Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys,
and use fields 6,5,1,2,3,7 and above for values.
The reduce output field list spec is under attribute
"mapreduce.fieldsel.reduce.output.key.value.fields.spec".
The reducer extracts output key/value pairs in a similar manner, except that
the key is never ignored.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce -->
<!-- start class org.apache.hadoop.mapred.lib.FilterOutputFormat -->
<class name="FilterOutputFormat" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
<constructor name="FilterOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="FilterOutputFormat" type="org.apache.hadoop.mapred.OutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a FilterOutputFormat based on the supplied output format.
@param out the underlying OutputFormat]]>
</doc>
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="baseOut" type="org.apache.hadoop.mapred.OutputFormat"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[FilterOutputFormat is a convenience class that wraps OutputFormat.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.FilterOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.HashPartitioner -->
<class name="HashPartitioner" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Partitioner"/>
<constructor name="HashPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K2"/>
<param name="value" type="V2"/>
<param name="numReduceTasks" type="int"/>
<doc>
<![CDATA[Use {@link Object#hashCode()} to partition.]]>
</doc>
</method>
<doc>
<![CDATA[Partition keys by their {@link Object#hashCode()}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.HashPartitioner -->
<!-- start class org.apache.hadoop.mapred.lib.IdentityMapper -->
<class name="IdentityMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="IdentityMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="val" type="V"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[The identity function. Input key/value pair is written directly to
output.]]>
</doc>
</method>
<doc>
<![CDATA[Implements the identity function, mapping inputs directly to outputs.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.IdentityMapper -->
<!-- start class org.apache.hadoop.mapred.lib.IdentityReducer -->
<class name="IdentityReducer" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Reducer"/>
<constructor name="IdentityReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Writes all keys and values directly to output.]]>
</doc>
</method>
<doc>
<![CDATA[Performs no reduction, writing all input values directly to the output.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.IdentityReducer -->
<!-- start class org.apache.hadoop.mapred.lib.InputSampler -->
<class name="InputSampler" extends="org.apache.hadoop.mapreduce.lib.partition.InputSampler"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InputSampler" type="org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="writePartitionFile"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="sampler" type="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapred.lib.InputSampler -->
<!-- start class org.apache.hadoop.mapred.lib.InverseMapper -->
<class name="InverseMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="InverseMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[The inverse function. Input keys and values are swapped.]]>
</doc>
</method>
<doc>
<![CDATA[A {@link Mapper} that swaps keys and values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.InverseMapper -->
<!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator -->
<class name="KeyFieldBasedComparator" extends="org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="KeyFieldBasedComparator"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<doc>
<![CDATA[This comparator implementation provides a subset of the features provided
by the Unix/GNU Sort. In particular, the supported features are:
-n, (Sort numerically)
-r, (Reverse the result of comparison)
-k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number
of the field to use, and c is the number of the first character from the
beginning of the field. Fields and character posns are numbered starting
with 1; a character position of zero in pos2 indicates the field's last
character. If '.c' is omitted from pos1, it defaults to 1 (the beginning
of the field); if omitted from pos2, it defaults to 0 (the end of the
field). opts are ordering options (any of 'nr' as described above).
We assume that the fields in the key are separated by
{@link JobContext#MAP_OUTPUT_KEY_FIELD_SEPARATOR}]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator -->
<!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner -->
<class name="KeyFieldBasedPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Partitioner"/>
<constructor name="KeyFieldBasedPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<doc>
<![CDATA[Defines a way to partition keys based on certain key fields (also see
{@link KeyFieldBasedComparator}).
The key specification supported is of the form -k pos1[,pos2], where,
pos is of the form f[.c][opts], where f is the number
of the key field to use, and c is the number of the first character from
the beginning of the field. Fields and character posns are numbered
starting with 1; a character position of zero in pos2 indicates the
field's last character. If '.c' is omitted from pos1, it defaults to 1
(the beginning of the field); if omitted from pos2, it defaults to 0
(the end of the field).]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner -->
<!-- start class org.apache.hadoop.mapred.lib.LazyOutputFormat -->
<class name="LazyOutputFormat" extends="org.apache.hadoop.mapred.lib.FilterOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="LazyOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setOutputFormatClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the underlying output format for LazyOutputFormat.
@param job the {@link JobConf} to modify
@param theClass the underlying class]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A convenience class that creates output lazily.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.LazyOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.LongSumReducer -->
<class name="LongSumReducer" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Reducer"/>
<constructor name="LongSumReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A {@link Reducer} that sums long values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.LongSumReducer -->
<!-- start class org.apache.hadoop.mapred.lib.MultipleInputs -->
<class name="MultipleInputs" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleInputs"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<param name="inputFormatClass" type="java.lang.Class"/>
<doc>
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} to the list of
inputs for the map-reduce job.
@param conf The configuration of the job
@param path {@link Path} to be added to the list of inputs for the job
@param inputFormatClass {@link InputFormat} class to use for this path]]>
</doc>
</method>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<param name="inputFormatClass" type="java.lang.Class"/>
<param name="mapperClass" type="java.lang.Class"/>
<doc>
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} and
{@link Mapper} to the list of inputs for the map-reduce job.
@param conf The configuration of the job
@param path {@link Path} to be added to the list of inputs for the job
@param inputFormatClass {@link InputFormat} class to use for this path
@param mapperClass {@link Mapper} class to use for this path]]>
</doc>
</method>
<doc>
<![CDATA[This class supports MapReduce jobs that have multiple input paths with
a different {@link InputFormat} and {@link Mapper} for each path]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultipleInputs -->
<!-- start class org.apache.hadoop.mapred.lib.MultipleOutputFormat -->
<class name="MultipleOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a composite record writer that can write key/value data to different
output files
@param fs
the file system to use
@param job
the job conf for the job
@param name
the leaf file name for the output file (such as "part-00000")
@param arg3
a progressable for reporting progress.
@return a composite record writer
@throws IOException]]>
</doc>
</method>
<method name="generateLeafFileName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Generate the leaf name for the output file name. The default behavior does
not change the leaf file name (such as part-00000)
@param name
the leaf file name for the output file
@return the given leaf file name]]>
</doc>
</method>
<method name="generateFileNameForKeyValue" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Generate the file output file name based on the given key and the leaf file
name. The default behavior is that the file name does not depend on the
key.
@param key
the key of the output data
@param name
the leaf file name
@return generated file name]]>
</doc>
</method>
<method name="generateActualKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<doc>
<![CDATA[Generate the actual key from the given key/value. The default behavior is that
the actual key is equal to the given key
@param key
the key of the output data
@param value
the value of the output data
@return the actual key derived from the given key/value]]>
</doc>
</method>
<method name="generateActualValue" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<doc>
<![CDATA[Generate the actual value from the given key and value. The default behavior is that
the actual value is equal to the given value
@param key
the key of the output data
@param value
the value of the output data
@return the actual value derived from the given key/value]]>
</doc>
</method>
<method name="getInputFileBasedOutputFileName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Generate the outfile name based on a given name and the input file name. If
the {@link JobContext#MAP_INPUT_FILE} does not exist (i.e. this is not for a map only job),
the given name is returned unchanged. If the config value for
"num.of.trailing.legs.to.use" is not set, or is set to 0 or a negative value, the given
name is returned unchanged. Otherwise, return a file name consisting of the
N trailing legs of the input file name where N is the config value for
"num.of.trailing.legs.to.use".
@param job
the job config
@param name
the output file name
@return the outfile name based on a given name and the input file name.]]>
</doc>
</method>
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@param fs
the file system to use
@param job
a job conf object
@param name
the name of the file over which a record writer object will be
constructed
@param arg3
a progressable object
@return A RecordWriter object over the given file
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[This abstract class extends the FileOutputFormat, allowing the output data
to be written to different output files. There are three basic use cases for
this class.
Case one: This class is used for a map reduce job with at least one reducer.
The reducer wants to write data to different files depending on the actual
keys. It is assumed that a key (or value) encodes the actual key (value)
and the desired location for the actual key (value).
Case two: This class is used for a map only job. The job wants to use an
output file name that is either a part of the input file name of the input
data, or some derivation of it.
Case three: This class is used for a map only job. The job wants to use an
output file name that depends on both the keys and the input file name.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultipleOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.MultipleOutputs -->
<class name="MultipleOutputs" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleOutputs" type="org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Creates and initializes multiple named outputs support, it should be
instantiated in the Mapper/Reducer configure method.
@param job the job configuration object]]>
</doc>
</constructor>
<method name="getNamedOutputsList" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Returns list of channel names.
@param conf job conf
@return List of channel Names]]>
</doc>
</method>
<method name="isMultiNamedOutput" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<doc>
<![CDATA[Returns if a named output is multiple.
@param conf job conf
@param namedOutput named output
@return <code>true</code> if the named output is multi, <code>false</code>
if it is single. If the named output is not defined it returns
<code>false</code>]]>
</doc>
</method>
<method name="getNamedOutputFormatClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<doc>
<![CDATA[Returns the named output OutputFormat.
@param conf job conf
@param namedOutput named output
@return namedOutput OutputFormat]]>
</doc>
</method>
<method name="getNamedOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<doc>
<![CDATA[Returns the key class for a named output.
@param conf job conf
@param namedOutput named output
@return class for the named output key]]>
</doc>
</method>
<method name="getNamedOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<doc>
<![CDATA[Returns the value class for a named output.
@param conf job conf
@param namedOutput named output
@return class of named output value]]>
</doc>
</method>
<method name="addNamedOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<param name="outputFormatClass" type="java.lang.Class"/>
<param name="keyClass" type="java.lang.Class"/>
<param name="valueClass" type="java.lang.Class"/>
<doc>
<![CDATA[Adds a named output for the job.
@param conf job conf to add the named output
@param namedOutput named output name, it has to be a word, letters
and numbers only, cannot be the word 'part' as
that is reserved for the
default output.
@param outputFormatClass OutputFormat class.
@param keyClass key class
@param valueClass value class]]>
</doc>
</method>
<method name="addMultiNamedOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<param name="outputFormatClass" type="java.lang.Class"/>
<param name="keyClass" type="java.lang.Class"/>
<param name="valueClass" type="java.lang.Class"/>
<doc>
<![CDATA[Adds a multi named output for the job.
@param conf job conf to add the named output
@param namedOutput named output name, it has to be a word, letters
and numbers only, cannot be the word 'part' as
that is reserved for the
default output.
@param outputFormatClass OutputFormat class.
@param keyClass key class
@param valueClass value class]]>
</doc>
</method>
<method name="setCountersEnabled"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="enabled" type="boolean"/>
<doc>
<![CDATA[Enables or disables counters for the named outputs.
<p>
By default these counters are disabled.
<p>
MultipleOutputs supports counters, by default they are disabled.
The counters group is the {@link MultipleOutputs} class name.
</p>
The names of the counters are the same as the named outputs. For multi
named outputs the name of the counter is the concatenation of the named
output, an underscore '_' and the multiname.
@param conf job conf in which to enable or disable the counters.
@param enabled indicates if the counters will be enabled or not.]]>
</doc>
</method>
<method name="getCountersEnabled" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Returns if the counters for the named outputs are enabled or not.
<p>
By default these counters are disabled.
<p>
MultipleOutputs supports counters, by default they are disabled.
The counters group is the {@link MultipleOutputs} class name.
</p>
The names of the counters are the same as the named outputs. For multi
named outputs the name of the counter is the concatenation of the named
output, an underscore '_' and the multiname.
@param conf job conf to read the counters setting from.
@return TRUE if the counters are enabled, FALSE if they are disabled.]]>
</doc>
</method>
<method name="getNamedOutputs" return="java.util.Iterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns iterator with the defined name outputs.
@return iterator with the defined named outputs]]>
</doc>
</method>
<method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="namedOutput" type="java.lang.String"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the output collector for a named output.
@param namedOutput the named output name
@param reporter the reporter
@return the output collector for the given named output
@throws IOException thrown if output collector could not be created]]>
</doc>
</method>
<method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="namedOutput" type="java.lang.String"/>
<param name="multiName" type="java.lang.String"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the output collector for a multi named output.
@param namedOutput the named output name
@param multiName the multi name part
@param reporter the reporter
@return the output collector for the given named output
@throws IOException thrown if output collector could not be created]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Closes all the opened named outputs.
<p>
If overridden, subclasses must invoke <code>super.close()</code> at the
end of their <code>close()</code>.
@throws java.io.IOException thrown if any of the MultipleOutput files
could not be closed properly.]]>
</doc>
</method>
<doc>
<![CDATA[The MultipleOutputs class simplifies writing to additional outputs other
than the job default output via the <code>OutputCollector</code> passed to
the <code>map()</code> and <code>reduce()</code> methods of the
<code>Mapper</code> and <code>Reducer</code> implementations.
<p>
Each additional output, or named output, may be configured with its own
<code>OutputFormat</code>, with its own key class and with its own value
class.
<p>
A named output can be a single file or a multi file. The latter is referred to as
a multi named output.
<p>
A multi named output is an unbound set of files all sharing the same
<code>OutputFormat</code>, key class and value class configuration.
<p>
When named outputs are used within a <code>Mapper</code> implementation,
key/values written to a named output are not part of the reduce phase, only
key/values written to the job <code>OutputCollector</code> are part of the
reduce phase.
<p>
MultipleOutputs supports counters, by default they are disabled. The counters
group is the {@link MultipleOutputs} class name.
</p>
The names of the counters are the same as the named outputs. For multi
named outputs the name of the counter is the concatenation of the named
output, an underscore '_' and the multiname.
<p>
Job configuration usage pattern is:
<pre>
JobConf conf = new JobConf();
conf.setInputPath(inDir);
FileOutputFormat.setOutputPath(conf, outDir);
conf.setMapperClass(MOMap.class);
conf.setReducerClass(MOReduce.class);
...
// Defines additional single text based output 'text' for the job
MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
LongWritable.class, Text.class);
// Defines additional multi sequencefile based output 'seq' for the
// job
MultipleOutputs.addMultiNamedOutput(conf, "seq",
SequenceFileOutputFormat.class,
LongWritable.class, Text.class);
...
JobClient jc = new JobClient();
RunningJob job = jc.submitJob(conf);
...
</pre>
<p>
Usage pattern in a Reducer is:
<pre>
public class MOReduce implements
Reducer&lt;WritableComparable, Writable&gt; {
private MultipleOutputs mos;
public void configure(JobConf conf) {
...
mos = new MultipleOutputs(conf);
}
public void reduce(WritableComparable key, Iterator&lt;Writable&gt; values,
OutputCollector output, Reporter reporter)
throws IOException {
...
mos.getCollector("text", reporter).collect(key, new Text("Hello"));
mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
...
}
public void close() throws IOException {
mos.close();
...
}
}
</pre>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultipleOutputs -->
<!-- start class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat -->
<class name="MultipleSequenceFileOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleSequenceFileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class extends the MultipleOutputFormat, allowing to write the output data
to different output files in sequence file output format.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat -->
<class name="MultipleTextOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleTextOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class extends the MultipleOutputFormat, allowing to write the output
data to different output files in Text output format.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.MultithreadedMapRunner -->
<class name="MultithreadedMapRunner" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.MapRunnable"/>
<constructor name="MultithreadedMapRunner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Multithreaded implementation for {@link MapRunnable}.
<p>
It can be used instead of the default implementation,
{@link org.apache.hadoop.mapred.MapRunner}, when the Map
operation is not CPU bound in order to improve throughput.
<p>
Map implementations using this MapRunnable must be thread-safe.
<p>
The Map-Reduce job has to be configured to use this MapRunnable class (using
the JobConf.setMapRunnerClass method) and
the number of threads the thread-pool can use with the
<code>mapred.map.multithreadedrunner.threads</code> property, its default
value is 10 threads.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultithreadedMapRunner -->
<!-- start class org.apache.hadoop.mapred.lib.NLineInputFormat -->
<class name="NLineInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="NLineInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Logically splits the set of input files for the job, splits N lines
of the input as one split.
@see org.apache.hadoop.mapred.FileInputFormat#getSplits(JobConf, int)]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="createFileSplit" return="org.apache.hadoop.mapred.FileSplit"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fileName" type="org.apache.hadoop.fs.Path"/>
<param name="begin" type="long"/>
<param name="length" type="long"/>
<doc>
<![CDATA[NLineInputFormat uses LineRecordReader, which always reads
(and consumes) at least one character out of its upper split
boundary. So to make sure that each mapper gets N lines, we
move back the upper split limits of each split
by one character here.
@param fileName Path of file
@param begin the position of the first byte in the file to process
@param length number of bytes in InputSplit
@return FileSplit]]>
</doc>
</method>
<doc>
<![CDATA[NLineInputFormat which splits N lines of input as one split.
In many "pleasantly" parallel applications, each process/mapper
processes the same input file(s), but with computations that are
controlled by different parameters (referred to as "parameter sweeps").
One way to achieve this, is to specify a set of parameters
(one set per line) as input in a control file
(which is the input path to the map-reduce application,
whereas the input dataset is specified
via a config variable in JobConf).
The NLineInputFormat can be used in such applications, that splits
the input file such that by default, one line is fed as
a value to one map task, and key is the offset.
i.e. (k,v) is (LongWritable, Text).
The location hints will span the whole mapred cluster.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.NLineInputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.NullOutputFormat -->
<class name="NullOutputFormat" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
<constructor name="NullOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<doc>
<![CDATA[Consume all outputs and put them in /dev/null.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.NullOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.RegexMapper -->
<class name="RegexMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="RegexMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.RegexMapper -->
<!-- start class org.apache.hadoop.mapred.lib.TokenCountMapper -->
<class name="TokenCountMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="TokenCountMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A {@link Mapper} that maps text values into &lt;token,freq&gt; pairs. Uses
{@link StringTokenizer} to break text into tokens.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.TokenCountMapper -->
<!-- start class org.apache.hadoop.mapred.lib.TotalOrderPartitioner -->
<class name="TotalOrderPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Partitioner"/>
<constructor name="TotalOrderPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="setPartitionFile"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use
{@link #setPartitionFile(Configuration, Path)}
instead">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="p" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the path to the SequenceFile storing the sorted partition keyset.
It must be the case that for <tt>R</tt> reduces, there are <tt>R-1</tt>
keys in the SequenceFile.
@deprecated Use
{@link #setPartitionFile(Configuration, Path)}
instead]]>
</doc>
</method>
<method name="getPartitionFile" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use
{@link #getPartitionFile(Configuration)}
instead">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the path to the SequenceFile storing the sorted partition keyset.
@see #setPartitionFile(JobConf,Path)
@deprecated Use
{@link #getPartitionFile(Configuration)}
instead]]>
</doc>
</method>
<doc>
<![CDATA[Partitioner effecting a total order by reading split points from
an externally generated source.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.TotalOrderPartitioner -->
</package>
<package name="org.apache.hadoop.mapred.lib.aggregate">
<!-- start class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum -->
<class name="DoubleValueSum" extends="org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="DoubleValueSum"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This class implements a value aggregator that sums up a sequence of double
values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMax -->
<class name="LongValueMax" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="LongValueMax"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This class implements a value aggregator that maintains the maximum of
a sequence of long values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMax -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMin -->
<class name="LongValueMin" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="LongValueMin"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This class implements a value aggregator that maintains the minimum of
a sequence of long values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMin -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueSum -->
<class name="LongValueSum" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="LongValueSum"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This class implements a value aggregator that sums up
a sequence of long values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueSum -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMax -->
<class name="StringValueMax" extends="org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="StringValueMax"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This class implements a value aggregator that maintains the biggest of
a sequence of strings.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMax -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMin -->
<class name="StringValueMin" extends="org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="StringValueMin"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This class implements a value aggregator that maintains the smallest of
a sequence of strings.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMin -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount -->
<class name="UniqValueCount" extends="org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="UniqValueCount"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<constructor name="UniqValueCount" type="long"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[constructor
@param maxNum the limit in the number of unique values to keep.]]>
</doc>
</constructor>
<doc>
<![CDATA[This class implements a value aggregator that dedupes a sequence of objects.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
<class name="UserDefinedValueAggregatorDescriptor" extends="org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/>
<constructor name="UserDefinedValueAggregatorDescriptor" type="java.lang.String, org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@param className the class name of the user defined descriptor class
@param job a configuration object used for descriptor configuration]]>
</doc>
</constructor>
<method name="createInstance" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="className" type="java.lang.String"/>
<doc>
<![CDATA[Create an instance of the given class
@param className the name of the class
@return a dynamically created instance of the given class]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Do nothing.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a wrapper for a user defined value aggregator
descriptor.
It serves two functions: One is to create an object of
ValueAggregatorDescriptor from the name of a user defined class that may be
dynamically loaded. The other is to delegate invocations of
generateKeyValPairs function to the created object.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
<!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator -->
<interface name="ValueAggregator" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
<doc>
<![CDATA[This interface defines the minimal protocol for value aggregators.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor -->
<class name="ValueAggregatorBaseDescriptor" extends="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/>
<constructor name="ValueAggregatorBaseDescriptor"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="generateEntry" return="java.util.Map.Entry"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="type" type="java.lang.String"/>
<param name="id" type="java.lang.String"/>
<param name="val" type="org.apache.hadoop.io.Text"/>
<doc>
<![CDATA[@param type the aggregation type
@param id the aggregation id
@param val the val associated with the id to be aggregated
@return an Entry whose key is the aggregation id prefixed with
the aggregation type.]]>
</doc>
</method>
<method name="generateValueAggregator" return="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="type" type="java.lang.String"/>
<doc>
<![CDATA[@param type the aggregation type
@return a value aggregator of the given type.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[get the input file name.
@param job a job configuration object]]>
</doc>
</method>
<field name="UNIQ_VALUE_COUNT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LONG_VALUE_SUM" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DOUBLE_VALUE_SUM" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="VALUE_HISTOGRAM" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LONG_VALUE_MAX" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LONG_VALUE_MIN" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="STRING_VALUE_MAX" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="STRING_VALUE_MIN" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class implements the common functionalities of
the subclasses of ValueAggregatorDescriptor class.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner -->
<class name="ValueAggregatorCombiner" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorCombiner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Combiner does not need to configure.]]>
</doc>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Combines values for a given key.
@param key the key is expected to be a Text object, whose prefix indicates
the type of aggregation to aggregate the values.
@param values the values to combine
@param output to collect combined values]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Do nothing.]]>
</doc>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="arg0" type="K1"/>
<param name="arg1" type="V1"/>
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Do nothing. Should not be called.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements the generic combiner of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner -->
<!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor -->
<interface name="ValueAggregatorDescriptor" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/>
<method name="configure"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Configure the object
@param job
a JobConf object that may contain the information that can be used
to configure the object.]]>
</doc>
</method>
<field name="TYPE_SEPARATOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="ONE" type="org.apache.hadoop.io.Text"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This interface defines the contract a value aggregator descriptor must
support. Such a descriptor can be configured with a JobConf object. Its main
function is to generate a list of aggregation-id/value pairs. An aggregation
id encodes an aggregation type which is used to guide the way to aggregate
the value in the reduce/combiner phase of an Aggregate based job. The mapper in
an Aggregate based map/reduce job may create one or more of
ValueAggregatorDescriptor objects at configuration time. For each input
key/value pair, the mapper will use those objects to create aggregation
id/value pairs.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob -->
<class name="ValueAggregatorJob" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorJob"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<param name="descriptors" type="java.lang.Class[]"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<param name="caller" type="java.lang.Class"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create an Aggregate based map/reduce job.
@param args the arguments used for job creation. Generic hadoop
arguments are accepted.
@param caller the caller class.
@return a JobConf object ready for submission.
@throws IOException
@see GenericOptionsParser]]>
</doc>
</method>
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create an Aggregate based map/reduce job.
@param args the arguments used for job creation. Generic hadoop
arguments are accepted.
@return a JobConf object ready for submission.
@throws IOException
@see GenericOptionsParser]]>
</doc>
</method>
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<param name="descriptors" type="java.lang.Class[]"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setAggregatorDescriptors"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="descriptors" type="java.lang.Class[]"/>
</method>
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<param name="descriptors" type="java.lang.Class[]"/>
<param name="caller" type="java.lang.Class"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[create and run an Aggregate based map/reduce job.
@param args the arguments used for job creation
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[This is the main class for creating a map/reduce job using Aggregate
framework. The Aggregate is a specialization of map/reduce framework,
specializing for performing various simple aggregations.
Generally speaking, in order to implement an application using Map/Reduce
model, the developer is to implement Map and Reduce functions (and possibly
combine function). However, a lot of applications related to counting and
statistics computing have very similar characteristics. Aggregate abstracts
out the general patterns of these functions and implements those patterns.
In particular, the package provides generic mapper/reducer/combiner classes,
and a set of built-in value aggregators, and a generic utility class that
helps user create map/reduce jobs using the generic class. The built-in
aggregators include:
sum over numeric values; count the number of distinct values; compute the
histogram of values; compute the minimum, maximum, median, average, standard
deviation of numeric values.
The developer using Aggregate will need only to provide a plugin class
conforming to the following interface:
public interface ValueAggregatorDescriptor { public ArrayList&lt;Entry&gt;
generateKeyValPairs(Object key, Object value); public void
configure(JobConf job); }
The package also provides a base class, ValueAggregatorBaseDescriptor,
implementing the above interface. The user can extend the base class and
implement generateKeyValPairs accordingly.
The primary work of generateKeyValPairs is to emit one or more key/value
pairs based on the input key/value pair. The key in an output key/value pair
encodes two pieces of information: aggregation type and aggregation id. The
value will be aggregated onto the aggregation id according to the aggregation
type.
This class offers a function to generate a map/reduce job using Aggregate
framework. The function takes the following parameters: input directory spec;
input format (text or sequence file); output directory; a file specifying the
user plugin class.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase -->
<class name="ValueAggregatorJobBase" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<implements name="org.apache.hadoop.mapred.Reducer"/>
<constructor name="ValueAggregatorJobBase"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="logSpec"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="aggregatorDescriptorList" type="java.util.ArrayList"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This abstract class implements some common functionalities of the
generic mapper, reducer and combiner classes of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper -->
<class name="ValueAggregatorMapper" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K1"/>
<param name="value" type="V1"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[the map function. It iterates through the value aggregator descriptor
list to generate aggregation id/value pairs and emit them.]]>
</doc>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="arg0" type="org.apache.hadoop.io.Text"/>
<param name="arg1" type="java.util.Iterator"/>
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Do nothing. Should not be called.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements the generic mapper of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer -->
<class name="ValueAggregatorReducer" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@param key
the key is expected to be a Text object, whose prefix indicates
the type of aggregation to aggregate the values. In effect, data
driven computing is achieved. It is assumed that each aggregator's
getReport method emits appropriate output for the aggregator. This
may be further customized.
@param values
the values to be aggregated]]>
</doc>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="arg0" type="K1"/>
<param name="arg1" type="V1"/>
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Do nothing. Should not be called.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements the generic reducer of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram -->
<class name="ValueHistogram" extends="org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="ValueHistogram"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This class implements a value aggregator that computes the
histogram of a sequence of strings.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram -->
</package>
<package name="org.apache.hadoop.mapred.lib.db">
<!-- start class org.apache.hadoop.mapred.lib.db.DBConfiguration -->
<class name="DBConfiguration" extends="org.apache.hadoop.mapreduce.lib.db.DBConfiguration"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="configureDB"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="driverClass" type="java.lang.String"/>
<param name="dbUrl" type="java.lang.String"/>
<param name="userName" type="java.lang.String"/>
<param name="passwd" type="java.lang.String"/>
<doc>
<![CDATA[Sets the DB access related fields in the JobConf.
@param job the job
@param driverClass JDBC Driver class name
@param dbUrl JDBC DB access URL.
@param userName DB access username
@param passwd DB access passwd]]>
</doc>
</method>
<method name="configureDB"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="driverClass" type="java.lang.String"/>
<param name="dbUrl" type="java.lang.String"/>
<doc>
<![CDATA[Sets the DB access related fields in the JobConf.
@param job the job
@param driverClass JDBC Driver class name
@param dbUrl JDBC DB access URL.]]>
</doc>
</method>
<field name="DRIVER_CLASS_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The JDBC Driver class name]]>
</doc>
</field>
<field name="URL_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[JDBC Database access URL]]>
</doc>
</field>
<field name="USERNAME_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[User name to access the database]]>
</doc>
</field>
<field name="PASSWORD_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Password to access the database]]>
</doc>
</field>
<field name="INPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Input table name]]>
</doc>
</field>
<field name="INPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Field names in the Input table]]>
</doc>
</field>
<field name="INPUT_CONDITIONS_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[WHERE clause in the input SELECT statement]]>
</doc>
</field>
<field name="INPUT_ORDER_BY_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[ORDER BY clause in the input SELECT statement]]>
</doc>
</field>
<field name="INPUT_QUERY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Whole input query, excluding LIMIT...OFFSET]]>
</doc>
</field>
<field name="INPUT_COUNT_QUERY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Input query to get the count of records]]>
</doc>
</field>
<field name="INPUT_CLASS_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Class name implementing DBWritable which will hold input tuples]]>
</doc>
</field>
<field name="OUTPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Output table name]]>
</doc>
</field>
<field name="OUTPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Field names in the Output table]]>
</doc>
</field>
<field name="OUTPUT_FIELD_COUNT_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Number of fields in the Output table]]>
</doc>
</field>
</class>
<!-- end class org.apache.hadoop.mapred.lib.db.DBConfiguration -->
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat -->
<class name="DBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputFormat"/>
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="DBInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="chunks" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="setInput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="inputClass" type="java.lang.Class"/>
<param name="tableName" type="java.lang.String"/>
<param name="conditions" type="java.lang.String"/>
<param name="orderBy" type="java.lang.String"/>
<param name="fieldNames" type="java.lang.String[]"/>
<doc>
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
@param job The job
@param inputClass the class object implementing DBWritable, which is the
Java object holding tuple fields.
@param tableName The table to read data from
@param conditions The condition with which to select data, e.g. '(updated &gt;
20070101 AND length &gt; 0)'
@param orderBy the fieldNames in the orderBy clause.
@param fieldNames The field names in the table
@see #setInput(JobConf, Class, String, String)]]>
</doc>
</method>
<method name="setInput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="inputClass" type="java.lang.Class"/>
<param name="inputQuery" type="java.lang.String"/>
<param name="inputCountQuery" type="java.lang.String"/>
<doc>
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
@param job The job
@param inputClass the class object implementing DBWritable, which is the
Java object holding tuple fields.
@param inputQuery the input query to select fields. Example :
"SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
@param inputCountQuery the input query that returns the number of records in
the table.
Example : "SELECT COUNT(f1) FROM Mytable"
@see #setInput(JobConf, Class, String, String, String, String...)]]>
</doc>
</method>
</class>
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.db.DBOutputFormat -->
<class name="DBOutputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
<constructor name="DBOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="setOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="tableName" type="java.lang.String"/>
<param name="fieldNames" type="java.lang.String[]"/>
<doc>
<![CDATA[Initializes the reduce-part of the job with the appropriate output settings
@param job The job
@param tableName The table to insert data into
@param fieldNames The field names in the table.]]>
</doc>
</method>
<method name="setOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="tableName" type="java.lang.String"/>
<param name="fieldCount" type="int"/>
<doc>
<![CDATA[Initializes the reduce-part of the job with the appropriate output settings
@param job The job
@param tableName The table to insert data into
@param fieldCount the number of fields in the table.]]>
</doc>
</method>
</class>
<!-- end class org.apache.hadoop.mapred.lib.db.DBOutputFormat -->
<!-- start interface org.apache.hadoop.mapred.lib.db.DBWritable -->
<interface name="DBWritable" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.db.DBWritable"/>
</interface>
<!-- end interface org.apache.hadoop.mapred.lib.db.DBWritable -->
</package>
<package name="org.apache.hadoop.mapred.pipes">
<!-- start class org.apache.hadoop.mapred.pipes.Submitter -->
<class name="Submitter" extends="org.apache.hadoop.conf.Configured"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Tool"/>
<constructor name="Submitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="Submitter" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getExecutable" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the URI of the application's executable.
@param conf
@return the URI where the application's executable is located]]>
</doc>
</method>
<method name="setExecutable"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="executable" type="java.lang.String"/>
<doc>
<![CDATA[Set the URI for the application's executable. Normally this is a hdfs:
location.
@param conf
@param executable The URI of the application's executable.]]>
</doc>
</method>
<method name="setIsJavaRecordReader"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="value" type="boolean"/>
<doc>
<![CDATA[Set whether the job is using a Java RecordReader.
@param conf the configuration to modify
@param value the new value]]>
</doc>
</method>
<!-- Public static getter: takes a JobConf and returns a boolean.
     Counterpart of setIsJavaRecordReader. -->
<method name="getIsJavaRecordReader" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Check whether the job is using a Java RecordReader
@param conf the configuration to check
@return is it a Java RecordReader?]]>
</doc>
</method>
<!-- Public static setter: stores a boolean "Java Mapper" flag on the given JobConf.
     Counterpart of getIsJavaMapper. -->
<method name="setIsJavaMapper"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="value" type="boolean"/>
<doc>
<![CDATA[Set whether the Mapper is written in Java.
@param conf the configuration to modify
@param value the new value]]>
</doc>
</method>
<!-- Public static getter: takes a JobConf and returns a boolean.
     Counterpart of setIsJavaMapper. -->
<method name="getIsJavaMapper" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Check whether the job is using a Java Mapper.
@param conf the configuration to check
@return is it a Java Mapper?]]>
</doc>
</method>
<!-- Public static setter: stores a boolean "Java Reducer" flag on the given JobConf.
     Counterpart of getIsJavaReducer. -->
<method name="setIsJavaReducer"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="value" type="boolean"/>
<doc>
<![CDATA[Set whether the Reducer is written in Java.
@param conf the configuration to modify
@param value the new value]]>
</doc>
</method>
<!-- Public static getter: takes a JobConf and returns a boolean.
     Counterpart of setIsJavaReducer. -->
<method name="getIsJavaReducer" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Check whether the job is using a Java Reducer.
@param conf the configuration to check
@return is it a Java Reducer?]]>
</doc>
</method>
<!-- Public static setter: stores a boolean "Java RecordWriter" flag on the given JobConf.
     Counterpart of getIsJavaRecordWriter. -->
<method name="setIsJavaRecordWriter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="value" type="boolean"/>
<doc>
<![CDATA[Set whether the job will use a Java RecordWriter.
@param conf the configuration to modify
@param value the new value to set]]>
</doc>
</method>
<!-- Public static getter: takes a JobConf and returns a boolean.
     Counterpart of setIsJavaRecordWriter. -->
<method name="getIsJavaRecordWriter" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Will the reduce use a Java RecordWriter?
@param conf the configuration to check
@return true, if the output of the job will be written by Java]]>
</doc>
</method>
<!-- Public static getter: takes a JobConf and returns a boolean.
     Counterpart of setKeepCommandFile.
     NOTE(review): the captured Javadoc calls "mapreduce.pipes.commandfile" an environment
     variable; the dotted name looks like a configuration key, so confirm the wording
     against the Java source. -->
<method name="getKeepCommandFile" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Does the user want to keep the command file for debugging? If this is
true, pipes will write a copy of the command data to a file in the
task directory named "downlink.data", which may be used to run the C++
program under the debugger. You probably also want to set
JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from
being deleted.
To run using the data file, set the environment variable
"mapreduce.pipes.commandfile" to point to the file.
@param conf the configuration to check
@return will the framework save the command file?]]>
</doc>
</method>
<!-- Public static setter: stores a boolean "keep command file" flag on the given JobConf.
     Counterpart of getKeepCommandFile. -->
<method name="setKeepCommandFile"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="keep" type="boolean"/>
<doc>
<![CDATA[Set whether to keep the command file for debugging
@param conf the configuration to modify
@param keep the new value]]>
</doc>
</method>
<!-- Deprecated public static method: takes a JobConf, returns a RunningJob, declares IOException.
     The deprecated attribute points callers at Submitter#runJob(JobConf).
     NOTE(review): the captured Javadoc has no @return tag although a return type is recorded. -->
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use {@link Submitter#runJob(JobConf)}">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications
to the job to run under pipes are made to the configuration.
@param conf the job to submit to the cluster (MODIFIED)
@throws IOException
@deprecated Use {@link Submitter#runJob(JobConf)}]]>
</doc>
</method>
<!-- Public static method: takes a JobConf, returns a RunningJob, declares IOException.
     It is the replacement named by the deprecation notice on submitJob.
     NOTE(review): the summary text is identical to submitJob's, and the captured Javadoc
     has no @return tag although a return type is recorded. -->
<method name="runJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications
to the job to run under pipes are made to the configuration.
@param conf the job to submit to the cluster (MODIFIED)
@throws IOException]]>
</doc>
</method>
<!-- Public static method: takes a JobConf, returns a RunningJob handle, declares IOException. -->
<method name="jobSubmit" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Submit a job to the Map-Reduce framework.
This returns a handle to the {@link RunningJob} which can be used to track
the running-job.
@param conf the job configuration.
@return a handle to the {@link RunningJob} which can be used to track the
running-job.
@throws IOException]]>
</doc>
</method>
<!-- Public instance method: takes String[] args, returns int, declares java.lang.Exception.
     Recorded without a <doc> child. The enclosing class implements org.apache.hadoop.util.Tool,
     so this is presumably that interface's entry point; confirm against the Java source. -->
<method name="run" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<!-- Public static command-line entry point: takes String[] args, declares java.lang.Exception.
     NOTE(review): the "@param args" tag in the captured Javadoc has no description. -->
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
<doc>
<![CDATA[Submit a pipes job based on the command line arguments.
@param args]]>
</doc>
</method>
<!-- Static final fields of Submitter. None carries a <doc> child or a recorded value.
     LOG is the only protected member and the only one typed org.slf4j.Logger; the
     remaining ten are public java.lang.String constants. -->
<field name="LOG" type="org.slf4j.Logger"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="PRESERVE_COMMANDFILE" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="EXECUTABLE" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<!-- NOTE(review): "INTERPRETOR" is the spelling recorded for this public constant. It is
     part of the published API surface, so it must not be "corrected" in this snapshot. -->
<field name="INTERPRETOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="IS_JAVA_MAP" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="IS_JAVA_RR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="IS_JAVA_RW" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="IS_JAVA_REDUCE" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="PARTITIONER" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="INPUT_FORMAT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="PORT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<!-- Class-level Javadoc captured for org.apache.hadoop.mapred.pipes.Submitter. -->
<doc>
<![CDATA[The main entry point and job submitter. It may either be used as a command
line-based or API-based method to launch Pipes jobs.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.pipes.Submitter -->
</package>
<package name="org.apache.hadoop.mapreduce">
<!-- start class org.apache.hadoop.mapreduce.Cluster -->
<!-- API record for org.apache.hadoop.mapreduce.Cluster: a public, non-abstract, non-final
     class extending java.lang.Object. It has two public constructors, both declaring
     IOException, followed by instance methods for querying jobs, queues, trackers and
     delegation tokens. Only close and getFileSystem are recorded as synchronized.
     The NOTE(review) comments below flag places where the captured Javadoc text disagrees
     with the recorded signature. Fix those in the Java source and regenerate; do not
     hand-edit the CDATA in this baseline. -->
<class name="Cluster" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Cluster" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<constructor name="Cluster" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close the <code>Cluster</code>.
@throws IOException]]>
</doc>
</method>
<method name="getFileSystem" return="org.apache.hadoop.fs.FileSystem"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the file system where job-specific files are stored
@return object of FileSystem
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<!-- NOTE(review): the "@param jobId" tag below has no description. -->
<method name="getJob" return="org.apache.hadoop.mapreduce.Job"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapreduce.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get job corresponding to jobid.
@param jobId
@return object of {@link Job}
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get all the queues in cluster.
@return array of {@link QueueInfo}
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getQueue" return="org.apache.hadoop.mapreduce.QueueInfo"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get queue information for the specified name.
@param name queuename
@return object of {@link QueueInfo}
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getLogParams" return="org.apache.hadoop.mapreduce.v2.LogParams"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobID" type="org.apache.hadoop.mapreduce.JobID"/>
<param name="taskAttemptID" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get log parameters for the specified jobID or taskAttemptID
@param jobID the job id.
@param taskAttemptID the task attempt id. Optional.
@return the LogParams
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getClusterStatus" return="org.apache.hadoop.mapreduce.ClusterMetrics"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get current cluster status.
@return object of {@link ClusterMetrics}
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getActiveTaskTrackers" return="org.apache.hadoop.mapreduce.TaskTrackerInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get all active trackers in the cluster.
@return array of {@link TaskTrackerInfo}
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getBlackListedTaskTrackers" return="org.apache.hadoop.mapreduce.TaskTrackerInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get blacklisted trackers.
@return array of {@link TaskTrackerInfo}
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<!-- Deprecated: the deprecated attribute points callers at getAllJobStatuses(), recorded next. -->
<method name="getAllJobs" return="org.apache.hadoop.mapreduce.Job[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link #getAllJobStatuses()} instead.">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get all the jobs in cluster.
@return array of {@link Job}
@throws IOException
@throws InterruptedException
@deprecated Use {@link #getAllJobStatuses()} instead.]]>
</doc>
</method>
<method name="getAllJobStatuses" return="org.apache.hadoop.mapreduce.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get job status for all jobs in the cluster.
@return job status for all jobs in cluster
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<!-- NOTE(review): getSystemDir and getStagingAreaDir both declare IOException and
     InterruptedException, but their captured Javadoc has no @throws tags. -->
<method name="getSystemDir" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Grab the jobtracker system directory path where
job-specific files will be placed.
@return the system directory where job-specific files are to be placed.]]>
</doc>
</method>
<method name="getStagingAreaDir" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Grab the jobtracker's view of the staging directory path where
job-specific files will be placed.
@return the staging directory where job-specific files are to be placed.]]>
</doc>
</method>
<!-- NOTE(review): the method name says "Url" while the captured Javadoc describes a
     "file path"; the recorded return type is a plain java.lang.String. -->
<method name="getJobHistoryUrl" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapreduce.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the job history file path for a given job id. The job history file at
this path may or may not be existing depending on the job completion state.
The file is present only for the completed jobs.
@param jobId the JobID of the job submitted by the current user.
@return the file path of the job history file
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<!-- NOTE(review): InterruptedException is declared but the captured Javadoc lists only
     "@throws IOException". -->
<method name="getQueueAclsForCurrentUser" return="org.apache.hadoop.mapreduce.QueueAclsInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Gets the Queue ACLs for current user
@return array of QueueAclsInfo object for current user.
@throws IOException]]>
</doc>
</method>
<!-- NOTE(review): for getRootQueues and getChildQueues the captured Javadoc says
     "JobQueueInfo" while the recorded return type is QueueInfo[]; both also omit
     "@throws InterruptedException" although it is declared. -->
<method name="getRootQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Gets the root level queues.
@return array of JobQueueInfo object.
@throws IOException]]>
</doc>
</method>
<method name="getChildQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Returns immediate children of queueName.
@param queueName
@return array of JobQueueInfo which are children of queueName
@throws IOException]]>
</doc>
</method>
<method name="getJobTrackerStatus" return="org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the JobTracker's status.
@return {@link JobTrackerStatus} of the JobTracker
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<!-- NOTE(review): both exceptions are declared but the captured Javadoc has no @throws tags. -->
<method name="getTaskTrackerExpiryInterval" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the tasktracker expiry interval for the cluster
@return the expiry interval in msec]]>
</doc>
</method>
<!-- NOTE(review): the three delegation-token methods below all declare InterruptedException,
     yet none of their captured Javadoc blocks carries "@throws InterruptedException". -->
<method name="getDelegationToken" return="org.apache.hadoop.security.token.Token"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="renewer" type="org.apache.hadoop.io.Text"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get a delegation token for the user from the JobTracker.
@param renewer the user who can renew the token
@return the new token
@throws IOException]]>
</doc>
</method>
<!-- Deprecated in favour of Token#renew. The Javadoc writes the exception as "InvalidToken";
     the recorded exception name is "SecretManager.InvalidToken". -->
<method name="renewDelegationToken" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link Token#renew} instead">
<param name="token" type="org.apache.hadoop.security.token.Token"/>
<exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Renew a delegation token
@param token the token to renew
@return the new expiration time
@throws InvalidToken
@throws IOException
@deprecated Use {@link Token#renew} instead]]>
</doc>
</method>
<!-- Deprecated in favour of Token#cancel. -->
<method name="cancelDelegationToken"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link Token#cancel} instead">
<param name="token" type="org.apache.hadoop.security.token.Token"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Cancel a delegation token from the JobTracker
@param token the token to cancel
@throws IOException
@deprecated Use {@link Token#cancel} instead]]>
</doc>
</method>
<doc>
<![CDATA[Provides a way to access information about the map/reduce cluster.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Cluster -->
<!-- start class org.apache.hadoop.mapreduce.ClusterMetrics -->
<!-- API record for org.apache.hadoop.mapreduce.ClusterMetrics: a public, non-abstract class
     extending java.lang.Object and implementing org.apache.hadoop.io.Writable. It records
     three public constructors, thirteen int-returning getters, and the readFields/write pair. -->
<class name="ClusterMetrics" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="ClusterMetrics"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<!-- NOTE(review): the next two constructors take 12 and 13 int arguments respectively and
     carry no <doc> child, so this record does not say which position holds which metric. -->
<constructor name="ClusterMetrics" type="int, int, int, int, int, int, int, int, int, int, int, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="ClusterMetrics" type="int, int, int, int, int, int, int, int, int, int, int, int, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRunningMaps" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of running map tasks in the cluster.
@return running maps]]>
</doc>
</method>
<method name="getRunningReduces" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of running reduce tasks in the cluster.
@return running reduces]]>
</doc>
</method>
<method name="getOccupiedMapSlots" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get number of occupied map slots in the cluster.
@return occupied map slot count]]>
</doc>
</method>
<method name="getOccupiedReduceSlots" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of occupied reduce slots in the cluster.
@return occupied reduce slot count]]>
</doc>
</method>
<method name="getReservedMapSlots" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get number of reserved map slots in the cluster.
@return reserved map slot count]]>
</doc>
</method>
<method name="getReservedReduceSlots" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of reserved reduce slots in the cluster.
@return reserved reduce slot count]]>
</doc>
</method>
<method name="getMapSlotCapacity" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the total number of map slots in the cluster.
@return map slot capacity]]>
</doc>
</method>
<method name="getReduceSlotCapacity" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the total number of reduce slots in the cluster.
@return reduce slot capacity]]>
</doc>
</method>
<method name="getTotalJobSubmissions" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the total number of job submissions in the cluster.
@return total number of job submissions]]>
</doc>
</method>
<method name="getTaskTrackerCount" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of active trackers in the cluster.
@return active tracker count.]]>
</doc>
</method>
<method name="getBlackListedTaskTrackerCount" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of blacklisted trackers in the cluster.
@return blacklisted tracker count]]>
</doc>
</method>
<method name="getGrayListedTaskTrackerCount" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of graylisted trackers in the cluster.
@return graylisted tracker count]]>
</doc>
</method>
<method name="getDecommissionedTaskTrackerCount" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of decommissioned trackers in the cluster.
@return decommissioned tracker count]]>
</doc>
</method>
<!-- readFields(DataInput) and write(DataOutput) are recorded without <doc> children; the
     class implements Writable, so they are presumably that interface's methods (confirm
     against the Java source). -->
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<!-- Class-level Javadoc. The embedded HTML is carried as CDATA text, not as XML elements. -->
<doc>
<![CDATA[Status information on the current state of the Map-Reduce cluster.
<p><code>ClusterMetrics</code> provides clients with information such as:
<ol>
<li>
Size of the cluster.
</li>
<li>
Number of blacklisted and decommissioned trackers.
</li>
<li>
Slot capacity of the cluster.
</li>
<li>
The number of currently occupied/reserved map and reduce slots.
</li>
<li>
The number of currently running map and reduce tasks.
</li>
<li>
The number of job submissions.
</li>
</ol>
<p>Clients can query for the latest <code>ClusterMetrics</code>, via
{@link Cluster#getClusterStatus()}.</p>
@see Cluster]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.ClusterMetrics -->
<!-- start interface org.apache.hadoop.mapreduce.Counter -->
<!-- API record for the public interface org.apache.hadoop.mapreduce.Counter, which
     implements org.apache.hadoop.io.Writable. All six recorded methods are abstract. -->
<interface name="Counter" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<!-- Deprecated: the deprecation text is the parenthetical "(and no-op by default)" and
     names no replacement. -->
<method name="setDisplayName"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="(and no-op by default)">
<param name="displayName" type="java.lang.String"/>
<doc>
<![CDATA[Set the display name of the counter
@param displayName of the counter
@deprecated (and no-op by default)]]>
</doc>
</method>
<method name="getName" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the name of the counter]]>
</doc>
</method>
<method name="getDisplayName" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the display name of the counter.
@return the user facing name of the counter]]>
</doc>
</method>
<method name="getValue" return="long"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[What is the current value of this counter?
@return the current value]]>
</doc>
</method>
<method name="setValue"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="value" type="long"/>
<doc>
<![CDATA[Set this counter by the given value
@param value the value to set]]>
</doc>
</method>
<method name="increment"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="incr" type="long"/>
<doc>
<![CDATA[Increment this counter by the given value
@param incr the value to increase this counter by]]>
</doc>
</method>
<!-- NOTE(review): in the interface-level Javadoc below, the second <p> is never closed
     and the text reads "comprising of"; fix in the Java source and regenerate. -->
<doc>
<![CDATA[A named counter that tracks the progress of a map/reduce job.
<p><code>Counters</code> represent global counters, defined either by the
Map-Reduce framework or applications. Each <code>Counter</code> is named by
an {@link Enum} and has a long for the value.</p>
<p><code>Counters</code> are bunched into Groups, each comprising of
counters from a particular <code>Enum</code> class.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.Counter -->
<!-- start interface org.apache.hadoop.mapreduce.CounterGroup -->
<!-- API record for the public interface org.apache.hadoop.mapreduce.CounterGroup. It
     declares no members of its own here; it only implements
     org.apache.hadoop.mapreduce.counters.CounterGroupBase. -->
<interface name="CounterGroup" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.counters.CounterGroupBase"/>
<doc>
<![CDATA[A group of {@link Counter}s that logically belong together. Typically,
it is an {@link Enum} subclass and the counters are the values.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.CounterGroup -->
<!-- start class org.apache.hadoop.mapreduce.Counters -->
<class name="Counters" extends="org.apache.hadoop.mapreduce.counters.AbstractCounters"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Counters"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default constructor]]>
</doc>
</constructor>
<constructor name="Counters" type="org.apache.hadoop.mapreduce.counters.AbstractCounters"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct the Counters object from the another counters object
@param <C> the type of counter
@param <G> the type of counter group
@param counters the old counters object]]>
</doc>
</constructor>
<doc>
<![CDATA[<p><code>Counters</code> holds per job/task counters, defined either by the
Map-Reduce framework or applications. Each <code>Counter</code> can be of
any {@link Enum} type.</p>
<p><code>Counters</code> are bunched into {@link CounterGroup}s, each
comprising counters from a particular <code>Enum</code> class.</p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Counters -->
<!-- start class org.apache.hadoop.mapreduce.ID -->
<class name="ID" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.WritableComparable"/>
<constructor name="ID" type="int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[constructs an ID object from the given int]]>
</doc>
</constructor>
<constructor name="ID"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="getId" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[returns the int which represents the identifier]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="that" type="org.apache.hadoop.mapreduce.ID"/>
<doc>
<![CDATA[Compare IDs by associated numbers]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="SEPARATOR" type="char"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="id" type="int"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A general identifier, which internally stores the id
as an integer. This is the super class of {@link JobID},
{@link TaskID} and {@link TaskAttemptID}.
@see JobID
@see TaskID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.ID -->
<!-- start class org.apache.hadoop.mapreduce.InputFormat -->
<class name="InputFormat" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getSplits" return="java.util.List"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Logically split the set of input files for the job.
<p>Each {@link InputSplit} is then assigned to an individual {@link Mapper}
for processing.</p>
<p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
input files are not physically split into chunks. For example, a split could
be an <i>&lt;input-file-path, start, offset&gt;</i> tuple. The InputFormat
also creates the {@link RecordReader} to read the {@link InputSplit}.
@param context job configuration.
@return a list of {@link InputSplit}s for the job.]]>
</doc>
</method>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Create a record reader for a given split. The framework will call
{@link RecordReader#initialize(InputSplit, TaskAttemptContext)} before
the split is used.
@param split the split to be read
@param context the information about the task
@return a new record reader
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<doc>
<![CDATA[<code>InputFormat</code> describes the input-specification for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>InputFormat</code> of the
job to:</p>
<ol>
<li>
Validate the input-specification of the job.
</li>
<li>
Split-up the input file(s) into logical {@link InputSplit}s, each of
which is then assigned to an individual {@link Mapper}.
</li>
<li>
Provide the {@link RecordReader} implementation to be used to glean
input records from the logical <code>InputSplit</code> for processing by
the {@link Mapper}.
</li>
</ol>
<p>The default behavior of file-based {@link InputFormat}s, typically
sub-classes of {@link FileInputFormat}, is to split the
input into <i>logical</i> {@link InputSplit}s based on the total size, in
bytes, of the input files. However, the {@link FileSystem} blocksize of
the input files is treated as an upper bound for input splits. A lower bound
on the split size can be set via
<a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.input.fileinputformat.split.minsize">
mapreduce.input.fileinputformat.split.minsize</a>.</p>
<p>Clearly, logical splits based on input-size are insufficient for many
applications since record boundaries are to be respected. In such cases, the
application has to also implement a {@link RecordReader} on whom lies the
responsibility to respect record-boundaries and present a record-oriented
view of the logical <code>InputSplit</code> to the individual task.</p>
@see InputSplit
@see RecordReader
@see FileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.InputFormat -->
<!-- start class org.apache.hadoop.mapreduce.InputSplit -->
<class name="InputSplit" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InputSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getLength" return="long"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the size of the split, so that the input splits can be sorted by size.
@return the number of bytes in the split
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the list of nodes by name where the data for the split would be local.
The locations do not need to be serialized.
@return a new array of the node names.
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getLocationInfo" return="org.apache.hadoop.mapred.SplitLocationInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets info about which nodes the input split is stored on and how it is
stored at each location.
@return list of <code>SplitLocationInfo</code>s describing how the split
data is stored at each location. A null value indicates that all the
locations have the data stored on disk.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>InputSplit</code> represents the data to be processed by an
individual {@link Mapper}.
<p>Typically, it presents a byte-oriented view on the input, and it is the
responsibility of the {@link RecordReader} of the job to process this and
present a record-oriented view.</p>
@see InputFormat
@see RecordReader]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.InputSplit -->
<!-- start class org.apache.hadoop.mapreduce.Job -->
<class name="Job" extends="org.apache.hadoop.mapreduce.task.JobContextImpl"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.JobContext"/>
<implements name="java.lang.AutoCloseable"/>
<constructor name="Job"
static="false" final="false" visibility="public"
deprecated="Use {@link #getInstance()}">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Use {@link #getInstance()}]]>
</doc>
</constructor>
<constructor name="Job" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="Use {@link #getInstance(Configuration)}">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Use {@link #getInstance(Configuration)}]]>
</doc>
</constructor>
<constructor name="Job" type="org.apache.hadoop.conf.Configuration, java.lang.String"
static="false" final="false" visibility="public"
deprecated="Use {@link #getInstance(Configuration, String)}">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Use {@link #getInstance(Configuration, String)}]]>
</doc>
</constructor>
<method name="getInstance" return="org.apache.hadoop.mapreduce.Job"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Creates a new {@link Job} with no particular {@link Cluster} .
A Cluster will be created with a generic {@link Configuration}.
@return the {@link Job} , with no connection to a cluster yet.
@throws IOException]]>
</doc>
</method>
<method name="getInstance" return="org.apache.hadoop.mapreduce.Job"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Creates a new {@link Job} with no particular {@link Cluster} and a
given {@link Configuration}.
The <code>Job</code> makes a copy of the <code>Configuration</code> so
that any necessary internal modifications do not reflect on the incoming
parameter.
A Cluster will be created from the conf parameter only when it's needed.
@param conf the configuration
@return the {@link Job} , with no connection to a cluster yet.
@throws IOException]]>
</doc>
</method>
<method name="getInstance" return="org.apache.hadoop.mapreduce.Job"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="jobName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Creates a new {@link Job} with no particular {@link Cluster} and a given jobName.
A Cluster will be created from the conf parameter only when it's needed.
The <code>Job</code> makes a copy of the <code>Configuration</code> so
that any necessary internal modifications do not reflect on the incoming
parameter.
@param conf the configuration
@param jobName the name of the job
@return the {@link Job} , with no connection to a cluster yet.
@throws IOException]]>
</doc>
</method>
<method name="getInstance" return="org.apache.hadoop.mapreduce.Job"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="status" type="org.apache.hadoop.mapreduce.JobStatus"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Creates a new {@link Job} with no particular {@link Cluster} and given
{@link Configuration} and {@link JobStatus}.
A Cluster will be created from the conf parameter only when it's needed.
The <code>Job</code> makes a copy of the <code>Configuration</code> so
that any necessary internal modifications do not reflect on the incoming
parameter.
@param status job status
@param conf job configuration
@return the {@link Job} , with no connection to a cluster yet.
@throws IOException]]>
</doc>
</method>
<method name="getInstance" return="org.apache.hadoop.mapreduce.Job"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use {@link #getInstance()}">
<param name="ignored" type="org.apache.hadoop.mapreduce.Cluster"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Creates a new {@link Job} with no particular {@link Cluster}.
A Cluster will be created from the conf parameter only when it's needed.
The <code>Job</code> makes a copy of the <code>Configuration</code> so
that any necessary internal modifications do not reflect on the incoming
parameter.
@param ignored
@return the {@link Job} , with no connection to a cluster yet.
@throws IOException
@deprecated Use {@link #getInstance()}]]>
</doc>
</method>
<method name="getInstance" return="org.apache.hadoop.mapreduce.Job"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use {@link #getInstance(Configuration)}">
<param name="ignored" type="org.apache.hadoop.mapreduce.Cluster"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Creates a new {@link Job} with no particular {@link Cluster} and given
{@link Configuration}.
A Cluster will be created from the conf parameter only when it's needed.
The <code>Job</code> makes a copy of the <code>Configuration</code> so
that any necessary internal modifications do not reflect on the incoming
parameter.
@param ignored
@param conf job configuration
@return the {@link Job} , with no connection to a cluster yet.
@throws IOException
@deprecated Use {@link #getInstance(Configuration)}]]>
</doc>
</method>
<method name="getStatus" return="org.apache.hadoop.mapreduce.JobStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getJobState" return="org.apache.hadoop.mapreduce.JobStatus.State"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Returns the current state of the Job.
@return JobStatus#State
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getTrackingURL" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the URL where some job progress information will be displayed.
@return the URL where some job progress information will be displayed.]]>
</doc>
</method>
<method name="getJobFile" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the path of the submitted job configuration.
@return the path of the submitted job configuration.]]>
</doc>
</method>
<method name="getStartTime" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get start time of the job.
@return the start time of the job]]>
</doc>
</method>
<method name="getFinishTime" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get finish time of the job.
@return the finish time of the job]]>
</doc>
</method>
<method name="getSchedulingInfo" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get scheduling info of the job.
@return the scheduling info of the job]]>
</doc>
</method>
<method name="getPriority" return="org.apache.hadoop.mapreduce.JobPriority"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the priority info of the job.
@return the priority info of the job]]>
</doc>
</method>
<method name="getJobName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The user-specified job name.]]>
</doc>
</method>
<method name="getHistoryUrl" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="isRetired" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Dump stats to screen.]]>
</doc>
</method>
<method name="getTaskReports" return="org.apache.hadoop.mapreduce.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="type" type="org.apache.hadoop.mapreduce.TaskType"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the information of the current state of the tasks of a job.
@param type Type of the task
@return the list of task reports for all tasks of the given type.
@throws IOException]]>
</doc>
</method>
<method name="mapProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0
and 1.0. When all map tasks have completed, the function returns 1.0.
@return the progress of the job's map-tasks.
@throws IOException]]>
</doc>
</method>
<method name="reduceProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0
and 1.0. When all reduce tasks have completed, the function returns 1.0.
@return the progress of the job's reduce-tasks.
@throws IOException]]>
</doc>
</method>
<method name="cleanupProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0
and 1.0. When all cleanup tasks have completed, the function returns 1.0.
@return the progress of the job's cleanup-tasks.
@throws IOException]]>
</doc>
</method>
<method name="setupProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0
and 1.0. When all setup tasks have completed, the function returns 1.0.
@return the progress of the job's setup-tasks.
@throws IOException]]>
</doc>
</method>
<method name="isComplete" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check if the job is finished or not.
This is a non-blocking call.
@return <code>true</code> if the job is complete, else <code>false</code>.
@throws IOException]]>
</doc>
</method>
<method name="isSuccessful" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check if the job completed successfully.
@return <code>true</code> if the job succeeded, else <code>false</code>.
@throws IOException]]>
</doc>
</method>
<method name="killJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Kill the running job. Blocks until all job tasks have been
killed as well. If the job is no longer running, it simply returns.
@throws IOException]]>
</doc>
</method>
<method name="setPriority"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobPriority" type="org.apache.hadoop.mapreduce.JobPriority"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Set the priority of a running job.
@param jobPriority the new priority for the job.
@throws IOException]]>
</doc>
</method>
<method name="setPriorityAsInteger"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobPriority" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Set the priority of a running job.
@param jobPriority
the new priority for the job.
@throws IOException]]>
</doc>
</method>
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapreduce.TaskCompletionEvent[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="startFrom" type="int"/>
<param name="numEvents" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get events indicating completion (success/failure) of component tasks.
@param startFrom index to start fetching events from
@param numEvents number of events to fetch
@return an array of {@link TaskCompletionEvent}s
@throws IOException]]>
</doc>
</method>
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="startFrom" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get events indicating completion (success/failure) of component tasks.
@param startFrom index to start fetching events from
@return an array of {@link org.apache.hadoop.mapred.TaskCompletionEvent}s
@throws IOException]]>
</doc>
</method>
<method name="killTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Kill indicated task attempt.
@param taskId the id of the task to be terminated.
@throws IOException]]>
</doc>
</method>
<method name="failTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Fail indicated task attempt.
@param taskId the id of the task to be terminated.
@throws IOException]]>
</doc>
</method>
<method name="getCounters" return="org.apache.hadoop.mapreduce.Counters"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the counters for this job. May return null if the job has been
retired and the job is no longer in the completed job store.
@return the counters for this job.
@throws IOException]]>
</doc>
</method>
<method name="getTaskDiagnostics" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Gets the diagnostic messages for a given task attempt.
@param taskid the id of the task attempt
@return the list of diagnostic messages for the task
@throws IOException]]>
</doc>
</method>
<method name="setNumReduceTasks"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="tasks" type="int"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the number of reduce tasks for the job.
@param tasks the number of reduce tasks
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setWorkingDirectory"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="dir" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Set the current working directory for the default file system.
@param dir the new current working directory.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setInputFormatClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the {@link InputFormat} for the job.
@param cls the <code>InputFormat</code> to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setOutputFormatClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the {@link OutputFormat} for the job.
@param cls the <code>OutputFormat</code> to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setMapperClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the {@link Mapper} for the job.
@param cls the <code>Mapper</code> to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setJarByClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<doc>
<![CDATA[Set the Jar by finding where a given class came from.
@param cls the example class]]>
</doc>
</method>
<method name="setJar"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jar" type="java.lang.String"/>
<doc>
<![CDATA[Set the job jar]]>
</doc>
</method>
<method name="setUser"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="user" type="java.lang.String"/>
<doc>
<![CDATA[Set the reported username for this job.
@param user the username for this job.]]>
</doc>
</method>
<method name="setCombinerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the combiner class for the job.
@param cls the combiner to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setReducerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the {@link Reducer} for the job.
@param cls the <code>Reducer</code> to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setPartitionerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the {@link Partitioner} for the job.
@param cls the <code>Partitioner</code> to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setMapOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the key class for the map output data. This allows the user to
specify the map output key class to be different than the final output
key class.
@param theClass the map output key class.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setMapOutputValueClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the value class for the map output data. This allows the user to
specify the map output value class to be different than the final output
value class.
@param theClass the map output value class.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the key class for the job output data.
@param theClass the key class for the job output data.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setOutputValueClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the value class for job outputs.
@param theClass the value class for job outputs.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setCombinerKeyGroupingComparatorClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Define the comparator that controls which keys are grouped together
for a single call to combiner,
{@link Reducer#reduce(Object, Iterable,
org.apache.hadoop.mapreduce.Reducer.Context)}
@param cls the raw comparator to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setSortComparatorClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Define the comparator that controls how the keys are sorted before they
are passed to the {@link Reducer}.
@param cls the raw comparator
@throws IllegalStateException if the job is submitted
@see #setCombinerKeyGroupingComparatorClass(Class)]]>
</doc>
</method>
<method name="setGroupingComparatorClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Define the comparator that controls which keys are grouped together
for a single call to
{@link Reducer#reduce(Object, Iterable,
org.apache.hadoop.mapreduce.Reducer.Context)}
@param cls the raw comparator to use
@throws IllegalStateException if the job is submitted
@see #setCombinerKeyGroupingComparatorClass(Class)]]>
</doc>
</method>
<method name="setJobName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the user-specified job name.
@param name the job's new name.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setSpeculativeExecution"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="speculativeExecution" type="boolean"/>
<doc>
<![CDATA[Turn speculative execution on or off for this job.
@param speculativeExecution <code>true</code> if speculative execution
should be turned on, else <code>false</code>.]]>
</doc>
</method>
<method name="setMapSpeculativeExecution"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="speculativeExecution" type="boolean"/>
<doc>
<![CDATA[Turn speculative execution on or off for this job for map tasks.
@param speculativeExecution <code>true</code> if speculative execution
should be turned on for map tasks,
else <code>false</code>.]]>
</doc>
</method>
<method name="setReduceSpeculativeExecution"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="speculativeExecution" type="boolean"/>
<doc>
<![CDATA[Turn speculative execution on or off for this job for reduce tasks.
@param speculativeExecution <code>true</code> if speculative execution
should be turned on for reduce tasks,
else <code>false</code>.]]>
</doc>
</method>
<method name="setJobSetupCleanupNeeded"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="needed" type="boolean"/>
<doc>
<![CDATA[Specify whether job-setup and job-cleanup is needed for the job
@param needed If <code>true</code>, job-setup and job-cleanup will be
considered from {@link OutputCommitter}
else ignored.]]>
</doc>
</method>
<method name="setCacheArchives"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="archives" type="java.net.URI[]"/>
<doc>
<![CDATA[Set the given set of archives
@param archives The list of archives that need to be localized]]>
</doc>
</method>
<method name="setCacheFiles"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="files" type="java.net.URI[]"/>
<doc>
<![CDATA[Set the given set of files
@param files The list of files that need to be localized]]>
</doc>
</method>
<method name="addCacheArchive"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="uri" type="java.net.URI"/>
<doc>
<![CDATA[Add an archive to be localized
@param uri The uri of the cache to be localized]]>
</doc>
</method>
<method name="addCacheFile"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="uri" type="java.net.URI"/>
<doc>
<![CDATA[Add a file to be localized
@param uri The uri of the cache to be localized]]>
</doc>
</method>
<method name="addFileToClassPath"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="file" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add a file path to the current set of classpath entries. It adds the file
to cache as well.
Files added with this method will not be unpacked while being added to the
classpath.
To add archives to classpath, use the {@link #addArchiveToClassPath(Path)}
method instead.
@param file Path of the file to be added]]>
</doc>
</method>
<method name="addArchiveToClassPath"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="archive" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add an archive path to the current set of classpath entries. It adds the
archive to cache as well.
Archive files will be unpacked and added to the classpath
when being distributed.
@param archive Path of the archive to be added]]>
</doc>
</method>
<method name="createSymlink"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Originally intended to enable symlinks, but currently symlinks cannot be
disabled.]]>
</doc>
</method>
<method name="setMaxMapAttempts"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
map task.
@param n the number of attempts per map task.]]>
</doc>
</method>
<method name="setMaxReduceAttempts"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
reduce task.
@param n the number of attempts per reduce task.]]>
</doc>
</method>
<method name="setProfileEnabled"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newValue" type="boolean"/>
<doc>
<![CDATA[Set whether the system should collect profiler information for some of
the tasks in this job. The information is stored in the user log
directory.
@param newValue true means it should be gathered]]>
</doc>
</method>
<method name="setProfileParams"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="value" type="java.lang.String"/>
<doc>
<![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it
will be replaced with the name of the profiling output file when the task
runs.
This value is passed to the task child JVM on the command line.
@param value the configuration string]]>
</doc>
</method>
<method name="setProfileTaskRange"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="isMap" type="boolean"/>
<param name="newValue" type="java.lang.String"/>
<doc>
<![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true)
must also be called.
@param isMap true if the ranges are for map tasks, false if they are for
reduce tasks
@param newValue a set of integer ranges of the map or reduce ids]]>
</doc>
</method>
<method name="setCancelDelegationTokenUponJobCompletion"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="value" type="boolean"/>
<doc>
<![CDATA[Sets the flag that will allow the JobTracker to cancel the HDFS delegation
tokens upon job completion. Defaults to true.]]>
</doc>
</method>
<method name="addFileToSharedCache" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="resource" type="java.net.URI"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Add a file to job config for shared cache processing. If shared cache is
enabled, it will return true, otherwise, return false. We don't check with
SCM here given application might not be able to provide the job id;
ClientSCMProtocol.use requires the application id. Job Submitter will read
the files from job config and take care of things.
@param resource The resource that Job Submitter will process later using
shared cache.
@param conf Configuration to add the resource to
@return whether the resource has been added to the configuration]]>
</doc>
</method>
<method name="addFileToSharedCacheAndClasspath" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="resource" type="java.net.URI"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Add a file to job config for shared cache processing. If shared cache is
enabled, it will return true, otherwise, return false. We don't check with
SCM here given application might not be able to provide the job id;
ClientSCMProtocol.use requires the application id. Job Submitter will read
the files from job config and take care of things. Job Submitter will also
add the file to classpath. Intended to be used by user code.
@param resource The resource that Job Submitter will process later using
shared cache.
@param conf Configuration to add the resource to
@return whether the resource has been added to the configuration]]>
</doc>
</method>
<method name="addArchiveToSharedCache" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="resource" type="java.net.URI"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Add an archive to job config for shared cache processing. If shared cache
is enabled, it will return true, otherwise, return false. We don't check
with SCM here given application might not be able to provide the job id;
ClientSCMProtocol.use requires the application id. Job Submitter will read
the files from job config and take care of things. Intended to be used by
user code.
@param resource The resource that Job Submitter will process later using
shared cache.
@param conf Configuration to add the resource to
@return whether the resource has been added to the configuration]]>
</doc>
</method>
<method name="setFileSharedCacheUploadPolicies"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="policies" type="java.util.Map"/>
<doc>
<![CDATA[This is to set the shared cache upload policies for files. If the parameter
was previously set, this method will replace the old value with the new
provided map.
@param conf Configuration which stores the shared cache upload policies
@param policies A map containing the shared cache upload policies for a set
of resources. The key is the url of the resource and the value is
the upload policy. True if it should be uploaded, false otherwise.]]>
</doc>
</method>
<method name="setArchiveSharedCacheUploadPolicies"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="policies" type="java.util.Map"/>
<doc>
<![CDATA[This is to set the shared cache upload policies for archives. If the
parameter was previously set, this method will replace the old value with
the new provided map.
@param conf Configuration which stores the shared cache upload policies
@param policies A map containing the shared cache upload policies for a set
of resources. The key is the url of the resource and the value is
the upload policy. True if it should be uploaded, false otherwise.]]>
</doc>
</method>
<method name="getFileSharedCacheUploadPolicies" return="java.util.Map"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[This is to get the shared cache upload policies for files.
@param conf Configuration which stores the shared cache upload policies
@return A map containing the shared cache upload policies for a set of
resources. The key is the url of the resource and the value is the
upload policy. True if it should be uploaded, false otherwise.]]>
</doc>
</method>
<method name="getArchiveSharedCacheUploadPolicies" return="java.util.Map"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[This is to get the shared cache upload policies for archives.
@param conf Configuration which stores the shared cache upload policies
@return A map containing the shared cache upload policies for a set of
resources. The key is the url of the resource and the value is the
upload policy. True if it should be uploaded, false otherwise.]]>
</doc>
</method>
<method name="submit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Submit the job to the cluster and return immediately.
@throws IOException]]>
</doc>
</method>
<method name="waitForCompletion" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="verbose" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Submit the job to the cluster and wait for it to finish.
@param verbose print the progress to the user
@return true if the job succeeded
@throws IOException thrown if the communication with the
<code>JobTracker</code> is lost]]>
</doc>
</method>
<method name="monitorAndPrintJob" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Monitor a job and print status in real-time as progress is made and tasks
fail.
@return true if the job succeeded
@throws IOException if communication to the JobTracker fails]]>
</doc>
</method>
<method name="getProgressPollInterval" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[The interval at which monitorAndPrintJob() prints status]]>
</doc>
</method>
<method name="getCompletionPollInterval" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[The interval at which waitForCompletion() should check.]]>
</doc>
</method>
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapreduce.Job.TaskStatusFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the task output filter.
@param conf the configuration.
@return the filter level.]]>
</doc>
</method>
<method name="setTaskOutputFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="newValue" type="org.apache.hadoop.mapreduce.Job.TaskStatusFilter"/>
<doc>
<![CDATA[Modify the Configuration to set the task output filter.
@param conf the Configuration to modify.
@param newValue the value to set.]]>
</doc>
</method>
<method name="isUber" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getReservationId" return="org.apache.hadoop.yarn.api.records.ReservationId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the reservation to which the job is submitted, if any
@return the reservationId, i.e. the identifier of the job's reservation, or null if
the job does not have any reservation associated with it]]>
</doc>
</method>
<method name="setReservationId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="reservationId" type="org.apache.hadoop.yarn.api.records.ReservationId"/>
<doc>
<![CDATA[Set the reservation to which the job is submitted
@param reservationId the reservationId to set]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close the <code>Job</code>.
@throws IOException if fail to close.]]>
</doc>
</method>
<field name="OUTPUT_FILTER" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="COMPLETION_POLL_INTERVAL_KEY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Key in mapred-*.xml that sets completionPollIntervalMillis]]>
</doc>
</field>
<field name="PROGRESS_MONITOR_POLL_INTERVAL_KEY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Key in mapred-*.xml that sets progMonitorPollIntervalMillis]]>
</doc>
</field>
<field name="USED_GENERIC_PARSER" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SUBMIT_REPLICATION" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DEFAULT_SUBMIT_REPLICATION" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="USE_WILDCARD_FOR_LIBJARS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DEFAULT_USE_WILDCARD_FOR_LIBJARS" type="boolean"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[The job submitter's view of the Job.
<p>It allows the user to configure the
job, submit it, control its execution, and query the state. The set methods
only work until the job is submitted, afterwards they will throw an
IllegalStateException. </p>
<p>
Normally the user creates the application, describes various facets of the
job via {@link Job} and then submits the job and monitors its progress.</p>
<p>Here is an example on how to submit a job:</p>
<p><blockquote><pre>
// Create a new Job
Job job = Job.getInstance();
job.setJarByClass(MyJob.class);
// Specify various job-specific parameters
job.setJobName("myjob");
job.setInputPath(new Path("in"));
job.setOutputPath(new Path("out"));
job.setMapperClass(MyJob.MyMapper.class);
job.setReducerClass(MyJob.MyReducer.class);
// Submit the job, then poll for progress until the job is complete
job.waitForCompletion(true);
</pre></blockquote>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Job -->
<!-- start interface org.apache.hadoop.mapreduce.JobContext -->
<interface name="JobContext" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.MRJobConfig"/>
<method name="getConfiguration" return="org.apache.hadoop.conf.Configuration"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the configuration for the job.
@return the shared configuration object]]>
</doc>
</method>
<method name="getCredentials" return="org.apache.hadoop.security.Credentials"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get credentials for the job.
@return credentials for the job]]>
</doc>
</method>
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the unique ID for the job.
@return the object with the job id]]>
</doc>
</method>
<method name="getNumReduceTasks" return="int"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of reduce tasks for this job. Defaults to
<code>1</code>.
@return the number of reduce tasks for this job.]]>
</doc>
</method>
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the current working directory for the default file system.
@return the directory name.]]>
</doc>
</method>
<method name="getOutputKeyClass" return="java.lang.Class"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the key class for the job output data.
@return the key class for the job output data.]]>
</doc>
</method>
<method name="getOutputValueClass" return="java.lang.Class"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the value class for job outputs.
@return the value class for job outputs.]]>
</doc>
</method>
<method name="getMapOutputKeyClass" return="java.lang.Class"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the key class for the map output data. If it is not set, use the
(final) output key class. This allows the map output key class to be
different than the final output key class.
@return the map output key class.]]>
</doc>
</method>
<method name="getMapOutputValueClass" return="java.lang.Class"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the value class for the map output data. If it is not set, use the
(final) output value class. This allows the map output value class to be
different than the final output value class.
@return the map output value class.]]>
</doc>
</method>
<method name="getJobName" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user-specified job name. This is only used to identify the
job to the user.
@return the job's name, defaulting to "".]]>
</doc>
</method>
<method name="getInputFormatClass" return="java.lang.Class"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the {@link InputFormat} class for the job.
@return the {@link InputFormat} class for the job.]]>
</doc>
</method>
<method name="getMapperClass" return="java.lang.Class"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the {@link Mapper} class for the job.
@return the {@link Mapper} class for the job.]]>
</doc>
</method>
<method name="getCombinerClass" return="java.lang.Class"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the combiner class for the job.
@return the combiner class for the job.]]>
</doc>
</method>
<method name="getReducerClass" return="java.lang.Class"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the {@link Reducer} class for the job.
@return the {@link Reducer} class for the job.]]>
</doc>
</method>
<method name="getOutputFormatClass" return="java.lang.Class"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the {@link OutputFormat} class for the job.
@return the {@link OutputFormat} class for the job.]]>
</doc>
</method>
<method name="getPartitionerClass" return="java.lang.Class"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the {@link Partitioner} class for the job.
@return the {@link Partitioner} class for the job.]]>
</doc>
</method>
<method name="getSortComparator" return="org.apache.hadoop.io.RawComparator"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link RawComparator} comparator used to compare keys.
@return the {@link RawComparator} comparator used to compare keys.]]>
</doc>
</method>
<method name="getJar" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the pathname of the job's jar.
@return the pathname]]>
</doc>
</method>
<method name="getCombinerKeyGroupingComparator" return="org.apache.hadoop.io.RawComparator"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user defined {@link RawComparator} comparator for
grouping keys of inputs to the combiner.
@return comparator set by the user for grouping values.
@see Job#setCombinerKeyGroupingComparatorClass(Class)]]>
</doc>
</method>
<method name="getGroupingComparator" return="org.apache.hadoop.io.RawComparator"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user defined {@link RawComparator} comparator for
grouping keys of inputs to the reduce.
@return comparator set by the user for grouping values.
@see Job#setGroupingComparatorClass(Class)
@see #getCombinerKeyGroupingComparator()]]>
</doc>
</method>
<method name="getJobSetupCleanupNeeded" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get whether job-setup and job-cleanup are needed for the job.
@return true if job-setup and job-cleanup are needed]]>
</doc>
</method>
<method name="getTaskCleanupNeeded" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get whether task-cleanup is needed for the job.
@return true if task-cleanup is needed]]>
</doc>
</method>
<method name="getProfileEnabled" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get whether the task profiling is enabled.
@return true if some tasks will be profiled]]>
</doc>
</method>
<method name="getProfileParams" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the profiler configuration arguments.
The default value for this property is
"-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s"
@return the parameters to pass to the task child to configure profiling]]>
</doc>
</method>
<method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="isMap" type="boolean"/>
<doc>
<![CDATA[Get the range of maps or reduces to profile.
@param isMap is the task a map?
@return the task ranges]]>
</doc>
</method>
<method name="getUser" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the reported username for this job.
@return the username]]>
</doc>
</method>
<method name="getSymlink" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Originally intended to check if symlinks should be used, but currently
symlinks cannot be disabled.
@return true]]>
</doc>
</method>
<method name="getArchiveClassPaths" return="org.apache.hadoop.fs.Path[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the archive entries in classpath as an array of Path]]>
</doc>
</method>
<method name="getCacheArchives" return="java.net.URI[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get cache archives set in the Configuration
@return A URI array of the caches set in the Configuration
@throws IOException]]>
</doc>
</method>
<method name="getCacheFiles" return="java.net.URI[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get cache files set in the Configuration
@return A URI array of the files set in the Configuration
@throws IOException]]>
</doc>
</method>
<method name="getLocalCacheArchives" return="org.apache.hadoop.fs.Path[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="the array returned only includes the items that were
downloaded. There is no way to map this to what is returned by
{@link #getCacheArchives()}.">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return the path array of the localized caches
@return A path array of localized caches
@throws IOException
@deprecated the array returned only includes the items that were
downloaded. There is no way to map this to what is returned by
{@link #getCacheArchives()}.]]>
</doc>
</method>
<method name="getLocalCacheFiles" return="org.apache.hadoop.fs.Path[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="the array returned only includes the items that were
downloaded. There is no way to map this to what is returned by
{@link #getCacheFiles()}.">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return the path array of the localized files
@return A path array of localized files
@throws IOException
@deprecated the array returned only includes the items that were
downloaded. There is no way to map this to what is returned by
{@link #getCacheFiles()}.]]>
</doc>
</method>
<method name="getFileClassPaths" return="org.apache.hadoop.fs.Path[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the file entries in classpath as an array of Path]]>
</doc>
</method>
<method name="getArchiveTimestamps" return="java.lang.String[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the timestamps of the archives. Used by internal
DistributedCache and MapReduce code.
@return a string array of timestamps]]>
</doc>
</method>
<method name="getFileTimestamps" return="java.lang.String[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the timestamps of the files. Used by internal
DistributedCache and MapReduce code.
@return a string array of timestamps]]>
</doc>
</method>
<method name="getMaxMapAttempts" return="int"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of maximum attempts that will be made to run a
map task, as specified by the <code>mapred.map.max.attempts</code>
property. If this property is not already set, the default is 4 attempts.
@return the max number of attempts per map task.]]>
</doc>
</method>
<method name="getMaxReduceAttempts" return="int"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of maximum attempts that will be made to run a
reduce task, as specified by the <code>mapred.reduce.max.attempts</code>
property. If this property is not already set, the default is 4 attempts.
@return the max number of attempts per reduce task.]]>
</doc>
</method>
<doc>
<![CDATA[A read-only view of the job that is provided to the tasks while they
are running.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.JobContext -->
<!-- start class org.apache.hadoop.mapreduce.JobCounter -->
<class name="JobCounter" extends="java.lang.Enum"
abstract="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapreduce.JobCounter[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapreduce.JobCounter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.JobCounter -->
<!-- start class org.apache.hadoop.mapreduce.JobID -->
<class name="JobID" extends="org.apache.hadoop.mapred.ID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="java.lang.Comparable"/>
<constructor name="JobID" type="java.lang.String, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a JobID object
@param jtIdentifier jobTracker identifier
@param id job number]]>
</doc>
</constructor>
<constructor name="JobID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getJtIdentifier" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
<doc>
<![CDATA[Compare JobIds by first jtIdentifiers, then by job numbers]]>
</doc>
</method>
<method name="appendTo" return="java.lang.StringBuilder"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="builder" type="java.lang.StringBuilder"/>
<doc>
<![CDATA[Add the stuff after the "job" prefix to the given builder. This is useful,
because the sub-ids use this substring at the start of their string.
@param builder the builder to append to
@return the builder that was passed in]]>
</doc>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="forName" return="org.apache.hadoop.mapreduce.JobID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<doc>
<![CDATA[Construct a JobId object from the given string
@param str the string form of the job id
@return constructed JobId object or null if the given String is null
@throws IllegalArgumentException if the given string is malformed]]>
</doc>
</method>
<field name="JOB" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="JOBID_REGEX" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="idFormat" type="java.text.NumberFormat"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[JobID represents the immutable and unique identifier for
the job. JobID consists of two parts. The first part
represents the jobtracker identifier, so that the jobID to jobtracker map
is defined. For a cluster setup this string is the jobtracker
start time; for a local setting, it is "local" and a random number.
The second part of the JobID is the job number. <br>
An example JobID is :
<code>job_200707121733_0003</code> , which represents the third job
running at the jobtracker started at <code>200707121733</code>.
<p>
Applications should never construct or parse JobID strings, but rather
use appropriate constructors or {@link #forName(String)} method.
@see TaskID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.JobID -->
<!-- start class org.apache.hadoop.mapreduce.JobPriority -->
<class name="JobPriority" extends="java.lang.Enum"
abstract="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapreduce.JobPriority[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapreduce.JobPriority"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<doc>
<![CDATA[Used to describe the priority of the running job.
DEFAULT : While submitting a job, if the user does not specify a priority,
YARN has the capability to pick the default priority as per its config.
Hence MapReduce can indicate such cases with this new enum.
UNDEFINED_PRIORITY : YARN supports priority as an integer. Hence, other than
the five defined enums, YARN can consider other integers also. To generalize
such cases, this specific enum is used.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.JobPriority -->
<!-- start class org.apache.hadoop.mapreduce.JobStatus -->
<class name="JobStatus" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<implements name="java.lang.Cloneable"/>
<constructor name="JobStatus"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapreduce.JobID, float, float, float, float, org.apache.hadoop.mapreduce.JobStatus.State, org.apache.hadoop.mapreduce.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapreduce.JobID, float, float, float, float, org.apache.hadoop.mapreduce.JobStatus.State, org.apache.hadoop.mapreduce.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param queue queue name
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapreduce.JobID, float, float, float, float, org.apache.hadoop.mapreduce.JobStatus.State, org.apache.hadoop.mapreduce.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param queue queue name
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.
@param isUber Whether the job is running in uber mode]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapreduce.JobID, float, float, float, float, org.apache.hadoop.mapreduce.JobStatus.State, org.apache.hadoop.mapreduce.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.
@param user userid of the person who submitted the job.
@param jobName user-specified job name.
@param queue queue name
@param jobFile job configuration file.
@param trackingUrl link to the web-ui for details of the job.
@param isUber Whether the job is running in uber mode
@param historyFile history file]]>
</doc>
</constructor>
<method name="setMapProgress"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="p" type="float"/>
<doc>
<![CDATA[Sets the map progress of this job
@param p The value of map progress to set to]]>
</doc>
</method>
<method name="setCleanupProgress"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="p" type="float"/>
<doc>
<![CDATA[Sets the cleanup progress of this job
@param p The value of cleanup progress to set to]]>
</doc>
</method>
<method name="setSetupProgress"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="p" type="float"/>
<doc>
<![CDATA[Sets the setup progress of this job
@param p The value of setup progress to set to]]>
</doc>
</method>
<method name="setReduceProgress"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="p" type="float"/>
<doc>
<![CDATA[Sets the reduce progress of this Job
@param p The value of reduce progress to set to]]>
</doc>
</method>
<method name="setPriority"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="jp" type="org.apache.hadoop.mapreduce.JobPriority"/>
<doc>
<![CDATA[Set the priority of the job, defaulting to NORMAL.
@param jp new job priority]]>
</doc>
</method>
<method name="setFinishTime"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="finishTime" type="long"/>
<doc>
<![CDATA[Set the finish time of the job
@param finishTime The finishTime of the job]]>
</doc>
</method>
<method name="setHistoryFile"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="historyFile" type="java.lang.String"/>
<doc>
<![CDATA[Set the job history file url for a completed job]]>
</doc>
</method>
<method name="setTrackingUrl"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="trackingUrl" type="java.lang.String"/>
<doc>
<![CDATA[Set the link to the web-ui for details of the job.]]>
</doc>
</method>
<method name="setRetired"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Set the job retire flag to true.]]>
</doc>
</method>
<method name="setState"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/>
<doc>
<![CDATA[Change the current run state of the job.]]>
</doc>
</method>
<method name="setStartTime"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="startTime" type="long"/>
<doc>
<![CDATA[Set the start time of the job
@param startTime The startTime of the job]]>
</doc>
</method>
<method name="setUsername"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="userName" type="java.lang.String"/>
<doc>
<![CDATA[@param userName The username of the job]]>
</doc>
</method>
<method name="setSchedulingInfo"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="schedulingInfo" type="java.lang.String"/>
<doc>
<![CDATA[Used to set the scheduling information associated with a particular Job.
@param schedulingInfo Scheduling information of the job]]>
</doc>
</method>
<method name="setJobACLs"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="acls" type="java.util.Map"/>
<doc>
<![CDATA[Set the job acls.
@param acls {@link Map} from {@link JobACL} to {@link AccessControlList}]]>
</doc>
</method>
<method name="setQueue"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="queue" type="java.lang.String"/>
<doc>
<![CDATA[Set queue name
@param queue queue name]]>
</doc>
</method>
<method name="setFailureInfo"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="failureInfo" type="java.lang.String"/>
<doc>
<![CDATA[Set diagnostic information.
@param failureInfo diagnostic information]]>
</doc>
</method>
<method name="getQueue" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get queue name
@return queue name]]>
</doc>
</method>
<method name="getMapProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in maps]]>
</doc>
</method>
<method name="getCleanupProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in cleanup]]>
</doc>
</method>
<method name="getSetupProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in setup]]>
</doc>
</method>
<method name="getReduceProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in reduce]]>
</doc>
</method>
<method name="getState" return="org.apache.hadoop.mapreduce.JobStatus.State"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return running state of the job]]>
</doc>
</method>
<method name="getStartTime" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return start time of the job]]>
</doc>
</method>
<method name="clone" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return The jobid of the Job]]>
</doc>
</method>
<method name="getUsername" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the username of the job]]>
</doc>
</method>
<method name="getSchedulingInfo" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Gets the scheduling information associated with a particular Job.
@return the scheduling information of the job]]>
</doc>
</method>
<method name="getJobACLs" return="java.util.Map"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the job acls.
@return a {@link Map} from {@link JobACL} to {@link AccessControlList}]]>
</doc>
</method>
<method name="getPriority" return="org.apache.hadoop.mapreduce.JobPriority"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the priority of the job
@return job priority]]>
</doc>
</method>
<method name="getFailureInfo" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Gets any available info on the reason of failure of the job.
@return diagnostic information on why a job might have failed.]]>
</doc>
</method>
<method name="isJobComplete" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns true if the status is for a completed job.]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getJobName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user-specified job name.]]>
</doc>
</method>
<method name="getJobFile" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configuration file for the job.]]>
</doc>
</method>
<method name="getTrackingUrl" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the link to the web-ui for details of the job.]]>
</doc>
</method>
<method name="getFinishTime" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the finish time of the job.]]>
</doc>
</method>
<method name="isRetired" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Check whether the job has retired.]]>
</doc>
</method>
<method name="getHistoryFile" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the job history file name for a completed job. If job is not
completed or history file not available then return null.]]>
</doc>
</method>
<method name="getNumUsedSlots" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return number of used mapred slots]]>
</doc>
</method>
<method name="setNumUsedSlots"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[@param n number of used mapred slots]]>
</doc>
</method>
<method name="getNumReservedSlots" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the number of reserved slots]]>
</doc>
</method>
<method name="setNumReservedSlots"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[@param n the number of reserved slots]]>
</doc>
</method>
<method name="getUsedMem" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the used memory]]>
</doc>
</method>
<method name="setUsedMem"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="m" type="int"/>
<doc>
<![CDATA[@param m the used memory]]>
</doc>
</method>
<method name="getReservedMem" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the reserved memory]]>
</doc>
</method>
<method name="setReservedMem"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="r" type="int"/>
<doc>
<![CDATA[@param r the reserved memory]]>
</doc>
</method>
<method name="getNeededMem" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the needed memory]]>
</doc>
</method>
<method name="setNeededMem"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[@param n the needed memory]]>
</doc>
</method>
<method name="isUber" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Whether the job is running in uber mode
@return true if the job is running in uber mode]]>
</doc>
</method>
<method name="setUber"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="isUber" type="boolean"/>
<doc>
<![CDATA[Set uber-mode flag
@param isUber Whether the job is running in uber-mode]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[Describes the current status of a job.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.JobStatus -->
<!-- start interface org.apache.hadoop.mapreduce.MapContext -->
<interface name="MapContext" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
<method name="getInputSplit" return="org.apache.hadoop.mapreduce.InputSplit"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the input split for this map.]]>
</doc>
</method>
<doc>
<![CDATA[The context that is given to the {@link Mapper}.
@param <KEYIN> the key input type to the Mapper
@param <VALUEIN> the value input type to the Mapper
@param <KEYOUT> the key output type from the Mapper
@param <VALUEOUT> the value output type from the Mapper]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.MapContext -->
<!-- start class org.apache.hadoop.mapreduce.Mapper -->
<class name="Mapper" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Mapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once at the beginning of the task.]]>
</doc>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="KEYIN"/>
<param name="value" type="VALUEIN"/>
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once for each key/value pair in the input split. Most applications
should override this, but the default is the identity function.]]>
</doc>
</method>
<method name="cleanup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once at the end of the task.]]>
</doc>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Expert users can override this method for more complete control over the
execution of the Mapper.
@param context
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs.
<p>Maps are the individual tasks which transform input records into
intermediate records. The transformed intermediate records need not be of
the same type as the input records. A given input pair may map to zero or
many output pairs.</p>
<p>The Hadoop Map-Reduce framework spawns one map task for each
{@link InputSplit} generated by the {@link InputFormat} for the job.
<code>Mapper</code> implementations can access the {@link Configuration} for
the job via the {@link JobContext#getConfiguration()}.
<p>The framework first calls
{@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by
{@link #map(Object, Object, org.apache.hadoop.mapreduce.Mapper.Context)}
for each key/value pair in the <code>InputSplit</code>. Finally
{@link #cleanup(org.apache.hadoop.mapreduce.Mapper.Context)} is called.</p>
<p>All intermediate values associated with a given output key are
subsequently grouped by the framework, and passed to a {@link Reducer} to
determine the final output. Users can control the sorting and grouping by
specifying two key {@link RawComparator} classes.</p>
<p>The <code>Mapper</code> outputs are partitioned per
<code>Reducer</code>. Users can control which keys (and hence records) go to
which <code>Reducer</code> by implementing a custom {@link Partitioner}.
<p>Users can optionally specify a <code>combiner</code>, via
{@link Job#setCombinerClass(Class)}, to perform local aggregation of the
intermediate outputs, which helps to cut down the amount of data transferred
from the <code>Mapper</code> to the <code>Reducer</code>.
<p>Applications can specify if and how the intermediate
outputs are to be compressed and which {@link CompressionCodec}s are to be
used via the <code>Configuration</code>.</p>
<p>If the job has zero
reduces then the output of the <code>Mapper</code> is directly written
to the {@link OutputFormat} without sorting by keys.</p>
<p>Example:</p>
<p><blockquote><pre>
public class TokenCounterMapper
extends Mapper&lt;Object, Text, Text, IntWritable&gt;{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
</pre></blockquote>
<p>Applications may override the
{@link #run(org.apache.hadoop.mapreduce.Mapper.Context)} method to exert
greater control on map processing e.g. multi-threaded <code>Mapper</code>s
etc.</p>
@see InputFormat
@see JobContext
@see Partitioner
@see Reducer]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Mapper -->
<!-- start class org.apache.hadoop.mapreduce.MarkableIterator -->
<class name="MarkableIterator" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.MarkableIteratorInterface"/>
<constructor name="MarkableIterator" type="java.util.Iterator"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a new iterator layered on the input iterator
@param itr underlying iterator that implements MarkableIteratorInterface]]>
</doc>
</constructor>
<method name="mark"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="clearMark"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="VALUE"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="remove"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[<code>MarkableIterator</code> is a wrapper iterator class that
implements the {@link MarkableIteratorInterface}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.MarkableIterator -->
<!-- start class org.apache.hadoop.mapreduce.OutputCommitter -->
<class name="OutputCommitter" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="OutputCommitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setupJob"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For the framework to setup the job output during initialization. This is
called from the application master process for the entire job. This will be
called multiple times, once per job attempt.
@param jobContext Context of the job whose output is being written.
@throws IOException if temporary output could not be created]]>
</doc>
</method>
<method name="cleanupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link #commitJob(JobContext)} and
{@link #abortJob(JobContext, JobStatus.State)} instead.">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For cleaning up the job's output after job completion. This is called
from the application master process for the entire job. This may be called
multiple times.
@param jobContext Context of the job whose output is being written.
@throws IOException
@deprecated Use {@link #commitJob(JobContext)} and
{@link #abortJob(JobContext, JobStatus.State)} instead.]]>
</doc>
</method>
<method name="commitJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For committing job's output after successful job completion. Note that this
is invoked for jobs with final runstate as SUCCESSFUL. This is called
from the application master process for the entire job. This is guaranteed
to only be called once. If it throws an exception the entire job will
fail.
@param jobContext Context of the job whose output is being written.
@throws IOException]]>
</doc>
</method>
<method name="abortJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For aborting an unsuccessful job's output. Note that this is invoked for
jobs with final runstate as {@link JobStatus.State#FAILED} or
{@link JobStatus.State#KILLED}. This is called from the application
master process for the entire job. This may be called multiple times.
@param jobContext Context of the job whose output is being written.
@param state final runstate of the job
@throws IOException]]>
</doc>
</method>
<method name="setupTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Sets up output for the task. This is called from each individual task's
process that will output to HDFS, and it is called just for that task. This
may be called multiple times for the same task, but for different task
attempts.
@param taskContext Context of the task whose output is being written.
@throws IOException]]>
</doc>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check whether task needs a commit. This is called from each individual
task's process that will output to HDFS, and it is called just for that
task.
@param taskContext
@return true/false
@throws IOException]]>
</doc>
</method>
<method name="commitTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[To promote the task's temporary output to final output location.
If {@link #needsTaskCommit(TaskAttemptContext)} returns true and this
task is the task that the AM determines finished first, this method
is called to commit an individual task's output. This is to mark
that tasks output as complete, as {@link #commitJob(JobContext)} will
also be called later on if the entire job finished successfully. This
is called from a task's process. This may be called multiple times for the
same task, but different task attempts. It should be very rare for this to
be called multiple times and requires odd networking failures to make this
happen. In the future the Hadoop framework may eliminate this race.
@param taskContext Context of the task whose output is being written.
@throws IOException if commit is not successful.]]>
</doc>
</method>
<method name="abortTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Discard the task output. This is called from a task's process to clean
up a single task's output that has not yet been committed. This may be
called multiple times for the same task, but for different task attempts.
@param taskContext
@throws IOException]]>
</doc>
</method>
<method name="isRecoverySupported" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link #isRecoverySupported(JobContext)} instead.">
<doc>
<![CDATA[Is task output recovery supported for restarting jobs?
If task output recovery is supported, job restart can be done more
efficiently.
@return <code>true</code> if task output recovery is supported,
<code>false</code> otherwise
@see #recoverTask(TaskAttemptContext)
@deprecated Use {@link #isRecoverySupported(JobContext)} instead.]]>
</doc>
</method>
<method name="isCommitJobRepeatable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns true if an in-progress job commit can be retried. If the MR AM is
re-run then it will check this value to determine if it can retry an
in-progress commit that was started by a previous version.
Note that in rare scenarios, the previous AM version might still be running
at that time, due to system anomalies. Hence if this method returns true
then the retry commit operation should be able to run concurrently with
the previous operation.
If repeatable job commit is supported, job restart can tolerate previous
AM failures during job commit.
By default, it is not supported. Extended classes (like:
FileOutputCommitter) should explicitly override it if they provide support.
@param jobContext
Context of the job whose output is being written.
@return <code>true</code> repeatable job commit is supported,
<code>false</code> otherwise
@throws IOException]]>
</doc>
</method>
<method name="isRecoverySupported" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Is task output recovery supported for restarting jobs?
If task output recovery is supported, job restart can be done more
efficiently.
@param jobContext
Context of the job whose output is being written.
@return <code>true</code> if task output recovery is supported,
<code>false</code> otherwise
@throws IOException
@see #recoverTask(TaskAttemptContext)]]>
</doc>
</method>
<method name="recoverTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Recover the task output.
The retry-count for the job will be passed via the
{@link MRJobConfig#APPLICATION_ATTEMPT_ID} key in
{@link TaskAttemptContext#getConfiguration()} for the
<code>OutputCommitter</code>. This is called from the application master
process, but it is called individually for each task.
If an exception is thrown the task will be attempted again.
This may be called multiple times for the same task. But from different
application attempts.
@param taskContext Context of the task whose output is being recovered
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>OutputCommitter</code> describes the commit of task output for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
the job to:</p>
<ol>
<li>
Setup the job during initialization. For example, create the temporary
output directory for the job during the initialization of the job.
</li>
<li>
Cleanup the job after the job completion. For example, remove the
temporary output directory after the job completion.
</li>
<li>
Setup the task temporary output.
</li>
<li>
Check whether a task needs a commit. This is to avoid the commit
procedure if a task does not need commit.
</li>
<li>
Commit of the task output.
</li>
<li>
Discard the task commit.
</li>
</ol>
The methods in this class can be called from several different processes and
from several different contexts. It is important to know which process and
which context each is called from. Each method should be marked accordingly
in its documentation. It is also important to note that not all methods are
guaranteed to be called once and only once. If a method is not guaranteed to
have this property the output committer needs to handle this appropriately.
Also note it will only be in rare situations where they may be called
multiple times for the same task.
@see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
@see JobContext
@see TaskAttemptContext]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.OutputCommitter -->
<!-- start class org.apache.hadoop.mapreduce.OutputFormat -->
<class name="OutputFormat" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="OutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the {@link RecordWriter} for the given task.
@param context the information about the current task.
@return a {@link RecordWriter} to write the output for the job.
@throws IOException]]>
</doc>
</method>
<method name="checkOutputSpecs"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Check for validity of the output-specification for the job.
<p>This is to validate the output specification for the job when
a job is submitted. Typically checks that it does not already exist,
throwing an exception when it already exists, so that output is not
overwritten.</p>
@param context information about the job
@throws IOException when output should not be attempted]]>
</doc>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the output committer for this output format. This is responsible
for ensuring the output is committed correctly.
@param context the task context
@return an output committer
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<doc>
<![CDATA[<code>OutputFormat</code> describes the output-specification for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the
job to:</p>
<ol>
<li>
Validate the output-specification of the job. For e.g. check that the
output directory doesn't already exist.
</li>
<li>
Provide the {@link RecordWriter} implementation to be used to write out
the output files of the job. Output files are stored in a
{@link FileSystem}.
</li>
</ol>
@see RecordWriter]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.OutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.Partitioner -->
<class name="Partitioner" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Partitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getPartition" return="int"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="KEY"/>
<param name="value" type="VALUE"/>
<param name="numPartitions" type="int"/>
<doc>
<![CDATA[Get the partition number for a given key (hence record) given the total
number of partitions i.e. number of reduce-tasks for the job.
<p>Typically a hash function on all or a subset of the key.</p>
@param key the key to be partitioned.
@param value the entry value.
@param numPartitions the total number of partitions.
@return the partition number for the <code>key</code>.]]>
</doc>
</method>
<doc>
<![CDATA[Partitions the key space.
<p><code>Partitioner</code> controls the partitioning of the keys of the
intermediate map-outputs. The key (or a subset of the key) is used to derive
the partition, typically by a hash function. The total number of partitions
is the same as the number of reduce tasks for the job. Hence this controls
which of the <code>m</code> reduce tasks the intermediate key (and hence the
record) is sent for reduction.</p>
<p>Note: A <code>Partitioner</code> is created only when there are multiple
reducers.</p>
<p>Note: If you require your Partitioner class to obtain the Job's
configuration object, implement the {@link Configurable} interface.</p>
@see Reducer]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Partitioner -->
<!-- start class org.apache.hadoop.mapreduce.QueueAclsInfo -->
<class name="QueueAclsInfo" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="QueueAclsInfo"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default constructor for QueueAclsInfo.]]>
</doc>
</constructor>
<constructor name="QueueAclsInfo" type="java.lang.String, java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a new QueueAclsInfo object using the queue name and the
queue operations array
@param queueName Name of the job queue
@param operations]]>
</doc>
</constructor>
<method name="getQueueName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get queue name.
@return name]]>
</doc>
</method>
<method name="setQueueName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
</method>
<method name="getOperations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get operations allowed on queue.
@return array of String]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Class to encapsulate Queue ACLs for a particular
user.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.QueueAclsInfo -->
<!-- start class org.apache.hadoop.mapreduce.QueueInfo -->
<class name="QueueInfo" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="QueueInfo"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default constructor for QueueInfo.]]>
</doc>
</constructor>
<constructor name="QueueInfo" type="java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a new QueueInfo object using the queue name and the
scheduling information passed.
@param queueName Name of the job queue
@param schedulingInfo Scheduling Information associated with the job
queue]]>
</doc>
</constructor>
<constructor name="QueueInfo" type="java.lang.String, java.lang.String, org.apache.hadoop.mapreduce.QueueState, org.apache.hadoop.mapreduce.JobStatus[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@param queueName
@param schedulingInfo
@param state
@param stats]]>
</doc>
</constructor>
<method name="setQueueName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<doc>
<![CDATA[Set the queue name of the JobQueueInfo
@param queueName Name of the job queue.]]>
</doc>
</method>
<method name="getQueueName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the queue name from JobQueueInfo
@return queue name]]>
</doc>
</method>
<method name="setSchedulingInfo"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="schedulingInfo" type="java.lang.String"/>
<doc>
<![CDATA[Set the scheduling information associated to particular job queue
@param schedulingInfo]]>
</doc>
</method>
<method name="getSchedulingInfo" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Gets the scheduling information associated to particular job queue.
If nothing is set would return <b>"N/A"</b>
@return Scheduling information associated to particular Job Queue]]>
</doc>
</method>
<method name="setState"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="state" type="org.apache.hadoop.mapreduce.QueueState"/>
<doc>
<![CDATA[Set the state of the queue
@param state state of the queue.]]>
</doc>
</method>
<method name="getState" return="org.apache.hadoop.mapreduce.QueueState"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the queue state
@return the queue state.]]>
</doc>
</method>
<method name="setJobStatuses"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="stats" type="org.apache.hadoop.mapreduce.JobStatus[]"/>
</method>
<method name="getQueueChildren" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get immediate children.
@return list of QueueInfo]]>
</doc>
</method>
<method name="setQueueChildren"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="children" type="java.util.List"/>
</method>
<method name="getProperties" return="java.util.Properties"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get properties.
@return Properties]]>
</doc>
</method>
<method name="setProperties"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="props" type="java.util.Properties"/>
</method>
<method name="getJobStatuses" return="org.apache.hadoop.mapreduce.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the jobs submitted to queue
@return list of JobStatus for the submitted jobs]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Class that contains the information regarding the Job Queues which are
maintained by the Hadoop Map/Reduce framework.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.QueueInfo -->
<!-- start class org.apache.hadoop.mapreduce.QueueState -->
<class name="QueueState" extends="java.lang.Enum"
abstract="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<!-- API snapshot entry for the public final enum type QueueState.
     Recorded members: the static values() and valueOf(String) pair,
     the getStateName() accessor, a static getState(String) lookup and
     toString(). No field elements (enum constants) appear in this entry. -->
<method name="values" return="org.apache.hadoop.mapreduce.QueueState[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapreduce.QueueState"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<method name="getStateName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the stateName]]>
</doc>
</method>
<!-- Static lookup from a String to a QueueState. The snapshot carries no
     doc text for it, so the matching rules and the result for an unknown
     string are not visible here. -->
<method name="getState" return="org.apache.hadoop.mapreduce.QueueState"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="state" type="java.lang.String"/>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[Enum representing queue state]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.QueueState -->
<!-- start class org.apache.hadoop.mapreduce.RecordReader -->
<class name="RecordReader" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<!-- API snapshot entry for the abstract public class RecordReader, which
     implements java.io.Closeable. It records one public no-argument
     constructor and six abstract public methods. KEYIN and VALUEIN in the
     return attributes are the type-variable names listed in the class doc
     text, not resolvable class names. Every method except close declares
     both IOException and InterruptedException; close declares IOException
     only. -->
<implements name="java.io.Closeable"/>
<constructor name="RecordReader"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="initialize"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once at initialization.
@param split the split that defines the range of records to read
@param context the information about the task
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="nextKeyValue" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Read the next key, value pair.
@return true if a key/value pair was read
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getCurrentKey" return="KEYIN"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the current key
@return the current key or null if there is no current key
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getCurrentValue" return="VALUEIN"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the current value.
@return the object that was read
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[The current progress of the record reader through its data.
@return a number between 0.0 and 1.0 that is the fraction of the data read
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<!-- Unlike the other methods of this class, close records a single
     exception element (IOException). -->
<method name="close"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close the record reader.]]>
</doc>
</method>
<doc>
<![CDATA[The record reader breaks the data into key/value pairs for input to the
{@link Mapper}.
@param <KEYIN>
@param <VALUEIN>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.RecordReader -->
<!-- start class org.apache.hadoop.mapreduce.RecordWriter -->
<class name="RecordWriter" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<!-- API snapshot entry for the abstract public class RecordWriter: one
     public no-argument constructor plus two abstract public methods,
     write(K, V) and close(TaskAttemptContext). K and V are recorded
     verbatim as parameter types; presumably they are the class's type
     variables, which this snapshot does not list (confirm against the
     Java source).
     NOTE(review): both methods carry an InterruptedException exception
     element, but their doc text contains only an "@throws IOException"
     tag. The text is left exactly as generated. -->
<constructor name="RecordWriter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="write"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Writes a key/value pair.
@param key the key to write.
@param value the value to write.
@throws IOException]]>
</doc>
</method>
<method name="close"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Close this <code>RecordWriter</code> to future operations.
@param context the context of the task
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>RecordWriter</code> writes the output &lt;key, value&gt; pairs
to an output file.
<p><code>RecordWriter</code> implementations write the job outputs to the
{@link FileSystem}.
@see OutputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.RecordWriter -->
<!-- start interface org.apache.hadoop.mapreduce.ReduceContext -->
<interface name="ReduceContext" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<!-- API snapshot entry for the public interface ReduceContext, recorded
     as implementing org.apache.hadoop.mapreduce.TaskInputOutputContext.
     It declares two abstract methods, nextKey() and getValues(), both of
     which may throw IOException and InterruptedException. The interface
     doc text names four type parameters (KEYIN, VALUEIN, KEYOUT,
     VALUEOUT); they are not recorded as attributes. -->
<implements name="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
<!-- The doc text for nextKey has no @return tag, so the meaning of its
     boolean result is not stated in this snapshot. -->
<method name="nextKey" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Start processing next unique key.]]>
</doc>
</method>
<!-- The return type is recorded as the raw java.lang.Iterable; the
     element type is not part of the snapshot. -->
<method name="getValues" return="java.lang.Iterable"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Iterate through the values for the current key, reusing the same value
object, which is stored in the context.
@return the series of values associated with the current key. All of the
objects returned directly and indirectly from this method are reused.]]>
</doc>
</method>
<doc>
<![CDATA[The context passed to the {@link Reducer}.
@param <KEYIN> the class of the input keys
@param <VALUEIN> the class of the input values
@param <KEYOUT> the class of the output keys
@param <VALUEOUT> the class of the output values]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.ReduceContext -->
<!-- start class org.apache.hadoop.mapreduce.Reducer -->
<class name="Reducer" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<!-- API snapshot entry for the public, non-abstract class Reducer: one
     public no-argument constructor and four methods. setup, reduce and
     cleanup are protected; run is public. All four take an
     org.apache.hadoop.mapreduce.Reducer.Context and declare IOException
     and InterruptedException. The long class-level doc element at the end
     of the entry is HTML-flavoured Javadoc text kept inside CDATA.
     NOTE(review): in that class doc the sentence boundary
     "reduce.The grouping" has no space after the period. The text is left
     exactly as generated. -->
<constructor name="Reducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once at the start of the task.]]>
</doc>
</method>
<!-- The key type is recorded as the type-variable name KEYIN and the
     values parameter as the raw java.lang.Iterable. -->
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="KEYIN"/>
<param name="values" type="java.lang.Iterable"/>
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[This method is called once for each key. Most applications will define
their reduce class by overriding this method. The default implementation
is an identity function.]]>
</doc>
</method>
<method name="cleanup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once at the end of the task.]]>
</doc>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Advanced application writers can use the
{@link #run(org.apache.hadoop.mapreduce.Reducer.Context)} method to
control how the reduce task works.]]>
</doc>
</method>
<doc>
<![CDATA[Reduces a set of intermediate values which share a key to a smaller set of
values.
<p><code>Reducer</code> implementations
can access the {@link Configuration} for the job via the
{@link JobContext#getConfiguration()} method.</p>
<p><code>Reducer</code> has 3 primary phases:</p>
<ol>
<li>
<b id="Shuffle">Shuffle</b>
<p>The <code>Reducer</code> copies the sorted output from each
{@link Mapper} using HTTP across the network.</p>
</li>
<li>
<b id="Sort">Sort</b>
<p>The framework merge sorts <code>Reducer</code> inputs by
<code>key</code>s
(since different <code>Mapper</code>s may have output the same key).</p>
<p>The shuffle and sort phases occur simultaneously i.e. while outputs are
being fetched they are merged.</p>
<b id="SecondarySort">SecondarySort</b>
<p>To achieve a secondary sort on the values returned by the value
iterator, the application should extend the key with the secondary
key and define a grouping comparator. The keys will be sorted using the
entire key, but will be grouped using the grouping comparator to decide
which keys and values are sent in the same call to reduce.The grouping
comparator is specified via
{@link Job#setGroupingComparatorClass(Class)}. The sort order is
controlled by
{@link Job#setSortComparatorClass(Class)}.</p>
For example, say that you want to find duplicate web pages and tag them
all with the url of the "best" known example. You would set up the job
like:
<ul>
<li>Map Input Key: url</li>
<li>Map Input Value: document</li>
<li>Map Output Key: document checksum, url pagerank</li>
<li>Map Output Value: url</li>
<li>Partitioner: by checksum</li>
<li>OutputKeyComparator: by checksum and then decreasing pagerank</li>
<li>OutputValueGroupingComparator: by checksum</li>
</ul>
</li>
<li>
<b id="Reduce">Reduce</b>
<p>In this phase the
{@link #reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}
method is called for each <code>&lt;key, (collection of values)&gt;</code> in
the sorted inputs.</p>
<p>The output of the reduce task is typically written to a
{@link RecordWriter} via
{@link Context#write(Object, Object)}.</p>
</li>
</ol>
<p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p>
<p>Example:</p>
<p><blockquote><pre>
public class IntSumReducer&lt;Key&gt; extends Reducer&lt;Key,IntWritable,
Key,IntWritable&gt; {
private IntWritable result = new IntWritable();
public void reduce(Key key, Iterable&lt;IntWritable&gt; values,
Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
</pre></blockquote>
@see Mapper
@see Partitioner]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Reducer -->
<!-- start interface org.apache.hadoop.mapreduce.TaskAttemptContext -->
<interface name="TaskAttemptContext" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<!-- API snapshot entry for the public interface TaskAttemptContext,
     recorded as implementing org.apache.hadoop.mapreduce.JobContext and
     org.apache.hadoop.util.Progressable. It declares six abstract public
     methods: getTaskAttemptID, setStatus, getStatus, getProgress and two
     getCounter overloads. None of them records an exception element. -->
<implements name="org.apache.hadoop.mapreduce.JobContext"/>
<implements name="org.apache.hadoop.util.Progressable"/>
<method name="getTaskAttemptID" return="org.apache.hadoop.mapreduce.TaskAttemptID"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the unique name for this task attempt.]]>
</doc>
</method>
<method name="setStatus"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="msg" type="java.lang.String"/>
<doc>
<![CDATA[Set the current status of the task to the given string.]]>
</doc>
</method>
<method name="getStatus" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the last set status message.
@return the current status message]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The current progress of the task attempt.
@return a number between 0.0 and 1.0 (inclusive) indicating the attempt's
progress.]]>
</doc>
</method>
<!-- Overload keyed by an enum constant (parameter type java.lang.Enum). -->
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.Enum"/>
<doc>
<![CDATA[Get the {@link Counter} for the given <code>counterName</code>.
@param counterName counter name
@return the <code>Counter</code> for the given <code>counterName</code>]]>
</doc>
</method>
<!-- Overload keyed by a group name and a counter name, both Strings.
     NOTE(review): the doc text has an @param tag for counterName only;
     groupName is mentioned in the summary but has no @param tag. The text
     is left exactly as generated. -->
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="groupName" type="java.lang.String"/>
<param name="counterName" type="java.lang.String"/>
<doc>
<![CDATA[Get the {@link Counter} for the given <code>groupName</code> and
<code>counterName</code>.
@param counterName counter name
@return the <code>Counter</code> for the given <code>groupName</code> and
<code>counterName</code>]]>
</doc>
</method>
<doc>
<![CDATA[The context for task attempts.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.TaskAttemptContext -->
<!-- start class org.apache.hadoop.mapreduce.TaskAttemptID -->
<class name="TaskAttemptID" extends="org.apache.hadoop.mapred.ID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<!-- API snapshot entry for the public class TaskAttemptID. The superclass
     is recorded as org.apache.hadoop.mapred.ID, while compareTo below
     takes an org.apache.hadoop.mapreduce.ID. The entry lists four public
     constructors, accessors for the job ID, task ID and task type,
     readFields/write methods taking DataInput/DataOutput, the
     equals/hashCode/compareTo/toString group, the static forName(String)
     factory and the protected static final String field ATTEMPT, whose
     value is not recorded.
     NOTE(review), all left exactly as generated:
     (1) the two five-argument constructor docs say "Constructs a TaskId
         object" although they construct a TaskAttemptID;
     (2) the appendTo doc contains the typo "append ot";
     (3) the forName doc has no @param tag for its str parameter. -->
<constructor name="TaskAttemptID" type="org.apache.hadoop.mapreduce.TaskID, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}.
@param taskId TaskID that this task belongs to
@param id the task attempt number]]>
</doc>
</constructor>
<constructor name="TaskAttemptID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskId object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param type the TaskType
@param taskId taskId number
@param id the task attempt number]]>
</doc>
</constructor>
<!-- Same shape as the previous constructor, with a boolean isMap flag in
     place of the TaskType argument. -->
<constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskId object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param isMap whether the tip is a map
@param taskId taskId number
@param id the task attempt number]]>
</doc>
</constructor>
<constructor name="TaskAttemptID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the {@link JobID} object that this task attempt belongs to]]>
</doc>
</method>
<method name="getTaskID" return="org.apache.hadoop.mapreduce.TaskID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the {@link TaskID} object that this task attempt belongs to]]>
</doc>
</method>
<method name="isMap" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns whether this TaskID is a map ID]]>
</doc>
</method>
<method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the TaskType of the TaskAttemptID]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<!-- The only protected method of this entry. -->
<method name="appendTo" return="java.lang.StringBuilder"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="builder" type="java.lang.StringBuilder"/>
<doc>
<![CDATA[Add the unique string to the StringBuilder
@param builder the builder to append ot
@return the builder that was passed in.]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
<doc>
<![CDATA[Compare TaskIds by first tipIds, then by task numbers.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<!-- Static factory that parses a String. It records an
     IllegalArgumentException exception element, and the doc text states
     that a null input yields null. -->
<method name="forName" return="org.apache.hadoop.mapreduce.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<doc>
<![CDATA[Construct a TaskAttemptID object from given string
@return constructed TaskAttemptID object or null if the given String is null
@throws IllegalArgumentException if the given string is malformed]]>
</doc>
</method>
<field name="ATTEMPT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[TaskAttemptID represents the immutable and unique identifier for
a task attempt. Each task attempt is one particular instance of a Map or
Reduce Task identified by its TaskID.
TaskAttemptID consists of 2 parts. First part is the
{@link TaskID}, that this TaskAttemptID belongs to.
Second part is the task attempt number. <br>
An example TaskAttemptID is :
<code>attempt_200707121733_0003_m_000005_0</code> , which represents the
zeroth task attempt for the fifth map task in the third job
running at the jobtracker started at <code>200707121733</code>.
<p>
Applications should never construct or parse TaskAttemptID strings
, but rather use appropriate constructors or {@link #forName(String)}
method.
@see JobID
@see TaskID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskAttemptID -->
<!-- start class org.apache.hadoop.mapreduce.TaskCompletionEvent -->
<class name="TaskCompletionEvent" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<!-- API snapshot entry for the public class TaskCompletionEvent, which
     implements org.apache.hadoop.io.Writable. It records a no-argument
     constructor, a six-argument constructor, public getters, protected
     setters, equals/hashCode/toString, the isMapTask and idWithinJob
     queries, the write/readFields pair and the public static final field
     EMPTY_ARRAY.
     NOTE(review), both left exactly as generated:
     (1) the six-argument constructor is typed
         (int, TaskAttemptID, int, boolean, Status, String) but its doc
         text has @param tags for only four names (eventId, taskId,
         status, taskTrackerHttp), so the second int and the boolean are
         undocumented here;
     (2) the same doc text reads "assigned in incrementally". -->
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="TaskCompletionEvent"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default constructor for Writable.]]>
</doc>
</constructor>
<constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapreduce.TaskAttemptID, int, boolean, org.apache.hadoop.mapreduce.TaskCompletionEvent.Status, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructor. eventId should be created externally and incremented
per event for each job.
@param eventId event id, event id should be unique and assigned in
incrementally, starting from 0.
@param taskId task id
@param status task's status
@param taskTrackerHttp task tracker's host:port for http.]]>
</doc>
</constructor>
<method name="getEventId" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns event Id.
@return event id]]>
</doc>
</method>
<method name="getTaskAttemptId" return="org.apache.hadoop.mapreduce.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns task id.
@return task id]]>
</doc>
</method>
<method name="getStatus" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns {@link Status}
@return task completion status]]>
</doc>
</method>
<method name="getTaskTrackerHttp" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[http location of the tasktracker where this task ran.
@return http location of tasktracker user logs]]>
</doc>
</method>
<method name="getTaskRunTime" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns time (in millisec) the task took to complete.]]>
</doc>
</method>
<!-- The five setters that follow are all recorded with
     visibility="protected"; the matching getters above are public. -->
<method name="setTaskRunTime"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="taskCompletionTime" type="int"/>
<doc>
<![CDATA[Set the task completion time
@param taskCompletionTime time (in millisec) the task took to complete]]>
</doc>
</method>
<method name="setEventId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="eventId" type="int"/>
<doc>
<![CDATA[set event Id. should be assigned incrementally starting from 0.
@param eventId]]>
</doc>
</method>
<method name="setTaskAttemptId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
<doc>
<![CDATA[Sets task id.
@param taskId]]>
</doc>
</method>
<method name="setTaskStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="status" type="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status"/>
<doc>
<![CDATA[Set task status.
@param status]]>
</doc>
</method>
<method name="setTaskTrackerHttp"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="taskTrackerHttp" type="java.lang.String"/>
<doc>
<![CDATA[Set task tracker http location.
@param taskTrackerHttp]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<!-- isMapTask and idWithinJob carry no doc text in this snapshot; only
     their signatures (no parameters, boolean and int results) are
     recorded. -->
<method name="isMapTask" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="idWithinJob" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="EMPTY_ARRAY" type="org.apache.hadoop.mapreduce.TaskCompletionEvent[]"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This is used to track task completion events on
job tracker.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskCompletionEvent -->
<!-- start class org.apache.hadoop.mapreduce.TaskCompletionEvent.Status -->
<class name="TaskCompletionEvent.Status" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<!-- API snapshot entry for the nested enum TaskCompletionEvent.Status
     (static="true", final="true"). Only the static values() and
     valueOf(String) methods are recorded; the entry has no field elements
     for the enum constants and no doc element. -->
<method name="values" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskCompletionEvent.Status -->
<!-- start class org.apache.hadoop.mapreduce.TaskCounter -->
<class name="TaskCounter" extends="java.lang.Enum"
abstract="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<!-- API snapshot entry for the public final enum TaskCounter. Only the
     static values() and valueOf(String) methods are recorded; the entry
     has no field elements for the enum constants and no doc element. -->
<method name="values" return="org.apache.hadoop.mapreduce.TaskCounter[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapreduce.TaskCounter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskCounter -->
<!-- start class org.apache.hadoop.mapreduce.TaskID -->
<class name="TaskID" extends="org.apache.hadoop.mapred.ID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, org.apache.hadoop.mapreduce.TaskType, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskID object from given {@link JobID}.
@param jobId JobID that this tip belongs to
@param type the {@link TaskType} of the task
@param id the tip number]]>
</doc>
</constructor>
<constructor name="TaskID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskInProgressId object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param type the TaskType
@param id the tip number]]>
</doc>
</constructor>
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskID object from given {@link JobID}.
@param jobId JobID that this tip belongs to
@param isMap whether the tip is a map
@param id the tip number]]>
</doc>
</constructor>
<constructor name="TaskID" type="java.lang.String, int, boolean, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskInProgressId object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param isMap whether the tip is a map
@param id the tip number]]>
</doc>
</constructor>
<constructor name="TaskID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default constructor for Writable. Sets the task type to
{@link TaskType#REDUCE}, the ID to 0, and the job ID to an empty job ID.]]>
</doc>
</constructor>
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the {@link JobID} object that this tip belongs to.
@return the JobID object]]>
</doc>
</method>
<method name="isMap" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns whether this TaskID is a map ID.
@return whether this TaskID is a map ID]]>
</doc>
</method>
<method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the type of the task.
@return the type of the task]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
<doc>
<![CDATA[Compare TaskInProgressIds by first jobIds, then by tip numbers.
Reducers are defined as greater than mappers.
@param o the TaskID against which to compare
@return 0 if equal, positive if this TaskID is greater, and negative if
this TaskID is less]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="appendTo" return="java.lang.StringBuilder"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="builder" type="java.lang.StringBuilder"/>
<doc>
<![CDATA[Add the unique string to the given builder.
@param builder the builder to append to
@return the builder that was passed in]]>
</doc>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="forName" return="org.apache.hadoop.mapreduce.TaskID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<doc>
<![CDATA[Construct a TaskID object from given string.
@param str the target string
@return constructed TaskID object or null if the given String is null
@throws IllegalArgumentException if the given string is malformed]]>
</doc>
</method>
<method name="getRepresentingCharacter" return="char"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="type" type="org.apache.hadoop.mapreduce.TaskType"/>
<doc>
<![CDATA[Gets the character representing the {@link TaskType}.
@param type the TaskType
@return the character]]>
</doc>
</method>
<method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="c" type="char"/>
<doc>
<![CDATA[Gets the {@link TaskType} corresponding to the character.
@param c the character
@return the TaskType]]>
</doc>
</method>
<method name="getAllTaskTypes" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns a string of characters describing all possible {@link TaskType}
values
@return a string of all task type characters]]>
</doc>
</method>
<field name="TASK" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="idFormat" type="java.text.NumberFormat"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="TASK_ID_REGEX" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="taskIdPattern" type="java.util.regex.Pattern"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[TaskID represents the immutable and unique identifier for
a Map or Reduce Task. Each TaskID encompasses multiple attempts made to
execute the Map or Reduce Task, each of which is uniquely identified by
its TaskAttemptID.
TaskID consists of 3 parts. First part is the {@link JobID}, that this
TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r'
representing whether the task is a map task or a reduce task.
And the third part is the task number. <br>
An example TaskID is :
<code>task_200707121733_0003_m_000005</code> , which represents the
fifth map task in the third job running at the jobtracker
started at <code>200707121733</code>.
<p>
Applications should never construct or parse TaskID strings,
but rather use the appropriate constructors or the {@link #forName(String)}
method.
@see JobID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskID -->
<!-- start interface org.apache.hadoop.mapreduce.TaskInputOutputContext -->
<interface name="TaskInputOutputContext" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<method name="nextKeyValue" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Advance to the next key, value pair, returning false if at end.
@return true if a key/value pair was read, or false if there are no more]]>
</doc>
</method>
<method name="getCurrentKey" return="KEYIN"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the current key.
@return the current key object or null if there isn't one
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getCurrentValue" return="VALUEIN"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the current value.
@return the value object that was read into
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="write"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="KEYOUT"/>
<param name="value" type="VALUEOUT"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Generate an output key/value pair.]]>
</doc>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link OutputCommitter} for the task-attempt.
@return the <code>OutputCommitter</code> for the task-attempt]]>
</doc>
</method>
<doc>
<![CDATA[A context object that allows input and output from the task. It is only
supplied to the {@link Mapper} or {@link Reducer}.
@param <KEYIN> the input key type for the task
@param <VALUEIN> the input value type for the task
@param <KEYOUT> the output key type for the task
@param <VALUEOUT> the output value type for the task]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.TaskInputOutputContext -->
<!-- start class org.apache.hadoop.mapreduce.TaskTrackerInfo -->
<class name="TaskTrackerInfo" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="TaskTrackerInfo"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="TaskTrackerInfo" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="TaskTrackerInfo" type="java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getTaskTrackerName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Gets the tasktracker's name.
@return tracker's name.]]>
</doc>
</method>
<method name="isBlacklisted" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Whether tracker is blacklisted
@return true if tracker is blacklisted
false otherwise]]>
</doc>
</method>
<method name="getReasonForBlacklist" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Gets the reason for which the tasktracker was blacklisted.
@return reason for which the tracker was blacklisted]]>
</doc>
</method>
<method name="getBlacklistReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Gets a descriptive report about why the tasktracker was blacklisted.
@return report describing why the tasktracker was blacklisted.]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Information about TaskTracker.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskTrackerInfo -->
<!-- start class org.apache.hadoop.mapreduce.TaskType -->
<class name="TaskType" extends="java.lang.Enum"
abstract="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapreduce.TaskType[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapreduce.TaskType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<doc>
<![CDATA[Enum for map, reduce, job-setup, job-cleanup, task-cleanup task types.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskType -->
</package>
<package name="org.apache.hadoop.mapreduce.checkpoint">
</package>
<package name="org.apache.hadoop.mapreduce.counters">
<!-- start class org.apache.hadoop.mapreduce.counters.AbstractCounters -->
<class name="AbstractCounters" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<implements name="java.lang.Iterable"/>
<constructor name="AbstractCounters" type="org.apache.hadoop.mapreduce.counters.CounterGroupFactory"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="AbstractCounters" type="org.apache.hadoop.mapreduce.counters.AbstractCounters, org.apache.hadoop.mapreduce.counters.CounterGroupFactory"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct from another counters object.
@param <C1> type of the other counter
@param <G1> type of the other counter group
@param counters the counters object to copy
@param groupFactory the factory for new groups]]>
</doc>
</constructor>
<method name="findCounter" return="C"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="groupName" type="java.lang.String"/>
<param name="counterName" type="java.lang.String"/>
<doc>
<![CDATA[Find a counter, create one if necessary
@param groupName of the counter
@param counterName name of the counter
@return the matching counter]]>
</doc>
</method>
<method name="findCounter" return="C"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<doc>
<![CDATA[Find the counter for the given enum. The same enum will always return the
same counter.
@param key the counter key
@return the matching counter object]]>
</doc>
</method>
<method name="getGroupNames" return="java.lang.Iterable"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the names of all counter classes.
@return the names of the counter groups.]]>
</doc>
</method>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getGroup" return="G"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="groupName" type="java.lang.String"/>
<doc>
<![CDATA[Returns the named counter group, or an empty group if there is none
with the specified name.
@param groupName name of the group
@return the group]]>
</doc>
</method>
<method name="countCounters" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the total number of counters, by summing the number of counters
in each group.
@return the total number of counters]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Write the set of groups.
Counters ::= version #fgroups (groupId, group)* #groups (group)*]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return textual representation of the counter values.
@return the string]]>
</doc>
</method>
<method name="incrAllCounters"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapreduce.counters.AbstractCounters"/>
<doc>
<![CDATA[Increments multiple counters by their amounts in another Counters
instance.
@param other the other Counters instance]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericRight" type="java.lang.Object"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<field name="LOG" type="org.slf4j.Logger"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[An abstract class to provide common implementation for the Counters
container in both mapred and mapreduce packages.
@param <C> type of counter inside the counters
@param <G> type of group inside the counters]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.counters.AbstractCounters -->
<!-- start interface org.apache.hadoop.mapreduce.counters.CounterGroupBase -->
<interface name="CounterGroupBase" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<implements name="java.lang.Iterable"/>
<method name="getName" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the internal name of the group
@return the internal name]]>
</doc>
</method>
<method name="getDisplayName" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the display name of the group.
@return the human readable name]]>
</doc>
</method>
<method name="setDisplayName"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="displayName" type="java.lang.String"/>
<doc>
<![CDATA[Set the display name of the group
@param displayName of the group]]>
</doc>
</method>
<method name="addCounter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counter" type="T"/>
<doc>
<![CDATA[Add a counter to this group.
@param counter to add]]>
</doc>
</method>
<method name="addCounter" return="T"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<param name="displayName" type="java.lang.String"/>
<param name="value" type="long"/>
<doc>
<![CDATA[Add a counter to this group
@param name of the counter
@param displayName of the counter
@param value of the counter
@return the counter]]>
</doc>
</method>
<method name="findCounter" return="T"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
<param name="displayName" type="java.lang.String"/>
<doc>
<![CDATA[Find a counter in the group.
@param counterName the name of the counter
@param displayName the display name of the counter
@return the counter that was found or added]]>
</doc>
</method>
<method name="findCounter" return="T"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
<param name="create" type="boolean"/>
<doc>
<![CDATA[Find a counter in the group
@param counterName the name of the counter
@param create if true, create the counter when it is not found
@return the counter that was found or added or null if create is false]]>
</doc>
</method>
<method name="findCounter" return="T"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
<doc>
<![CDATA[Find a counter in the group.
@param counterName the name of the counter
@return the counter that was found or added]]>
</doc>
</method>
<method name="size" return="int"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the number of counters in this group.]]>
</doc>
</method>
<method name="incrAllCounters"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="rightGroup" type="org.apache.hadoop.mapreduce.counters.CounterGroupBase"/>
<doc>
<![CDATA[Increment all counters by a group of counters
@param rightGroup the group to be added to this group]]>
</doc>
</method>
<doc>
<![CDATA[The common counter group interface.
@param <T> type of the counter for the group]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.counters.CounterGroupBase -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.aggregate">
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum -->
<class name="DoubleValueSum" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
<constructor name="DoubleValueSum"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
an object whose string representation represents a double value.]]>
</doc>
</method>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="double"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
a double value.]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="getSum" return="double"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that sums up a sequence of double
values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax -->
<class name="LongValueMax" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
<constructor name="LongValueMax"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
an object whose string representation represents a long value.]]>
</doc>
</method>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newVal" type="long"/>
<doc>
<![CDATA[add a value to the aggregator
@param newVal
a long value.]]>
</doc>
</method>
<method name="getVal" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that maintains the maximum of
a sequence of long values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin -->
<class name="LongValueMin" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
<constructor name="LongValueMin"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
an object whose string representation represents a long value.]]>
</doc>
</method>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newVal" type="long"/>
<doc>
<![CDATA[add a value to the aggregator
@param newVal
a long value.]]>
</doc>
</method>
<method name="getVal" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that maintains the minimum of
a sequence of long values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum -->
<class name="LongValueSum" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
<constructor name="LongValueSum"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
an object whose string representation represents a long value.]]>
</doc>
</method>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="long"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
a long value.]]>
</doc>
</method>
<method name="getSum" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that sums up
a sequence of long values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax -->
<class name="StringValueMax" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
<constructor name="StringValueMax"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
a string.]]>
</doc>
</method>
<method name="getVal" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that maintains the biggest of
a sequence of strings.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin -->
<class name="StringValueMin" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
<constructor name="StringValueMin"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
a string.]]>
</doc>
</method>
<method name="getVal" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that maintains the smallest of
a sequence of strings.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount -->
<class name="UniqValueCount" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
<constructor name="UniqValueCount"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<constructor name="UniqValueCount" type="long"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[constructor
@param maxNum the limit on the number of unique values to keep.]]>
</doc>
</constructor>
<method name="setMaxItems" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="long"/>
<doc>
<![CDATA[Set the limit on the number of unique values
@param n the desired limit on the number of unique values
@return the new limit on the number of unique values]]>
</doc>
</method>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
an object.]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the number of unique objects aggregated]]>
</doc>
</method>
<method name="getUniqueItems" return="java.util.Set"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the set of the unique objects]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of the unique objects. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<field name="MAX_NUM_UNIQUE_VALUES" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class implements a value aggregator that dedupes a sequence of objects.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
<class name="UserDefinedValueAggregatorDescriptor" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/>
<constructor name="UserDefinedValueAggregatorDescriptor" type="java.lang.String, org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@param className the class name of the user defined descriptor class
@param conf a configuration object used for descriptor configuration]]>
</doc>
</constructor>
<method name="createInstance" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="className" type="java.lang.String"/>
<doc>
<![CDATA[Create an instance of the given class
@param className the name of the class
@return a dynamically created instance of the given class]]>
</doc>
</method>
<method name="generateKeyValPairs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[Generate a list of aggregation-id/value pairs for the given
key/value pairs by delegating the invocation to the real object.
@param key
input key
@param val
input value
@return a list of aggregation id/value pairs. An aggregation id encodes an
aggregation type which is used to guide the way to aggregate the
value in the reduce/combiner phase of an Aggregate based job.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of this object.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Do nothing.]]>
</doc>
</method>
<field name="theAggregatorDescriptor" type="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class implements a wrapper for a user defined value
aggregator descriptor.
It serves two functions: One is to create an object of
ValueAggregatorDescriptor from the name of a user defined class
that may be dynamically loaded. The other is to
delegate invocations of generateKeyValPairs function to the created object.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
<!-- start interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator -->
<interface name="ValueAggregator" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="addNextValue"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val the value to be added]]>
</doc>
</method>
<method name="reset"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of values as the outputs of the combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This interface defines the minimal protocol for value aggregators.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor -->
<class name="ValueAggregatorBaseDescriptor" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/>
<constructor name="ValueAggregatorBaseDescriptor"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="generateEntry" return="java.util.Map.Entry"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="type" type="java.lang.String"/>
<param name="id" type="java.lang.String"/>
<param name="val" type="org.apache.hadoop.io.Text"/>
<doc>
<![CDATA[@param type the aggregation type
@param id the aggregation id
@param val the val associated with the id to be aggregated
@return an Entry whose key is the aggregation id prefixed with
the aggregation type.]]>
</doc>
</method>
<method name="generateValueAggregator" return="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="type" type="java.lang.String"/>
<param name="uniqCount" type="long"/>
<doc>
<![CDATA[@param type the aggregation type
@param uniqCount the limit on the number of unique values to keep,
if type is UNIQ_VALUE_COUNT
@return a value aggregator of the given type.]]>
</doc>
</method>
<method name="generateKeyValPairs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[Generate 1 or 2 aggregation-id/value pairs for the given key/value pair.
The first id will be of type LONG_VALUE_SUM, with "record_count" as
its aggregation id. If the input is a file split,
the second id of the same type will be generated too, with the file name
as its aggregation id. This achieves the behavior of counting the total
number of records in the input data, and the number of records
in each input file.
@param key
input key
@param val
input value
@return a list of aggregation id/value pairs. An aggregation id encodes an
aggregation type which is used to guide the way to aggregate the
value in the reduce/combiner phase of an Aggregate based job.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[get the input file name.
@param conf a configuration object]]>
</doc>
</method>
<field name="UNIQ_VALUE_COUNT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LONG_VALUE_SUM" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DOUBLE_VALUE_SUM" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="VALUE_HISTOGRAM" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LONG_VALUE_MAX" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LONG_VALUE_MIN" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="STRING_VALUE_MAX" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="STRING_VALUE_MIN" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="inputFile" type="java.lang.String"
transient="false" volatile="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class implements the common functionalities of
the subclasses of ValueAggregatorDescriptor class.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner -->
<class name="ValueAggregatorCombiner" extends="org.apache.hadoop.mapreduce.Reducer"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorCombiner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="values" type="java.lang.Iterable"/>
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Combines values for a given key.
@param key the key is expected to be a Text object, whose prefix indicates
the type of aggregation to aggregate the values.
@param values the values to combine
@param context to collect combined values]]>
</doc>
</method>
<doc>
<![CDATA[This class implements the generic combiner of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner -->
<!-- start interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor -->
<interface name="ValueAggregatorDescriptor" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="generateKeyValPairs" return="java.util.ArrayList"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[Generate a list of aggregation-id/value pairs for
the given key/value pair.
This function is usually called by the mapper of an Aggregate based job.
@param key
input key
@param val
input value
@return a list of aggregation id/value pairs. An aggregation id encodes an
aggregation type which is used to guide the way to aggregate the
value in the reduce/combiner phase of an Aggregate based job.]]>
</doc>
</method>
<method name="configure"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Configure the object
@param conf
a Configuration object that may contain the information
that can be used to configure the object.]]>
</doc>
</method>
<field name="TYPE_SEPARATOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="ONE" type="org.apache.hadoop.io.Text"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This interface defines the contract a value aggregator descriptor must
support. Such a descriptor can be configured with a {@link Configuration}
object. Its main function is to generate a list of aggregation-id/value
pairs. An aggregation id encodes an aggregation type which is used to
guide the way to aggregate the value in the reduce/combiner phase of an
Aggregate based job.
The mapper in an Aggregate based map/reduce job may create one or more of
ValueAggregatorDescriptor objects at configuration time. For each input
key/value pair, the mapper will use those objects to create aggregation
id/value pairs.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob -->
<class name="ValueAggregatorJob" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorJob"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<param name="descriptors" type="java.lang.Class[]"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapreduce.Job"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="args" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create an Aggregate based map/reduce job.
@param conf The configuration for job
@param args the arguments used for job creation. Generic hadoop
arguments are accepted.
@return a Job object ready for submission.
@throws IOException
@see GenericOptionsParser]]>
</doc>
</method>
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapreduce.Job"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<param name="descriptors" type="java.lang.Class[]"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setAggregatorDescriptors" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="descriptors" type="java.lang.Class[]"/>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[create and run an Aggregate based map/reduce job.
@param args the arguments used for job creation
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[This is the main class for creating a map/reduce job using Aggregate
framework. The Aggregate is a specialization of map/reduce framework,
specializing for performing various simple aggregations.
Generally speaking, in order to implement an application using Map/Reduce
model, the developer is to implement Map and Reduce functions (and possibly
combine function). However, a lot of applications related to counting and
statistics computing have very similar characteristics. Aggregate abstracts
out the general patterns of these functions and implements those patterns.
In particular, the package provides generic mapper/reducer/combiner
classes, and a set of built-in value aggregators, and a generic utility
class that helps user create map/reduce jobs using the generic class.
The built-in aggregators include:
sum over numeric values count the number of distinct values compute the
histogram of values compute the minimum, maximum, median, average, standard
deviation of numeric values
The developer using Aggregate will need only to provide a plugin class
conforming to the following interface:
public interface ValueAggregatorDescriptor { public ArrayList&lt;Entry&gt;
generateKeyValPairs(Object key, Object value); public void
configure(Configuration conf); }
The package also provides a base class, ValueAggregatorBaseDescriptor,
implementing the above interface. The user can extend the base class and
implement generateKeyValPairs accordingly.
The primary work of generateKeyValPairs is to emit one or more key/value
pairs based on the input key/value pair. The key in an output key/value pair
encodes two pieces of information: aggregation type and aggregation id. The
value will be aggregated onto the aggregation id according to the aggregation
type.
This class offers a function to generate a map/reduce job using Aggregate
framework. The function takes the following parameters: input directory spec
input format (text or sequence file) output directory a file specifying the
user plugin class]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase -->
<class name="ValueAggregatorJobBase" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorJobBase"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="getValueAggregatorDescriptor" return="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<param name="spec" type="java.lang.String"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="getAggregatorDescriptors" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="logSpec"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<field name="DESCRIPTOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DESCRIPTOR_NUM" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="USER_JAR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="aggregatorDescriptorList" type="java.util.ArrayList"
transient="false" volatile="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This abstract class implements some common functionalities of
the generic mapper, reducer and combiner classes of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper -->
<class name="ValueAggregatorMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K1"/>
<param name="value" type="V1"/>
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[the map function. It iterates through the value aggregator descriptor
list to generate aggregation id/value pairs and emit them.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements the generic mapper of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer -->
<class name="ValueAggregatorReducer" extends="org.apache.hadoop.mapreduce.Reducer"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="values" type="java.lang.Iterable"/>
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[@param key
the key is expected to be a Text object, whose prefix indicates
the type of aggregation to aggregate the values. In effect, data
driven computing is achieved. It is assumed that each aggregator's
getReport method emits appropriate output for the aggregator. This
may be further customized.
@param values the values to be aggregated
@param context]]>
</doc>
</method>
<doc>
<![CDATA[This class implements the generic reducer of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer -->
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram -->
<class name="ValueHistogram" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
<constructor name="ValueHistogram"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add the given val to the aggregator.
@param val the value to be added. It is expected to be a string
in the form of xxxx\tnum, meaning xxxx has num occurrences.]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of this aggregator.
It includes the following basic statistics of the histogram:
the number of unique values
the minimum value
the median value
the maximum value
the average value
the standard deviation]]>
</doc>
</method>
<method name="getReportDetails" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return a string representation of the list of value/frequency pairs of
the histogram]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return a list of value/frequency pairs.
The return value is expected to be used by the reducer.]]>
</doc>
</method>
<method name="getReportItems" return="java.util.TreeMap"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return a TreeMap representation of the histogram]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that computes the
histogram of a sequence of strings.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.chain">
<!-- start class org.apache.hadoop.mapreduce.lib.chain.ChainMapper -->
<class name="ChainMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ChainMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="addMapper"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="klass" type="java.lang.Class"/>
<param name="inputKeyClass" type="java.lang.Class"/>
<param name="inputValueClass" type="java.lang.Class"/>
<param name="outputKeyClass" type="java.lang.Class"/>
<param name="outputValueClass" type="java.lang.Class"/>
<param name="mapperConf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Adds a {@link Mapper} class to the chain mapper.
<p>
The key and values are passed from one element of the chain to the next, by
value. For the added Mapper the configuration given for it,
<code>mapperConf</code>, has precedence over the job's Configuration. This
precedence is in effect when the task is running.
</p>
<p>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainMapper, this is done by the addMapper for the last mapper in the chain
</p>
@param job
The job.
@param klass
the Mapper class to add.
@param inputKeyClass
mapper input key class.
@param inputValueClass
mapper input value class.
@param outputKeyClass
mapper output key class.
@param outputValueClass
mapper output value class.
@param mapperConf
a configuration for the Mapper class. It is recommended to use a
Configuration without default values using the
<code>Configuration(boolean loadDefaults)</code> constructor with
FALSE.]]>
</doc>
</method>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<doc>
<![CDATA[The ChainMapper class allows the use of multiple Mapper classes within a
single Map task.
<p>
The Mapper classes are invoked in a chained (or piped) fashion, the output of
the first becomes the input of the second, and so on until the last Mapper,
the output of the last Mapper will be written to the task's output.
</p>
<p>
The key functionality of this feature is that the Mappers in the chain do not
need to be aware that they are executed in a chain. This enables having
reusable specialized Mappers that can be combined to perform composite
operations within a single task.
</p>
<p>
Special care has to be taken when creating chains that the key/values output
by a Mapper are valid for the following Mapper in the chain. It is assumed
all Mappers and the Reduce in the chain use matching output and input key and
value classes as no conversion is done by the chaining code.
</p>
<p>
Using the ChainMapper and the ChainReducer classes it is possible to compose
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
immediate benefit of this pattern is a dramatic reduction in disk IO.
</p>
<p>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainMapper, this is done by the addMapper for the last mapper in the chain.
</p>
ChainMapper usage pattern:
<p>
<pre>
...
Job job = new Job(conf);
Configuration mapAConf = new Configuration(false);
...
ChainMapper.addMapper(job, AMap.class, LongWritable.class, Text.class,
Text.class, Text.class, mapAConf);
Configuration mapBConf = new Configuration(false);
...
ChainMapper.addMapper(job, BMap.class, Text.class, Text.class,
LongWritable.class, Text.class, mapBConf);
...
job.waitForCompletion(true);
...
</pre>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.chain.ChainMapper -->
<!-- start class org.apache.hadoop.mapreduce.lib.chain.ChainReducer -->
<class name="ChainReducer" extends="org.apache.hadoop.mapreduce.Reducer"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ChainReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setReducer"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="klass" type="java.lang.Class"/>
<param name="inputKeyClass" type="java.lang.Class"/>
<param name="inputValueClass" type="java.lang.Class"/>
<param name="outputKeyClass" type="java.lang.Class"/>
<param name="outputValueClass" type="java.lang.Class"/>
<param name="reducerConf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Sets the {@link Reducer} class to the chain job.
<p>
The key and values are passed from one element of the chain to the next, by
value. For the added Reducer the configuration given for it,
<code>reducerConf</code>, has precedence over the job's Configuration.
This precedence is in effect when the task is running.
</p>
<p>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainReducer, this is done by the setReducer or the addMapper for the last
element in the chain.
</p>
@param job
the job
@param klass
the Reducer class to add.
@param inputKeyClass
reducer input key class.
@param inputValueClass
reducer input value class.
@param outputKeyClass
reducer output key class.
@param outputValueClass
reducer output value class.
@param reducerConf
a configuration for the Reducer class. It is recommended to use a
Configuration without default values using the
<code>Configuration(boolean loadDefaults)</code> constructor with
FALSE.]]>
</doc>
</method>
<method name="addMapper"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="klass" type="java.lang.Class"/>
<param name="inputKeyClass" type="java.lang.Class"/>
<param name="inputValueClass" type="java.lang.Class"/>
<param name="outputKeyClass" type="java.lang.Class"/>
<param name="outputValueClass" type="java.lang.Class"/>
<param name="mapperConf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Adds a {@link Mapper} class to the chain reducer.
<p>
The key and values are passed from one element of the chain to the next, by
value. For the added Mapper the configuration given for it,
<code>mapperConf</code>, has precedence over the job's Configuration. This
precedence is in effect when the task is running.
</p>
<p>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainMapper, this is done by the addMapper for the last mapper in the
chain.
</p>
@param job
The job.
@param klass
the Mapper class to add.
@param inputKeyClass
mapper input key class.
@param inputValueClass
mapper input value class.
@param outputKeyClass
mapper output key class.
@param outputValueClass
mapper output value class.
@param mapperConf
a configuration for the Mapper class. It is recommended to use a
Configuration without default values using the
<code>Configuration(boolean loadDefaults)</code> constructor with
FALSE.]]>
</doc>
</method>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<doc>
<![CDATA[The ChainReducer class allows chaining multiple Mapper classes after a
Reducer within the Reducer task.
<p>
For each record output by the Reducer, the Mapper classes are invoked in a
chained (or piped) fashion. The output of the reducer becomes the input of
the first mapper and output of first becomes the input of the second, and so
on until the last Mapper, the output of the last Mapper will be written to
the task's output.
</p>
<p>
The key functionality of this feature is that the Mappers in the chain do not
need to be aware that they are executed after the Reducer or in a chain. This
enables having reusable specialized Mappers that can be combined to perform
composite operations within a single task.
</p>
<p>
Special care has to be taken when creating chains that the key/values output
by a Mapper are valid for the following Mapper in the chain. It is assumed
all Mappers and the Reduce in the chain use matching output and input key and
value classes as no conversion is done by the chaining code.
</p>
<p> Using the ChainMapper and the ChainReducer classes it is possible to
compose Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
immediate benefit of this pattern is a dramatic reduction in disk IO. </p>
<p>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainReducer, this is done by the setReducer or the addMapper for the last
element in the chain.
</p>
ChainReducer usage pattern:
<p>
<pre>
...
Job job = new Job(conf);
....
Configuration reduceConf = new Configuration(false);
...
ChainReducer.setReducer(job, XReduce.class, LongWritable.class, Text.class,
Text.class, Text.class, reduceConf);
ChainReducer.addMapper(job, CMap.class, Text.class, Text.class,
LongWritable.class, Text.class, null);
ChainReducer.addMapper(job, DMap.class, LongWritable.class, Text.class,
LongWritable.class, LongWritable.class, null);
...
job.waitForCompletion(true);
...
</pre>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.chain.ChainReducer -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.db">
<!-- start class org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter -->
<class name="BigDecimalSplitter" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/>
<constructor name="BigDecimalSplitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="split" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="results" type="java.sql.ResultSet"/>
<param name="colName" type="java.lang.String"/>
<exception name="SQLException" type="java.sql.SQLException"/>
</method>
<method name="tryDivide" return="java.math.BigDecimal"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="numerator" type="java.math.BigDecimal"/>
<param name="denominator" type="java.math.BigDecimal"/>
<doc>
<![CDATA[Divide numerator by denominator. If impossible in exact mode, use rounding.]]>
</doc>
</method>
<doc>
<![CDATA[Implement DBSplitter over BigDecimal values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.BooleanSplitter -->
<class name="BooleanSplitter" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/>
<constructor name="BooleanSplitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="split" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="results" type="java.sql.ResultSet"/>
<param name="colName" type="java.lang.String"/>
<exception name="SQLException" type="java.sql.SQLException"/>
</method>
<doc>
<![CDATA[Implement DBSplitter over boolean values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.BooleanSplitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat -->
<class name="DataDrivenDBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<constructor name="DataDrivenDBInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getSplitter" return="org.apache.hadoop.mapreduce.lib.db.DBSplitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="sqlDataType" type="int"/>
<doc>
<![CDATA[@return the DBSplitter implementation to use to divide the table/query into InputSplits.]]>
</doc>
</method>
<method name="getSplits" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getBoundingValsQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[@return a query which returns the minimum and maximum values for
the order-by column.
The min value should be in the first column, and the
max value should be in the second column of the results.]]>
</doc>
</method>
<method name="setBoundingQuery"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="query" type="java.lang.String"/>
<doc>
<![CDATA[Set the user-defined bounding query to use with a user-defined query.
This *must* include the substring "$CONDITIONS"
(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) inside the WHERE clause,
so that DataDrivenDBInputFormat knows where to insert split clauses.
e.g., "SELECT foo FROM mytable WHERE $CONDITIONS"
This will be expanded to something like:
SELECT foo FROM mytable WHERE (id &gt; 100) AND (id &lt; 250)
inside each split.]]>
</doc>
</method>
<method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setInput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="inputClass" type="java.lang.Class"/>
<param name="tableName" type="java.lang.String"/>
<param name="conditions" type="java.lang.String"/>
<param name="splitBy" type="java.lang.String"/>
<param name="fieldNames" type="java.lang.String[]"/>
<doc>
<![CDATA[Note that the "orderBy" column is called the "splitBy" in this version.
We reuse the same field, but it's not strictly ordering it -- just partitioning
the results.]]>
</doc>
</method>
<method name="setInput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="inputClass" type="java.lang.Class"/>
<param name="inputQuery" type="java.lang.String"/>
<param name="inputBoundingQuery" type="java.lang.String"/>
<doc>
<![CDATA[setInput() takes a custom query and a separate "bounding query" to use
instead of the custom "count query" used by DBInputFormat.]]>
</doc>
</method>
<field name="SUBSTITUTE_TOKEN" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[If users are providing their own query, the following string is expected to
appear in the WHERE clause, which will be substituted with a pair of conditions
on the input to allow input splits to parallelise the import.]]>
</doc>
</field>
<doc>
<![CDATA[An InputFormat that reads input data from an SQL table.
Operates like DBInputFormat, but instead of using LIMIT and OFFSET to demarcate
splits, it tries to generate WHERE clauses which separate the data into roughly
equivalent shards.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader -->
<class name="DataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="DataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[@param split The InputSplit to read data for
@throws SQLException]]>
</doc>
</constructor>
<method name="getSelectQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the query for selecting the records,
subclasses can override this for custom behaviour.]]>
</doc>
</method>
<doc>
<![CDATA[A RecordReader that reads records from a SQL table,
using data-driven WHERE clause splits.
Emits LongWritables containing the record number as
key and DBWritables as value.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.DateSplitter -->
<class name="DateSplitter" extends="org.apache.hadoop.mapreduce.lib.db.IntegerSplitter"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="DateSplitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="split" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="results" type="java.sql.ResultSet"/>
<param name="colName" type="java.lang.String"/>
<exception name="SQLException" type="java.sql.SQLException"/>
</method>
<method name="dateToString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="d" type="java.util.Date"/>
<doc>
<![CDATA[Given a Date 'd', format it as a string for use in a SQL date
comparison operation.
@param d the date to format.
@return the string representing this date in SQL with any appropriate
quotation characters, etc.]]>
</doc>
</method>
<doc>
<![CDATA[Implement DBSplitter over date/time values.
Make use of logic from IntegerSplitter, since date/time are just longs
in Java.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.DateSplitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBConfiguration -->
<class name="DBConfiguration" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="DBConfiguration" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configureDB"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="driverClass" type="java.lang.String"/>
<param name="dbUrl" type="java.lang.String"/>
<param name="userName" type="java.lang.String"/>
<param name="passwd" type="java.lang.String"/>
<doc>
<![CDATA[Sets the DB access related fields in the {@link Configuration}.
@param conf the configuration
@param driverClass JDBC Driver class name
@param dbUrl JDBC DB access URL.
@param userName DB access username
@param passwd DB access passwd]]>
</doc>
</method>
<method name="configureDB"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.conf.Configuration"/>
<param name="driverClass" type="java.lang.String"/>
<param name="dbUrl" type="java.lang.String"/>
<doc>
<![CDATA[Sets the DB access related fields in the {@link Configuration}.
@param job the job configuration
@param driverClass JDBC Driver class name
@param dbUrl JDBC DB access URL.]]>
</doc>
</method>
<method name="getConnection" return="java.sql.Connection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[Returns a connection object to the DB
@throws ClassNotFoundException
@throws SQLException]]>
</doc>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getInputTableName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setInputTableName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="tableName" type="java.lang.String"/>
</method>
<method name="getInputFieldNames" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setInputFieldNames"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="fieldNames" type="java.lang.String[]"/>
</method>
<method name="getInputConditions" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setInputConditions"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conditions" type="java.lang.String"/>
</method>
<method name="getInputOrderBy" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setInputOrderBy"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="orderby" type="java.lang.String"/>
</method>
<method name="getInputQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setInputQuery"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="query" type="java.lang.String"/>
</method>
<method name="getInputCountQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setInputCountQuery"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="query" type="java.lang.String"/>
</method>
<method name="setInputBoundingQuery"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="query" type="java.lang.String"/>
</method>
<method name="getInputBoundingQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getInputClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setInputClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="inputClass" type="java.lang.Class"/>
</method>
<method name="getOutputTableName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setOutputTableName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="tableName" type="java.lang.String"/>
</method>
<method name="getOutputFieldNames" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setOutputFieldNames"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="fieldNames" type="java.lang.String[]"/>
</method>
<method name="setOutputFieldCount"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="fieldCount" type="int"/>
</method>
<method name="getOutputFieldCount" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<field name="DRIVER_CLASS_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The JDBC Driver class name]]>
</doc>
</field>
<field name="URL_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[JDBC Database access URL]]>
</doc>
</field>
<field name="USERNAME_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[User name to access the database]]>
</doc>
</field>
<field name="PASSWORD_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Password to access the database]]>
</doc>
</field>
<field name="INPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Input table name]]>
</doc>
</field>
<field name="INPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Field names in the Input table]]>
</doc>
</field>
<field name="INPUT_CONDITIONS_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[WHERE clause in the input SELECT statement]]>
</doc>
</field>
<field name="INPUT_ORDER_BY_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[ORDER BY clause in the input SELECT statement]]>
</doc>
</field>
<field name="INPUT_QUERY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Whole input query, excluding LIMIT...OFFSET]]>
</doc>
</field>
<field name="INPUT_COUNT_QUERY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Input query to get the count of records]]>
</doc>
</field>
<field name="INPUT_BOUNDING_QUERY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Input query to get the max and min values of the jdbc.input.query]]>
</doc>
</field>
<field name="INPUT_CLASS_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Class name implementing DBWritable which will hold input tuples]]>
</doc>
</field>
<field name="OUTPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Output table name]]>
</doc>
</field>
<field name="OUTPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Field names in the Output table]]>
</doc>
</field>
<field name="OUTPUT_FIELD_COUNT_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Number of fields in the Output table]]>
</doc>
</field>
<doc>
<![CDATA[A container for configuration property names for jobs with DB input/output.
The job can be configured using the static methods in this class,
{@link DBInputFormat}, and {@link DBOutputFormat}.
Alternatively, the properties can be set in the configuration with proper
values.
@see DBConfiguration#configureDB(Configuration, String, String, String, String)
@see DBInputFormat#setInput(Job, Class, String, String)
@see DBInputFormat#setInput(Job, Class, String, String, String, String...)
@see DBOutputFormat#setOutput(Job, String, String...)]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBConfiguration -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBInputFormat -->
<class name="DBInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<constructor name="DBInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getDBConf" return="org.apache.hadoop.mapreduce.lib.db.DBConfiguration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getConnection" return="java.sql.Connection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createConnection" return="java.sql.Connection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getDBProductName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getSplits" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getCountQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the query for getting the total number of rows,
subclasses can override this for custom behaviour.]]>
</doc>
</method>
<method name="setInput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="inputClass" type="java.lang.Class"/>
<param name="tableName" type="java.lang.String"/>
<param name="conditions" type="java.lang.String"/>
<param name="orderBy" type="java.lang.String"/>
<param name="fieldNames" type="java.lang.String[]"/>
<doc>
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
@param job The map-reduce job
@param inputClass the class object implementing DBWritable, which is the
Java object holding tuple fields.
@param tableName The table to read data from
@param conditions The conditions with which to select data,
e.g. '(updated &gt; 20070101 AND length &gt; 0)'
@param orderBy the fieldNames in the orderBy clause.
@param fieldNames The field names in the table
@see #setInput(Job, Class, String, String)]]>
</doc>
</method>
<method name="setInput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="inputClass" type="java.lang.Class"/>
<param name="inputQuery" type="java.lang.String"/>
<param name="inputCountQuery" type="java.lang.String"/>
<doc>
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
@param job The map-reduce job
@param inputClass the class object implementing DBWritable, which is the
Java object holding tuple fields.
@param inputQuery the input query to select fields. Example :
"SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
@param inputCountQuery the input query that returns
the number of records in the table.
Example : "SELECT COUNT(f1) FROM Mytable"
@see #setInput(Job, Class, String, String, String, String...)]]>
</doc>
</method>
<method name="closeConnection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<field name="dbProductName" type="java.lang.String"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="conditions" type="java.lang.String"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="connection" type="java.sql.Connection"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="tableName" type="java.lang.String"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="fieldNames" type="java.lang.String[]"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="dbConf" type="org.apache.hadoop.mapreduce.lib.db.DBConfiguration"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[An InputFormat that reads input data from an SQL table.
<p>
DBInputFormat emits LongWritables containing the record number as
key and DBWritables as value.
The SQL query and input class can be set using one of the two
setInput methods.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat -->
<class name="DBOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="DBOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="constructQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="table" type="java.lang.String"/>
<param name="fieldNames" type="java.lang.String[]"/>
<doc>
<![CDATA[Constructs the query used as the prepared statement to insert data.
@param table
the table to insert into
@param fieldNames
the fields to insert into. If field names are unknown, supply an
array of nulls.]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="setOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="tableName" type="java.lang.String"/>
<param name="fieldNames" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Initializes the reduce-part of the job with
the appropriate output settings
@param job The job
@param tableName The table to insert data into
@param fieldNames The field names in the table.]]>
</doc>
</method>
<method name="setOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="tableName" type="java.lang.String"/>
<param name="fieldCount" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Initializes the reduce-part of the job
with the appropriate output settings
@param job The job
@param tableName The table to insert data into
@param fieldCount the number of fields in the table.]]>
</doc>
</method>
<field name="dbProductName" type="java.lang.String"
transient="false" volatile="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[An OutputFormat that sends the reduce output to a SQL table.
<p>
{@link DBOutputFormat} accepts &lt;key,value&gt; pairs, where
key has a type extending DBWritable. Returned {@link RecordWriter}
writes <b>only the key</b> to the database with a batch SQL query.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBRecordReader -->
<class name="DBRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="DBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[@param split The InputSplit to read data for
@throws SQLException]]>
</doc>
</constructor>
<method name="executeQuery" return="java.sql.ResultSet"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="query" type="java.lang.String"/>
<exception name="SQLException" type="java.sql.SQLException"/>
</method>
<method name="getSelectQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the query for selecting the records,
subclasses can override this for custom behaviour.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getCurrentKey" return="org.apache.hadoop.io.LongWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getCurrentValue" return="T"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="createValue" return="T"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="deprecated, no comment">
<doc>
<![CDATA[@deprecated]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="deprecated, no comment">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Use {@link #nextKeyValue()}">
<param name="key" type="org.apache.hadoop.io.LongWritable"/>
<param name="value" type="T"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Use {@link #nextKeyValue()}]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getSplit" return="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="getFieldNames" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="getTableName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="getConditions" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="getDBConf" return="org.apache.hadoop.mapreduce.lib.db.DBConfiguration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="getConnection" return="java.sql.Connection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="getStatement" return="java.sql.PreparedStatement"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="setStatement"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="stmt" type="java.sql.PreparedStatement"/>
</method>
<field name="statement" type="java.sql.PreparedStatement"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A RecordReader that reads records from a SQL table.
Emits LongWritables containing the record number as
key and DBWritables as value.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBRecordReader -->
<!-- start interface org.apache.hadoop.mapreduce.lib.db.DBSplitter -->
<interface name="DBSplitter" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="split" return="java.util.List"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="results" type="java.sql.ResultSet"/>
<param name="colName" type="java.lang.String"/>
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[Given a ResultSet containing one record (and already advanced to that record)
with two columns (a low value, and a high value, both of the same type), determine
a set of splits that span the given values.]]>
</doc>
</method>
<doc>
<![CDATA[DBSplitter will generate DBInputSplits to use with DataDrivenDBInputFormat.
DataDrivenDBInputFormat needs to interpolate between two values that
represent the lowest and highest valued records to import. Depending
on the data-type of the column, this requires different behavior.
DBSplitter implementations should perform this for a data type or family
of data types.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.lib.db.DBSplitter -->
<!-- start interface org.apache.hadoop.mapreduce.lib.db.DBWritable -->
<interface name="DBWritable" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="write"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="statement" type="java.sql.PreparedStatement"/>
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[Sets the fields of the object in the {@link PreparedStatement}.
@param statement the statement that the fields are put into.
@throws SQLException]]>
</doc>
</method>
<method name="readFields"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="resultSet" type="java.sql.ResultSet"/>
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[Reads the fields of the object from the {@link ResultSet}.
@param resultSet the {@link ResultSet} to get the fields from.
@throws SQLException]]>
</doc>
</method>
<doc>
<![CDATA[Objects that are read from/written to a database should implement
<code>DBWritable</code>. DBWritable is similar to {@link Writable}
except that the {@link #write(PreparedStatement)} method takes a
{@link PreparedStatement}, and {@link #readFields(ResultSet)}
takes a {@link ResultSet}.
<p>
Implementations are responsible for writing the fields of the object
to PreparedStatement, and reading the fields of the object from the
ResultSet.
<p>Example:</p>
If we have the following table in the database :
<pre>
CREATE TABLE MyTable (
counter INTEGER NOT NULL,
timestamp BIGINT NOT NULL
);
</pre>
then we can read/write the tuples from/to the table with :
<p><pre>
public class MyWritable implements Writable, DBWritable {
// Some data
private int counter;
private long timestamp;
//Writable#write() implementation
public void write(DataOutput out) throws IOException {
out.writeInt(counter);
out.writeLong(timestamp);
}
//Writable#readFields() implementation
public void readFields(DataInput in) throws IOException {
counter = in.readInt();
timestamp = in.readLong();
}
public void write(PreparedStatement statement) throws SQLException {
statement.setInt(1, counter);
statement.setLong(2, timestamp);
}
public void readFields(ResultSet resultSet) throws SQLException {
counter = resultSet.getInt(1);
timestamp = resultSet.getLong(2);
}
}
</pre>]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.lib.db.DBWritable -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.FloatSplitter -->
<class name="FloatSplitter" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/>
<constructor name="FloatSplitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="split" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="results" type="java.sql.ResultSet"/>
<param name="colName" type="java.lang.String"/>
<exception name="SQLException" type="java.sql.SQLException"/>
</method>
<doc>
<![CDATA[Implement DBSplitter over floating-point values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.FloatSplitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.IntegerSplitter -->
<class name="IntegerSplitter" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/>
<constructor name="IntegerSplitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="split" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="results" type="java.sql.ResultSet"/>
<param name="colName" type="java.lang.String"/>
<exception name="SQLException" type="java.sql.SQLException"/>
</method>
<doc>
<![CDATA[Implement DBSplitter over integer values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.IntegerSplitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.MySQLDataDrivenDBRecordReader -->
<class name="MySQLDataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MySQLDataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="SQLException" type="java.sql.SQLException"/>
</constructor>
<method name="executeQuery" return="java.sql.ResultSet"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="query" type="java.lang.String"/>
<exception name="SQLException" type="java.sql.SQLException"/>
</method>
<doc>
<![CDATA[A RecordReader that reads records from a MySQL table via DataDrivenDBRecordReader]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.MySQLDataDrivenDBRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.MySQLDBRecordReader -->
<class name="MySQLDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MySQLDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="SQLException" type="java.sql.SQLException"/>
</constructor>
<method name="executeQuery" return="java.sql.ResultSet"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="query" type="java.lang.String"/>
<exception name="SQLException" type="java.sql.SQLException"/>
</method>
<doc>
<![CDATA[A RecordReader that reads records from a MySQL table.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.MySQLDBRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBInputFormat -->
<class name="OracleDataDrivenDBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<constructor name="OracleDataDrivenDBInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getSplitter" return="org.apache.hadoop.mapreduce.lib.db.DBSplitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="sqlDataType" type="int"/>
<doc>
<![CDATA[@return the DBSplitter implementation to use to divide the table/query into InputSplits.]]>
</doc>
</method>
<method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An InputFormat that reads input data from an SQL table in an Oracle db.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBRecordReader -->
<class name="OracleDataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="OracleDataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="SQLException" type="java.sql.SQLException"/>
</constructor>
<doc>
<![CDATA[A RecordReader that reads records from an Oracle table via DataDrivenDBRecordReader]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDateSplitter -->
<class name="OracleDateSplitter" extends="org.apache.hadoop.mapreduce.lib.db.DateSplitter"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="OracleDateSplitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="dateToString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="d" type="java.util.Date"/>
</method>
<doc>
<![CDATA[Implement DBSplitter over date/time values returned by an Oracle db.
Make use of logic from DateSplitter, since this just needs to use
some Oracle-specific functions on the formatting end when generating
InputSplits.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDateSplitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDBRecordReader -->
<class name="OracleDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="OracleDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="SQLException" type="java.sql.SQLException"/>
</constructor>
<method name="getSelectQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the query for selecting the records from an Oracle DB.]]>
</doc>
</method>
<method name="setSessionTimeZone"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="conn" type="java.sql.Connection"/>
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[Set session time zone
@param conf The current configuration.
We read the 'oracle.sessionTimeZone' property from here.
@param conn The connection to alter the timezone properties of.]]>
</doc>
</method>
<field name="SESSION_TIMEZONE_KEY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Configuration key to set to a timezone string.]]>
</doc>
</field>
<doc>
<![CDATA[A RecordReader that reads records from an Oracle SQL table.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDBRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.db.TextSplitter -->
<class name="TextSplitter" extends="org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TextSplitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="split" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="results" type="java.sql.ResultSet"/>
<param name="colName" type="java.lang.String"/>
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[This method needs to determine the splits between two user-provided strings.
In the case where the user's strings are 'A' and 'Z', this is not hard; we
could create two splits from ['A', 'M') and ['M', 'Z'], 26 splits for strings
beginning with each letter, etc.
If a user has provided us with the strings "Ham" and "Haze", however, we need
to create splits that differ in the third letter.
The algorithm used is as follows:
Since there are 2**16 unicode characters, we interpret characters as digits in
base 65536. Given a string 's' containing characters s_0, s_1 .. s_n, we interpret
the string as the number: 0.s_0 s_1 s_2.. s_n in base 65536. Having mapped the
low and high strings into floating-point values, we then use the BigDecimalSplitter
to establish the even split points, then map the resulting floating point values
back into strings.]]>
</doc>
</method>
<doc>
<![CDATA[Implement DBSplitter over text strings.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.db.TextSplitter -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.fieldsel">
<!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionHelper -->
<class name="FieldSelectionHelper" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FieldSelectionHelper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="FieldSelectionHelper" type="org.apache.hadoop.io.Text, org.apache.hadoop.io.Text"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="parseOutputKeyValueSpec" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="keyValueSpec" type="java.lang.String"/>
<param name="keyFieldList" type="java.util.List"/>
<param name="valueFieldList" type="java.util.List"/>
</method>
<method name="specToString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="fieldSeparator" type="java.lang.String"/>
<param name="keyValueSpec" type="java.lang.String"/>
<param name="allValueFieldsFrom" type="int"/>
<param name="keyFieldList" type="java.util.List"/>
<param name="valueFieldList" type="java.util.List"/>
</method>
<method name="getKey" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getValue" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="extractOutputKeyValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.String"/>
<param name="val" type="java.lang.String"/>
<param name="fieldSep" type="java.lang.String"/>
<param name="keyFieldList" type="java.util.List"/>
<param name="valFieldList" type="java.util.List"/>
<param name="allValueFieldsFrom" type="int"/>
<param name="ignoreKey" type="boolean"/>
<param name="isMap" type="boolean"/>
</method>
<field name="emptyText" type="org.apache.hadoop.io.Text"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<field name="DATA_FIELD_SEPARATOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DATA_FIELD_SEPERATOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="Use {@link #DATA_FIELD_SEPARATOR}">
<doc>
<![CDATA[@deprecated Use {@link #DATA_FIELD_SEPARATOR}]]>
</doc>
</field>
<field name="MAP_OUTPUT_KEY_VALUE_SPEC" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="REDUCE_OUTPUT_KEY_VALUE_SPEC" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class implements a mapper/reducer class that can be used to perform
field selections in a manner similar to unix cut. The input data is treated
as fields separated by a user specified separator (the default value is
"\t"). The user can specify a list of fields that form the map output keys,
and a list of fields that form the map output values. If the inputformat is
TextInputFormat, the mapper will ignore the key to the map function, and the
fields are from the value only. Otherwise, the fields are the union of those
from the key and those from the value.
The field separator is under attribute "mapreduce.fieldsel.data.field.separator"
The map output field list spec is under attribute
"mapreduce.fieldsel.map.output.key.value.fields.spec".
The value is expected to be like "keyFieldsSpec:valueFieldsSpec"
key/valueFieldsSpec are comma (,) separated field spec: fieldSpec,fieldSpec,fieldSpec ...
Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range
(like 2-5) to specify a range of fields, or an open range (like 3-) specifying all
the fields starting from field 3. The open range field spec applies to value fields only.
It has no effect on the key fields.
Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys,
and use fields 6,5,1,2,3,7 and above for values.
The reduce output field list spec is under attribute
"mapreduce.fieldsel.reduce.output.key.value.fields.spec".
The reducer extracts output key/value pairs in a similar manner, except that
the key is never ignored.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionHelper -->
<!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionMapper -->
<class name="FieldSelectionMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FieldSelectionMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="val" type="V"/>
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[The identity function. Input key/value pair is written directly to output.]]>
</doc>
</method>
<field name="LOG" type="org.slf4j.Logger"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class implements a mapper class that can be used to perform
field selections in a manner similar to unix cut. The input data is treated
as fields separated by a user specified separator (the default value is
"\t"). The user can specify a list of fields that form the map output keys,
and a list of fields that form the map output values. If the inputformat is
TextInputFormat, the mapper will ignore the key to the map function, and the
fields are from the value only. Otherwise, the fields are the union of those
from the key and those from the value.
The field separator is under attribute "mapreduce.fieldsel.data.field.separator"
The map output field list spec is under attribute
"mapreduce.fieldsel.map.output.key.value.fields.spec".
The value is expected to be like
"keyFieldsSpec:valueFieldsSpec" key/valueFieldsSpec are comma (,) separated
field spec: fieldSpec,fieldSpec,fieldSpec ... Each field spec can be a
simple number (e.g. 5) specifying a specific field, or a range (like 2-5)
to specify a range of fields, or an open range (like 3-) specifying all
the fields starting from field 3. The open range field spec applies to value
fields only. It has no effect on the key fields.
Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields
4,3,0 and 1 for keys, and use fields 6,5,1,2,3,7 and above for values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionMapper -->
<!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionReducer -->
<class name="FieldSelectionReducer" extends="org.apache.hadoop.mapreduce.Reducer"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FieldSelectionReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="values" type="java.lang.Iterable"/>
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<field name="LOG" type="org.slf4j.Logger"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class implements a reducer class that can be used to perform field
selections in a manner similar to unix cut.
The input data is treated as fields separated by a user specified
separator (the default value is "\t"). The user can specify a list of
fields that form the reduce output keys, and a list of fields that form
the reduce output values. The fields are the union of those from the key
and those from the value.
The field separator is under attribute "mapreduce.fieldsel.data.field.separator"
The reduce output field list spec is under attribute
"mapreduce.fieldsel.reduce.output.key.value.fields.spec".
The value is expected to be like
"keyFieldsSpec:valueFieldsSpec" key/valueFieldsSpec are comma (,)
separated field spec: fieldSpec,fieldSpec,fieldSpec ... Each field spec
can be a simple number (e.g. 5) specifying a specific field, or a range
(like 2-5) to specify a range of fields, or an open range (like 3-)
specifying all the fields starting from field 3. The open range field
spec applies to value fields only. It has no effect on the key fields.
Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields
4,3,0 and 1 for keys, and use fields 6,5,1,2,3,7 and above for values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionReducer -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.input">
<!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat -->
<class name="CombineFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="CombineFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[default constructor]]>
</doc>
</constructor>
<method name="setMaxSplitSize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="maxSplitSize" type="long"/>
<doc>
<![CDATA[Specify the maximum size (in bytes) of each split. Each split is
approximately equal to the specified size.]]>
</doc>
</method>
<method name="setMinSplitSizeNode"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="minSplitSizeNode" type="long"/>
<doc>
<![CDATA[Specify the minimum size (in bytes) of each split per node.
This applies to data that is left over after combining data on a single
node into splits that are of maximum size specified by maxSplitSize.
This leftover data will be combined into its own split if its size
exceeds minSplitSizeNode.]]>
</doc>
</method>
<method name="setMinSplitSizeRack"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="minSplitSizeRack" type="long"/>
<doc>
<![CDATA[Specify the minimum size (in bytes) of each split per rack.
This applies to data that is left over after combining data on a single
rack into splits that are of maximum size specified by maxSplitSize.
This leftover data will be combined into its own split if its size
exceeds minSplitSizeRack.]]>
</doc>
</method>
<method name="createPool"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="filters" type="java.util.List"/>
<doc>
<![CDATA[Create a new pool and add the filters to it.
A split cannot have files from different pools.]]>
</doc>
</method>
<method name="createPool"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/>
<doc>
<![CDATA[Create a new pool and add the filters to it.
A pathname can satisfy any one of the specified filters.
A split cannot have files from different pools.]]>
</doc>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<method name="getSplits" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This is not implemented yet.]]>
</doc>
</method>
<method name="getFileBlockLocations" return="org.apache.hadoop.fs.BlockLocation[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="stat" type="org.apache.hadoop.fs.FileStatus"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="SPLIT_MINSIZE_PERNODE" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SPLIT_MINSIZE_PERRACK" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[An abstract {@link InputFormat} that returns {@link CombineFileSplit}'s in
{@link InputFormat#getSplits(JobContext)} method.
Splits are constructed from the files under the input paths.
A split cannot have files from different pools.
Each split returned may contain blocks from different files.
If a maxSplitSize is specified, then blocks on the same node are
combined to form a single split. Blocks that are left over are
then combined with other blocks in the same rack.
If maxSplitSize is not specified, then blocks from the same rack
are combined in a single split; no attempt is made to create
node-local splits.
If the maxSplitSize is equal to the block size, then this class
is similar to the default splitting behavior in Hadoop: each
block is a locally processed split.
Subclasses implement
{@link InputFormat#createRecordReader(InputSplit, TaskAttemptContext)}
to construct <code>RecordReader</code>'s for
<code>CombineFileSplit</code>'s.
@see CombineFileSplit]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader -->
<class name="CombineFileRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit, org.apache.hadoop.mapreduce.TaskAttemptContext, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[A generic RecordReader that can hand out different recordReaders
for each chunk in the CombineFileSplit.]]>
</doc>
</constructor>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getCurrentKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getCurrentValue" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[return progress based on the amount of data processed so far.]]>
</doc>
</method>
<method name="initNextRecordReader" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]>
</doc>
</method>
<field name="split" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="rrConstructor" type="java.lang.reflect.Constructor"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="idx" type="int"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="progress" type="long"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="curReader" type="org.apache.hadoop.mapreduce.RecordReader"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A generic RecordReader that can hand out different recordReaders
for each chunk in a {@link CombineFileSplit}.
A CombineFileSplit can combine data chunks from multiple files.
This class allows using different RecordReaders for processing
these data chunks from different files.
@see CombineFileSplit]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReaderWrapper -->
<class name="CombineFileRecordReaderWrapper" extends="org.apache.hadoop.mapreduce.RecordReader"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="CombineFileRecordReaderWrapper" type="org.apache.hadoop.mapreduce.lib.input.FileInputFormat, org.apache.hadoop.mapreduce.lib.input.CombineFileSplit, org.apache.hadoop.mapreduce.TaskAttemptContext, java.lang.Integer"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</constructor>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getCurrentKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getCurrentValue" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A wrapper class for a record reader that handles a single file split. It
delegates most of the methods to the wrapped instance. A concrete subclass
needs to provide a constructor that calls this parent constructor with the
appropriate input format. The subclass constructor must satisfy the specific
constructor signature that is required by
<code>CombineFileRecordReader</code>.
Subclassing is needed to get a concrete record reader wrapper because of the
constructor requirement.
@see CombineFileRecordReader
@see CombineFileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReaderWrapper -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit -->
<class name="CombineFileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="CombineFileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[default constructor]]>
</doc>
</constructor>
<constructor name="CombineFileSplit" type="org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CombineFileSplit" type="org.apache.hadoop.fs.Path[], long[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Copy constructor]]>
</doc>
</constructor>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getStartOffsets" return="long[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns an array containing the start offsets of the files in the split]]>
</doc>
</method>
<method name="getLengths" return="long[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns an array containing the lengths of the files in the split]]>
</doc>
</method>
<method name="getOffset" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Returns the start offset of the i<sup>th</sup> Path]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Returns the length of the i<sup>th</sup> Path]]>
</doc>
</method>
<method name="getNumPaths" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the number of Paths in the split]]>
</doc>
</method>
<method name="getPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Returns the i<sup>th</sup> Path]]>
</doc>
</method>
<method name="getPaths" return="org.apache.hadoop.fs.Path[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns all the Paths in the split]]>
</doc>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns all the locations where this input-split resides]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[A sub-collection of input files.
Unlike {@link FileSplit}, CombineFileSplit class does not represent
a split of a file, but a split of input files into smaller sets.
A split may contain blocks from different files but all
the blocks in the same split are probably local to some rack <br>
CombineFileSplit can be used to implement {@link RecordReader}'s,
with reading one record per file.
@see FileSplit
@see CombineFileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.CombineSequenceFileInputFormat -->
<class name="CombineSequenceFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="CombineSequenceFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Input format that is a <code>CombineFileInputFormat</code>-equivalent for
<code>SequenceFileInputFormat</code>.
@see CombineFileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.CombineSequenceFileInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat -->
<class name="CombineTextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="CombineTextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Input format that is a <code>CombineFileInputFormat</code>-equivalent for
<code>TextInputFormat</code>.
@see CombineFileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.FileInputFormat -->
<class name="FileInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setInputDirRecursive"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="inputDirRecursive" type="boolean"/>
<doc>
<![CDATA[@param job
the job to modify
@param inputDirRecursive whether the input files should be read recursively]]>
</doc>
</method>
<method name="getInputDirRecursive" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[@param job
the job to look at.
@return whether the files should be read recursively]]>
</doc>
</method>
<method name="getFormatMinSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Get the lower bound on split size imposed by the format.
@return the number of bytes of the minimal split for this format]]>
</doc>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="filename" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Is the given filename splittable? Usually, true, but if the file is
stream compressed, it will not be.
The default implementation in <code>FileInputFormat</code> always returns
true. Implementations that may deal with non-splittable files <i>must</i>
override this method.
<code>FileInputFormat</code> implementations can override this and return
<code>false</code> to ensure that individual input files are never split-up
so that {@link Mapper}s process entire files.
@param context the job context
@param filename the file name to check
@return true if this file is splittable]]>
</doc>
</method>
<method name="setInputPathFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="filter" type="java.lang.Class"/>
<doc>
<![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job.
@param job the job to modify
@param filter the PathFilter class to use for filtering the input paths.]]>
</doc>
</method>
<method name="setMinInputSplitSize"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="size" type="long"/>
<doc>
<![CDATA[Set the minimum input split size
@param job the job to modify
@param size the minimum size]]>
</doc>
</method>
<method name="getMinSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the minimum split size
@param job the job
@return the minimum number of bytes that can be in a split]]>
</doc>
</method>
<method name="setMaxInputSplitSize"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="size" type="long"/>
<doc>
<![CDATA[Set the maximum split size
@param job the job to modify
@param size the maximum split size]]>
</doc>
</method>
<method name="getMaxSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the maximum split size.
@param context the job to look at.
@return the maximum number of bytes a split can include]]>
</doc>
</method>
<method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get a PathFilter instance of the filter set for the input paths.
@return the PathFilter instance set for the job, NULL if none has been set.]]>
</doc>
</method>
<method name="listStatus" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[List input directories.
Subclasses may override to, e.g., select only files matching a regular
expression.
@param job the job to list input paths for
@return list of FileStatus objects
@throws IOException if zero items.]]>
</doc>
</method>
<method name="addInputPathRecursively"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="result" type="java.util.List"/>
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<param name="inputFilter" type="org.apache.hadoop.fs.PathFilter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add files in the input path recursively into the results.
@param result
The List to store all files.
@param fs
The FileSystem.
@param path
The input path.
@param inputFilter
The input filter that can be used to filter files/dirs.
@throws IOException]]>
</doc>
</method>
<method name="makeSplit" return="org.apache.hadoop.mapreduce.lib.input.FileSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="file" type="org.apache.hadoop.fs.Path"/>
<param name="start" type="long"/>
<param name="length" type="long"/>
<param name="hosts" type="java.lang.String[]"/>
<doc>
<![CDATA[A factory that makes the split for this class. It can be overridden
by sub-classes to make sub-types]]>
</doc>
</method>
<method name="makeSplit" return="org.apache.hadoop.mapreduce.lib.input.FileSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="file" type="org.apache.hadoop.fs.Path"/>
<param name="start" type="long"/>
<param name="length" type="long"/>
<param name="hosts" type="java.lang.String[]"/>
<param name="inMemoryHosts" type="java.lang.String[]"/>
<doc>
<![CDATA[A factory that makes the split for this class. It can be overridden
by sub-classes to make sub-types]]>
</doc>
</method>
<method name="getSplits" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Generate the list of files and make them into FileSplits.
@param job the job context
@throws IOException]]>
</doc>
</method>
<method name="computeSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="blockSize" type="long"/>
<param name="minSize" type="long"/>
<param name="maxSize" type="long"/>
</method>
<method name="getBlockIndex" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
<param name="offset" type="long"/>
</method>
<method name="setInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="commaSeparatedPaths" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Sets the given comma separated paths as the list of inputs
for the map-reduce job.
@param job the job
@param commaSeparatedPaths Comma separated paths to be set as
the list of inputs for the map-reduce job.]]>
</doc>
</method>
<method name="addInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="commaSeparatedPaths" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add the given comma separated paths to the list of inputs for
the map-reduce job.
@param job The job to modify
@param commaSeparatedPaths Comma separated paths to be added to
the list of inputs for the map-reduce job.]]>
</doc>
</method>
<method name="setInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Set the array of {@link Path}s as the list of inputs
for the map-reduce job.
@param job The job to modify
@param inputPaths the {@link Path}s of the input directories/files
for the map-reduce job.]]>
</doc>
</method>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job.
@param job The {@link Job} to modify
@param path {@link Path} to be added to the list of inputs for
the map-reduce job.]]>
</doc>
</method>
<method name="getInputPaths" return="org.apache.hadoop.fs.Path[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the list of input {@link Path}s for the map-reduce job.
@param context The job
@return the list of input {@link Path}s for the map-reduce job.]]>
</doc>
</method>
<field name="INPUT_DIR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SPLIT_MAXSIZE" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SPLIT_MINSIZE" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="PATHFILTER_CLASS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="NUM_INPUT_FILES" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="INPUT_DIR_RECURSIVE" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LIST_STATUS_NUM_THREADS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DEFAULT_LIST_STATUS_NUM_THREADS" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A base class for file-based {@link InputFormat}s.
<p><code>FileInputFormat</code> is the base class for all file-based
<code>InputFormat</code>s. This provides a generic implementation of
{@link #getSplits(JobContext)}.
Implementations of <code>FileInputFormat</code> can also override the
{@link #isSplitable(JobContext, Path)} method to prevent input files
from being split-up in certain situations. Implementations that may
deal with non-splittable files <i>must</i> override this method, since
the default implementation assumes splitting is always possible.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.FileInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter -->
<class name="FileInputFormatCounter" extends="java.lang.Enum"
abstract="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.FileSplit -->
<class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a split with host information
@param file the file name
@param start the position of the first byte in the file to process
@param length the number of bytes in the file to process
@param hosts the list of hosts containing the block, possibly null]]>
</doc>
</constructor>
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[], java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a split with host and cached-blocks information
@param file the file name
@param start the position of the first byte in the file to process
@param length the number of bytes in the file to process
@param hosts the list of hosts containing the block
@param inMemoryHosts the list of hosts containing the block in memory]]>
</doc>
</constructor>
<method name="getPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The file containing this split's data.]]>
</doc>
</method>
<method name="getStart" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The position of the first byte in the file to process.]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The number of bytes in the file to process.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getLocationInfo" return="org.apache.hadoop.mapred.SplitLocationInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A section of an input file. Returned by {@link
InputFormat#getSplits(JobContext)} and passed to
{@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.FileSplit -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat -->
<class name="FixedLengthInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FixedLengthInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setRecordLength"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="recordLength" type="int"/>
<doc>
<![CDATA[Set the length of each record
@param conf configuration
@param recordLength the length of a record]]>
</doc>
</method>
<method name="getRecordLength" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get record length value
@param conf configuration
@return the record length, zero means none was set]]>
</doc>
</method>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<field name="FIXED_RECORD_LENGTH" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[FixedLengthInputFormat is an input format used to read input files
which contain fixed length records. The content of a record need not be
text. It can be arbitrary binary data. Users must configure the record
length property by calling:
FixedLengthInputFormat.setRecordLength(conf, recordLength);<br><br> or
conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, recordLength);
<br><br>
@see FixedLengthRecordReader]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.InvalidInputException -->
<class name="InvalidInputException" extends="java.io.IOException"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidInputException" type="java.util.List"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create the exception with the given list.
@param probs the list of problems to report. This list is not copied.]]>
</doc>
</constructor>
<method name="getProblems" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the complete list of the problems reported.
@return the list of problems, which must not be modified]]>
</doc>
</method>
<method name="getMessage" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get a summary message of the problems found.
@return the concatenated messages from all of the problems.]]>
</doc>
</method>
<doc>
<![CDATA[This class wraps a list of problems with the input, so that the user
can get a list of problems together instead of finding and fixing them one
by one.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.InvalidInputException -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader -->
<class name="KeyValueLineRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="getKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="findSeparator" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="utf" type="byte[]"/>
<param name="start" type="int"/>
<param name="length" type="int"/>
<param name="sep" type="byte"/>
</method>
<method name="setKeyValue"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<param name="line" type="byte[]"/>
<param name="lineLen" type="int"/>
<param name="pos" type="int"/>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read key/value pair in a line.]]>
</doc>
</method>
<method name="getCurrentKey" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getCurrentValue" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="KEY_VALUE_SEPARATOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="KEY_VALUE_SEPERATOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="Use {@link #KEY_VALUE_SEPARATOR}">
<doc>
<![CDATA[@deprecated Use {@link #KEY_VALUE_SEPARATOR}]]>
</doc>
</field>
<doc>
<![CDATA[This class treats a line in the input as a key/value pair separated by a
separator character. The separator can be specified in config file
under the attribute name mapreduce.input.keyvaluelinerecordreader.key.value.separator. The default
separator is the tab character ('\t').]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat -->
<class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="KeyValueTextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
Either a line feed or a carriage return is used to signal the end of a line.
Each line is divided into key and value parts by a separator byte. If no
such byte exists, the key will be the entire line and the value will be empty.
The separator byte can be specified in config file under the attribute name
mapreduce.input.keyvaluelinerecordreader.key.value.separator. The default
is the tab character ('\t').]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.MultipleInputs -->
<class name="MultipleInputs" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleInputs"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<param name="inputFormatClass" type="java.lang.Class"/>
<doc>
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} to the list of
inputs for the map-reduce job.
@param job The {@link Job}
@param path {@link Path} to be added to the list of inputs for the job
@param inputFormatClass {@link InputFormat} class to use for this path]]>
</doc>
</method>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<param name="inputFormatClass" type="java.lang.Class"/>
<param name="mapperClass" type="java.lang.Class"/>
<doc>
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} and
{@link Mapper} to the list of inputs for the map-reduce job.
@param job The {@link Job}
@param path {@link Path} to be added to the list of inputs for the job
@param inputFormatClass {@link InputFormat} class to use for this path
@param mapperClass {@link Mapper} class to use for this path]]>
</doc>
</method>
<field name="DIR_FORMATS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DIR_MAPPERS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class supports MapReduce jobs that have multiple input paths with
a different {@link InputFormat} and {@link Mapper} for each path]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.MultipleInputs -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat -->
<class name="NLineInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="NLineInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getSplits" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Logically splits the set of input files for the job, splits N lines
of the input as one split.
@see FileInputFormat#getSplits(JobContext)]]>
</doc>
</method>
<method name="getSplitsForFile" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="status" type="org.apache.hadoop.fs.FileStatus"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="numLinesPerSplit" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createFileSplit" return="org.apache.hadoop.mapreduce.lib.input.FileSplit"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fileName" type="org.apache.hadoop.fs.Path"/>
<param name="begin" type="long"/>
<param name="length" type="long"/>
<doc>
<![CDATA[NLineInputFormat uses LineRecordReader, which always reads
(and consumes) at least one character out of its upper split
boundary. So to make sure that each mapper gets N lines, we
move back the upper split limits of each split
by one character here.
@param fileName Path of file
@param begin the position of the first byte in the file to process
@param length number of bytes in InputSplit
@return FileSplit]]>
</doc>
</method>
<method name="setNumLinesPerSplit"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="numLines" type="int"/>
<doc>
<![CDATA[Set the number of lines per split
@param job the job to modify
@param numLines the number of lines per split]]>
</doc>
</method>
<method name="getNumLinesPerSplit" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the number of lines per split
@param job the job
@return the number of lines per split]]>
</doc>
</method>
<field name="LINES_PER_MAP" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[NLineInputFormat which splits N lines of input as one split.
In many "pleasantly" parallel applications, each process/mapper
processes the same input file(s), but with computations
controlled by different parameters (referred to as "parameter sweeps").
One way to achieve this is to specify a set of parameters
(one set per line) as input in a control file
(which is the input path to the map-reduce application,
whereas the input dataset is specified
via a config variable in JobConf).
The NLineInputFormat can be used in such applications: it splits
the input file such that, by default, one line is fed as
a value to one map task, and the key is the offset,
i.e. (k,v) is (LongWritable, Text).
The location hints will span the whole mapred cluster.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat -->
<class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileAsBinaryInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw)
format.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat -->
<class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileAsTextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class is similar to SequenceFileInputFormat, except it generates
SequenceFileAsTextRecordReader which converts the input keys and values
to their String forms by calling toString() method.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextRecordReader -->
<class name="SequenceFileAsTextRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileAsTextRecordReader"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getCurrentKey" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getCurrentValue" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Read key/value pair in a line.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class converts the input keys and values to their String forms by
calling toString() method. This class is to SequenceFileAsTextInputFormat
what LineRecordReader is to TextInputFormat.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter -->
<class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileInputFilter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a record reader for the given split
@param split file split
@param context the task-attempt context
@return RecordReader]]>
</doc>
</method>
<method name="setFilterClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="filterClass" type="java.lang.Class"/>
<doc>
<![CDATA[set the filter class
@param job The job
@param filterClass filter class]]>
</doc>
</method>
<field name="LOG" type="org.slf4j.Logger"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILTER_CLASS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILTER_FREQUENCY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILTER_REGEX" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A class that allows a map/reduce job to work on a sample of sequence files.
The sample is decided by the filter class set by the job.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat -->
<class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getFormatMinSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="listStatus" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for {@link SequenceFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader -->
<class name="SequenceFileRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileRecordReader"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getCurrentKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getCurrentValue" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return the progress within the input split
@return 0.0 to 1.0 of the input byte range]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="conf" type="org.apache.hadoop.conf.Configuration"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A {@link RecordReader} for {@link SequenceFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.TextInputFormat -->
<class name="TextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
Either linefeed or carriage-return is used to signal end of line. Keys are
the position in the file, and values are the line of text.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.TextInputFormat -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.jobcontrol">
<!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob -->
<class name="ControlledJob" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ControlledJob" type="org.apache.hadoop.mapreduce.Job, java.util.List"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Construct a job.
@param job a mapreduce job to be executed.
@param dependingJobs a list of jobs the current job depends on]]>
</doc>
</constructor>
<constructor name="ControlledJob" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Construct a job.
@param conf mapred job configuration representing a job to be executed.
@throws IOException]]>
</doc>
</constructor>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getJobName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the job name of this job]]>
</doc>
</method>
<method name="setJobName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobName" type="java.lang.String"/>
<doc>
<![CDATA[Set the job name for this job.
@param jobName the job name]]>
</doc>
</method>
<method name="getJobID" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the job ID of this job assigned by JobControl]]>
</doc>
</method>
<method name="setJobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="id" type="java.lang.String"/>
<doc>
<![CDATA[Set the job ID for this job.
@param id the job ID]]>
</doc>
</method>
<method name="getMapredJobId" return="org.apache.hadoop.mapreduce.JobID"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the mapred ID of this job as assigned by the mapred framework.]]>
</doc>
</method>
<method name="getJob" return="org.apache.hadoop.mapreduce.Job"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the mapreduce job]]>
</doc>
</method>
<method name="setJob"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<doc>
<![CDATA[Set the mapreduce job
@param job the mapreduce job for this job.]]>
</doc>
</method>
<method name="getJobState" return="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the state of this job]]>
</doc>
</method>
<method name="setJobState"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="state" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State"/>
<doc>
<![CDATA[Set the state for this job.
@param state the new state for this job.]]>
</doc>
</method>
<method name="getMessage" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the message of this job]]>
</doc>
</method>
<method name="setMessage"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="message" type="java.lang.String"/>
<doc>
<![CDATA[Set the message for this job.
@param message the message for this job.]]>
</doc>
</method>
<method name="getDependentJobs" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the depending jobs of this job]]>
</doc>
</method>
<method name="addDependingJob" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="dependingJob" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob"/>
<doc>
<![CDATA[Add a job to this job's dependency list.
Dependent jobs can only be added while a Job
is waiting to run, not during or afterwards.
@param dependingJob Job that this Job depends on.
@return <tt>true</tt> if the Job was added.]]>
</doc>
</method>
<method name="isCompleted" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return true if this job is in a complete state]]>
</doc>
</method>
<method name="isReady" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return true if this job is in READY state]]>
</doc>
</method>
<method name="killJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="failJob"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="message" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="submit"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Submit this job to mapred. The state becomes RUNNING if submission
is successful, FAILED otherwise.]]>
</doc>
</method>
<field name="CREATE_DIR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class encapsulates a MapReduce job and its dependencies. It monitors
the states of the depending jobs and updates the state of this job.
A job starts in the WAITING state. If it does not have any depending jobs,
or all of the depending jobs are in SUCCESS state, then the job state
will become READY. If any depending jobs fail, the job will fail too.
When in READY state, the job can be submitted to Hadoop for execution, with
the state changing into RUNNING state. From RUNNING state, the job
can get into SUCCESS or FAILED state, depending on
the status of the job execution.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob -->
<!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl -->
<class name="JobControl" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="java.lang.Runnable"/>
<constructor name="JobControl" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a job control for a group of jobs.
@param groupName a name identifying this group]]>
</doc>
</constructor>
<method name="getWaitingJobList" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the waiting state]]>
</doc>
</method>
<method name="getRunningJobList" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the running state]]>
</doc>
</method>
<method name="getReadyJobsList" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the ready state]]>
</doc>
</method>
<method name="getSuccessfulJobList" return="java.util.List"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the success state]]>
</doc>
</method>
<method name="getFailedJobList" return="java.util.List"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="addJob" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="aJob" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob"/>
<doc>
<![CDATA[Add a new controlled job.
@param aJob the new controlled job]]>
</doc>
</method>
<method name="addJob" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="aJob" type="org.apache.hadoop.mapred.jobcontrol.Job"/>
<doc>
<![CDATA[Add a new job.
@param aJob the new job]]>
</doc>
</method>
<method name="addJobCollection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobs" type="java.util.Collection"/>
<doc>
<![CDATA[Add a collection of jobs
@param jobs the collection of jobs to add]]>
</doc>
</method>
<method name="getThreadState" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the thread state]]>
</doc>
</method>
<method name="stop"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[set the thread state to STOPPING so that the
thread will stop when it wakes up.]]>
</doc>
</method>
<method name="suspend"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[suspend the running thread]]>
</doc>
</method>
<method name="resume"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[resume the suspended thread]]>
</doc>
</method>
<method name="allFinished" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The main loop for the thread.
The loop does the following:
Check the states of the running jobs
Update the states of waiting jobs
Submit the jobs in ready state]]>
</doc>
</method>
<doc>
<![CDATA[This class encapsulates a set of MapReduce jobs and their dependencies.
It tracks the states of the jobs by placing them into different tables
according to their states.
This class provides APIs for the client app to add a job to the group
and to get the jobs in the group in different states. When a job is
added, an ID unique to the group is assigned to the job.
This class has a thread that submits jobs when they become ready,
monitors the states of the running jobs, and updates the states of jobs
based on the state changes of the jobs they depend on. The class
provides APIs for suspending/resuming the thread, and
for stopping the thread.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.join">
<!-- start class org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator -->
<class name="ArrayListBackedIterator" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/>
<constructor name="ArrayListBackedIterator"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="ArrayListBackedIterator" type="java.util.ArrayList"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="X"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="replay" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="X"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="item" type="X"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="clear"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[This class provides an implementation of ResetableIterator. The
implementation uses an {@link java.util.ArrayList} to store elements
added to it, replaying them as requested.
Prefer {@link StreamBackedIterator}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat -->
<class name="ComposableInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ComposableInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<doc>
<![CDATA[Refinement of InputFormat requiring implementors to provide
ComposableRecordReader instead of RecordReader.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader -->
<class name="ComposableRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="java.lang.Comparable"/>
<constructor name="ComposableRecordReader"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[Additional operations required of a RecordReader to participate in a join.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat -->
<class name="CompositeInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="CompositeInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Interpret a given string as a composite expression.
{@code
func ::= <ident>([<func>,]*<func>)
func ::= tbl(<class>,"<path>")
class ::= @see java.lang.Class#forName(java.lang.String)
path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String)
}
Reads expression from the <tt>mapreduce.join.expr</tt> property and
user-supplied join types from <tt>mapreduce.join.define.&lt;ident&gt;</tt>
types. Paths supplied to <tt>tbl</tt> are given as input paths to the
InputFormat class listed.
@see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]>
</doc>
</method>
<method name="addDefaults"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Adds the default set of identifiers to the parser.]]>
</doc>
</method>
<method name="getSplits" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the
ith split from each child to the ith composite split.]]>
</doc>
</method>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Construct a CompositeRecordReader for the children of this InputFormat
as defined in the init expression.
The outermost join need only be composable, not necessarily a composite.
Mandating TupleWritable isn't strictly correct.]]>
</doc>
</method>
<method name="compose" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="inf" type="java.lang.Class"/>
<param name="path" type="java.lang.String"/>
<doc>
<![CDATA[Convenience method for constructing composite formats.
Given InputFormat class (inf), path (p) return:
{@code tbl(<inf>, <p>) }]]>
</doc>
</method>
<method name="compose" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="op" type="java.lang.String"/>
<param name="inf" type="java.lang.Class"/>
<param name="path" type="java.lang.String[]"/>
<doc>
<![CDATA[Convenience method for constructing composite formats.
Given operation (op), InputFormat class (inf), set of paths (p) return:
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
</doc>
</method>
<method name="compose" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="op" type="java.lang.String"/>
<param name="inf" type="java.lang.Class"/>
<param name="path" type="org.apache.hadoop.fs.Path[]"/>
<doc>
<![CDATA[Convenience method for constructing composite formats.
Given operation (op), InputFormat class (inf), set of paths (p) return:
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
</doc>
</method>
<field name="JOIN_EXPR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="JOIN_COMPARATOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[An InputFormat capable of performing joins over a set of data sources sorted
and partitioned the same way.
A user may define new join types by setting the property
<tt>mapreduce.join.define.&lt;ident&gt;</tt> to a classname.
In the expression <tt>mapreduce.join.expr</tt>, the identifier will be
assumed to be a ComposableRecordReader.
<tt>mapreduce.join.keycomparator</tt> can be a classname used to compare
keys in the join.
@see #setFormat
@see JoinRecordReader
@see MultiFilterRecordReader]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit -->
<class name="CompositeInputSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="CompositeInputSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CompositeInputSplit" type="int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="s" type="org.apache.hadoop.mapreduce.InputSplit"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Add an InputSplit to this collection.
@throws IOException If capacity was not specified during construction
or if capacity has been reached.]]>
</doc>
</method>
<method name="get" return="org.apache.hadoop.mapreduce.InputSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Get ith child InputSplit.]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return the aggregate length of all child InputSplits currently added.]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the length of ith child InputSplit.]]>
</doc>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Collect a set of hosts from all child InputSplits.]]>
</doc>
</method>
<method name="getLocation" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the locations of the ith child InputSplit.]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Write splits in the following format.
{@code
<count><class1><class2>...<classn><split1><split2>...<splitn>
}]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}
@throws IOException If the child InputSplit cannot be read, typically
for failing access checks.]]>
</doc>
</method>
<doc>
<![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted
into this collection must have a public default constructor.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader -->
<class name="CompositeRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<constructor name="CompositeRecordReader" type="int, int, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a RecordReader with <tt>capacity</tt> children to position
<tt>id</tt> in the parent reader.
The id of a root CompositeRecordReader is -1 by convention, but relying
on this is not recommended.]]>
</doc>
</constructor>
<method name="combine" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="value" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
</method>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="id" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the position in the collector this class occupies.]]>
</doc>
</method>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getRecordReaderQueue" return="java.util.PriorityQueue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return sorted list of RecordReaders for this composite.]]>
</doc>
</method>
<method name="getComparator" return="org.apache.hadoop.io.WritableComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return comparator defining the ordering for RecordReaders in this
composite.]]>
</doc>
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="rr" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Add a RecordReader to this collection.
The id() of a RecordReader determines where in the Tuple its
entry will appear. Adding RecordReaders with the same id has
undefined behavior.]]>
</doc>
</method>
<method name="key" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the key for the current join or the value at the top of the
RecordReader heap.]]>
</doc>
</method>
<method name="key"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Clone the key at the top of this RR into the given object.]]>
</doc>
</method>
<method name="getCurrentKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return true if it is possible that this could emit more values.]]>
</doc>
</method>
<method name="skip"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Pass skip key to child RRs.]]>
</doc>
</method>
<method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Obtain an iterator over the child RRs apropos of the value type
ultimately emitted from this join.]]>
</doc>
</method>
<method name="accept"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jc" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector"/>
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[If key provided matches that of this Composite, give JoinCollector
iterator over values it may emit.]]>
</doc>
</method>
<method name="fillJoinCollector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="iterkey" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[For all child RRs offering the key provided, obtain an iterator
at that position in the JoinCollector.]]>
</doc>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/>
<doc>
<![CDATA[Implement Comparable contract (compare key of join or head of heap
with that of another).]]>
</doc>
</method>
<method name="createKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Create a new key common to all child RRs.
@throws ClassCastException if key classes differ.]]>
</doc>
</method>
<method name="createTupleWritable" return="org.apache.hadoop.mapreduce.lib.join.TupleWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Create a value to be used internally for joins.]]>
</doc>
</method>
<method name="getCurrentValue" return="X"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close all child RRs.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Report progress as the minimum of all child RR progress.]]>
</doc>
</method>
<field name="conf" type="org.apache.hadoop.conf.Configuration"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="keyclass" type="java.lang.Class"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="jc" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector"
transient="false" volatile="false"
static="false" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="kids" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader[]"
transient="false" volatile="false"
static="false" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="key" type="K"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="value" type="X"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key
type and partitioning.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.InnerJoinRecordReader -->
<class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.JoinRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="combine" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
<doc>
<![CDATA[Return true iff the tuple is full (all data sources contain this key).]]>
</doc>
</method>
<doc>
<![CDATA[Full inner join.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.InnerJoinRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader -->
<class name="JoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JoinRecordReader" type="int, org.apache.hadoop.conf.Configuration, int, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Emit the next set of key, value pairs as defined by the child
RecordReaders and operation associated with this composite RR.]]>
</doc>
</method>
<method name="createValue" return="org.apache.hadoop.mapreduce.lib.join.TupleWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return an iterator wrapping the JoinCollector.]]>
</doc>
</method>
<doc>
<![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader -->
<class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultiFilterRecordReader" type="int, org.apache.hadoop.conf.Configuration, int, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="emit" return="V"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For each tuple emitted, return a value (typically one of the values
in the tuple).
Modifying the Writables in the tuple is permitted and unlikely to affect
join behavior in most cases, but it is not recommended. It's safer to
clone first.]]>
</doc>
</method>
<method name="combine" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
<doc>
<![CDATA[Default implementation offers {@link #emit} every Tuple from the
collector (the outer join of child RRs).]]>
</doc>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return an iterator returning a single value from the tuple.
@see MultiFilterDelegationIterator]]>
</doc>
</method>
<doc>
<![CDATA[Base class for Composite join returning values derived from multiple
sources, but generally not tuples.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.OuterJoinRecordReader -->
<class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.JoinRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="combine" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
<doc>
<![CDATA[Emit everything from the collector.]]>
</doc>
</method>
<doc>
<![CDATA[Full outer join.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.OuterJoinRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.OverrideRecordReader -->
<class name="OverrideRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="emit" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
<doc>
<![CDATA[Emit the value with the highest position in the tuple.]]>
</doc>
</method>
<method name="createValue" return="V"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="fillJoinCollector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="iterkey" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Instead of filling the JoinCollector with iterators from all
data sources, fill only the rightmost for this key.
This not only saves space by discarding the other sources, but
it also emits the number of key-value pairs in the preferred
RecordReader instead of repeating that stream n times, where
n is the cardinality of the cross product of the discarded
streams for the given key.]]>
</doc>
</method>
<doc>
<![CDATA[Prefer the &quot;rightmost&quot; data source for this key.
For example, <tt>override(S1,S2,S3)</tt> will prefer values
from S3 over S2, and values from S2 over S1 for all keys
emitted from all sources.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.OverrideRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser -->
<class name="Parser" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Parser"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[Very simple shift-reduce parser for join expressions.
This should be sufficient for the user extension permitted now, but ought to
be replaced with a parser generator if more complex grammars are supported.
In particular, this &quot;shift-reduce&quot; parser has no states. Each set
of formals requires a different internal node type, which is responsible for
interpreting the list of tokens it receives. This is sufficient for the
current grammar, but it has several annoying properties that might inhibit
extension. In particular, parentheses are always function calls; an
algebraic or filter grammar would not only require a node type, but must
also work around the internals of this parser.
For most other cases, adding classes to the hierarchy - particularly by
extending JoinRecordReader and MultiFilterRecordReader - is fairly
straightforward. One need only override the relevant method(s) (usually only
{@link CompositeRecordReader#combine}) and include a property to map its
value to an identifier in the parser.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.Node -->
<class name="Parser.Node" extends="org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat"
abstract="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Node" type="java.lang.String"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="addIdentifier"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<param name="ident" type="java.lang.String"/>
<param name="mcstrSig" type="java.lang.Class[]"/>
<param name="nodetype" type="java.lang.Class"/>
<param name="cl" type="java.lang.Class"/>
<exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/>
<doc>
<![CDATA[For a given identifier, add a mapping to the nodetype for the parse
tree and to the ComposableRecordReader to be created, including the
formals required to invoke the constructor.
The nodetype and constructor signature should be filled in from the
child node.]]>
</doc>
</method>
<method name="setID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="id" type="int"/>
</method>
<method name="setKeyComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="cmpcl" type="java.lang.Class"/>
</method>
<field name="rrCstrMap" type="java.util.Map"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="id" type="int"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="ident" type="java.lang.String"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="cmpcl" type="java.lang.Class"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.Node -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.NodeToken -->
<class name="Parser.NodeToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<method name="getNode" return="org.apache.hadoop.mapreduce.lib.join.Parser.Node"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.NodeToken -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.NumToken -->
<class name="Parser.NumToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="NumToken" type="double"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getNum" return="double"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.NumToken -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.StrToken -->
<class name="Parser.StrToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="StrToken" type="org.apache.hadoop.mapreduce.lib.join.Parser.TType, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getStr" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.StrToken -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.Token -->
<class name="Parser.Token" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<method name="getType" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getNode" return="org.apache.hadoop.mapreduce.lib.join.Parser.Node"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getNum" return="double"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getStr" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Tagged-union type for tokens from the join expression.
@see Parser.TType]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.Token -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.TType -->
<class name="Parser.TType" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.TType -->
<!-- start interface org.apache.hadoop.mapreduce.lib.join.ResetableIterator -->
<interface name="ResetableIterator" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="hasNext" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[True if a call to next may return a value. False positives are
permitted, but false negatives are not.]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="T"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Assign next value to actual.
It is required that elements added to a ResetableIterator be returned in
the same order after a call to {@link #reset} (FIFO).
Note that a call to this may fail for nested joins (i.e. more elements
available, but none satisfying the constraints of the join)]]>
</doc>
</method>
<method name="replay" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="T"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Assign last value returned to actual.]]>
</doc>
</method>
<method name="reset"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Set iterator to return to the start of its range. Must be called after
calling {@link #add} to avoid a ConcurrentModificationException.]]>
</doc>
</method>
<method name="add"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="item" type="T"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add an element to the collection of elements to iterate over.]]>
</doc>
</method>
<method name="close"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close datasources and release resources. Calling methods on the iterator
after calling close has undefined behavior.]]>
</doc>
</method>
<method name="clear"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Close datasources, but do not release internal resources. Calling this
method should permit the object to be reused with a different datasource.]]>
</doc>
</method>
<doc>
<![CDATA[This defines an interface to a stateful Iterator that can replay elements
added to it directly.
Note that this does not extend {@link java.util.Iterator}.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.lib.join.ResetableIterator -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator -->
<class name="StreamBackedIterator" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/>
<constructor name="StreamBackedIterator"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="X"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="replay" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="X"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="item" type="X"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="clear"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[This class provides an implementation of ResetableIterator. This
implementation uses a byte array to store elements added to it.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.TupleWritable -->
<class name="TupleWritable" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<implements name="java.lang.Iterable"/>
<constructor name="TupleWritable"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create an empty tuple with no allocated storage for writables.]]>
</doc>
</constructor>
<constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Initialize tuple with storage; unknown whether any of them contain
&quot;written&quot; values.]]>
</doc>
</constructor>
<method name="has" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Return true if tuple has an element at the position provided.]]>
</doc>
</method>
<method name="get" return="org.apache.hadoop.io.Writable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Get ith Writable from Tuple.]]>
</doc>
</method>
<method name="size" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The number of children in this Tuple.]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="java.lang.Object"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return an iterator over the elements in this tuple.
Note that this doesn't flatten the tuple; one may receive tuples
from this iterator.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Convert Tuple to String as in the following.
<tt>[&lt;child1&gt;,&lt;child2&gt;,...,&lt;childn&gt;]</tt>]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Writes each Writable to <code>out</code>.
TupleWritable format:
{@code
<count><type1><type2>...<typen><obj1><obj2>...<objn>
}]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<field name="written" type="java.util.BitSet"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s.
This is *not* a general-purpose tuple type. In almost all cases, users are
encouraged to implement their own serializable types, which can perform
better validation and provide more efficient encodings than this class is
capable of. TupleWritable relies on the join framework for type safety and
assumes its instances will rarely be persisted, assumptions not only
incompatible with, but contrary to the general case.
@see org.apache.hadoop.io.Writable]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.TupleWritable -->
<!-- start class org.apache.hadoop.mapreduce.lib.join.WrappedRecordReader -->
<class name="WrappedRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="WrappedRecordReader" type="int"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="createKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Request new key from proxied RR.]]>
</doc>
</method>
<method name="createValue" return="U"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="id" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="key" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the key at the head of this RR.]]>
</doc>
</method>
<method name="key"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="qkey" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Clone the key at the head of this RR into the object supplied.]]>
</doc>
</method>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return true if the RR- including the k,v pair stored in this object-
is exhausted.]]>
</doc>
</method>
<method name="skip"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]>
</doc>
</method>
<method name="accept"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector"/>
<param name="key" type="K"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Add an iterator to the collector at the position occupied by this
RecordReader over the values in this stream paired with the key
provided (i.e., register a stream of values from this source matching K
with a collector).]]>
</doc>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Read the next k,v pair into the head of this object; return true iff
the RR and this are exhausted.]]>
</doc>
</method>
<method name="getCurrentKey" return="K"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get current key]]>
</doc>
</method>
<method name="getCurrentValue" return="U"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get current value]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Request progress from proxied RR.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Forward close request to proxied RR.]]>
</doc>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/>
<doc>
<![CDATA[Implement Comparable contract (compare key at head of proxied RR
with that of another).]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="java.lang.Object"/>
<doc>
<![CDATA[Return true iff compareTo(other) returns 0.]]>
</doc>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<field name="empty" type="boolean"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="cmp" type="org.apache.hadoop.io.WritableComparator"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Proxy class for a RecordReader participating in the join framework.
This class keeps track of the &quot;head&quot; key-value pair for the
provided RecordReader and keeps a store of values matching a key when
this source is participating in a join.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.join.WrappedRecordReader -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.map">
<!-- start class org.apache.hadoop.mapreduce.lib.map.InverseMapper -->
<class name="InverseMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InverseMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[The inverse function. Input keys and values are swapped.]]>
</doc>
</method>
<doc>
<![CDATA[A {@link Mapper} that swaps keys and values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.map.InverseMapper -->
<!-- start class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper -->
<class name="MultithreadedMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultithreadedMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getNumberOfThreads" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[The number of threads in the thread pool that will run the map function.
@param job the job
@return the number of threads]]>
</doc>
</method>
<method name="setNumberOfThreads"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="threads" type="int"/>
<doc>
<![CDATA[Set the number of threads in the pool for running maps.
@param job the job to modify
@param threads the new number of threads]]>
</doc>
</method>
<method name="getMapperClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the application's mapper class.
@param <K1> the map's input key type
@param <V1> the map's input value type
@param <K2> the map's output key type
@param <V2> the map's output value type
@param job the job
@return the mapper class to run]]>
</doc>
</method>
<method name="setMapperClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="cls" type="java.lang.Class"/>
<doc>
<![CDATA[Set the application's mapper class.
@param <K1> the map input key type
@param <V1> the map input value type
@param <K2> the map output key type
@param <V2> the map output value type
@param job the job to modify
@param cls the class to use as the mapper]]>
</doc>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Run the application's maps using a thread pool.]]>
</doc>
</method>
<field name="NUM_THREADS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<field name="MAP_CLASS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Multithreaded implementation for {@link org.apache.hadoop.mapreduce.Mapper}.
<p>
It can be used instead of the default implementation,
{@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU
bound in order to improve throughput.
<p>
Mapper implementations using this MapRunnable must be thread-safe.
<p>
The Map-Reduce job has to be configured with the mapper to use via
{@link #setMapperClass(Job, Class)} and
the number of threads the thread-pool can use with the
{@link #setNumberOfThreads(Job, int)} method. The default
value is 10 threads.
<p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper -->
<!-- start class org.apache.hadoop.mapreduce.lib.map.RegexMapper -->
<class name="RegexMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="RegexMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<field name="PATTERN" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<field name="GROUP" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.map.RegexMapper -->
<!-- start class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper -->
<class name="TokenCounterMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TokenCounterMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<doc>
<![CDATA[Tokenize the input values and emit each word with a count of 1.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper -->
<!-- start class org.apache.hadoop.mapreduce.lib.map.WrappedMapper -->
<class name="WrappedMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="WrappedMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getMapContext" return="org.apache.hadoop.mapreduce.Mapper.Context"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="mapContext" type="org.apache.hadoop.mapreduce.MapContext"/>
<doc>
<![CDATA[Get a wrapped {@link Mapper.Context} for custom implementations.
@param mapContext <code>MapContext</code> to be wrapped
@return a wrapped <code>Mapper.Context</code> for custom implementations]]>
</doc>
</method>
<doc>
<![CDATA[A {@link Mapper} which wraps a given one to allow custom
{@link Mapper.Context} implementations.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.map.WrappedMapper -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.output">
<!-- start class org.apache.hadoop.mapreduce.lib.output.BindingPathOutputCommitter -->
<class name="BindingPathOutputCommitter" extends="org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="BindingPathOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Instantiate.
@param outputPath output path (may be null)
@param context task context
@throws IOException on any failure.]]>
</doc>
</constructor>
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getWorkPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setupTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="commitTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="abortTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="cleanupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="commitJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="abortJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="isRecoverySupported" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="isCommitJobRepeatable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="isRecoverySupported" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="recoverTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="hasOutputPath" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getCommitter" return="org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the inner committer.
@return the bonded committer.]]>
</doc>
</method>
<field name="NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The classname for use in configurations.]]>
</doc>
</field>
<doc>
<![CDATA[This is a special committer which creates the factory for the committer and
runs off that. Why does it exist? So that you can explicitly instantiate
a committer by classname and yet still have the actual implementation
driven dynamically by the factory options and destination filesystem.
This simplifies integration
with existing code which takes the classname of a committer.
There's no factory for this, as that would lead to a loop.
All commit protocol methods and accessors are delegated to the
wrapped committer.
How to use:
<ol>
<li>
In applications which take a classname of committer in
a configuration option, set it to the canonical name of this class
(see {@link #NAME}). When this class is instantiated, it will
use the factory mechanism to locate the configured committer for the
destination.
</li>
<li>
In code, explicitly create an instance of this committer through
its constructor, then invoke commit lifecycle operations on it.
The dynamically configured committer will be created in the constructor
and have the lifecycle operations relayed to it.
</li>
</ol>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.BindingPathOutputCommitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter -->
<class name="FileOutputCommitter" extends="org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a file output committer
@param outputPath the job's output path, or null if you want the output
committer to act as a noop.
@param context the task's context
@throws IOException]]>
</doc>
</constructor>
<constructor name="FileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.JobContext"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a file output committer
@param outputPath the job's output path, or null if you want the output
committer to act as a noop.
@param context the job's context
@throws IOException]]>
</doc>
</constructor>
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the path where final output of the job should be placed. This
could also be considered the committed application attempt path.]]>
</doc>
</method>
<method name="getJobAttemptPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Compute the path where the output of a given job attempt will be placed.
@param context the context of the job. This is used to get the
application attempt id.
@return the path to store job attempt data.]]>
</doc>
</method>
<method name="getJobAttemptPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="out" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Compute the path where the output of a given job attempt will be placed.
@param context the context of the job. This is used to get the
application attempt id.
@param out the output path to place these in.
@return the path to store job attempt data.]]>
</doc>
</method>
<method name="getJobAttemptPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="appAttemptId" type="int"/>
<doc>
<![CDATA[Compute the path where the output of a given job attempt will be placed.
@param appAttemptId the ID of the application attempt for this job.
@return the path to store job attempt data.]]>
</doc>
</method>
<method name="getTaskAttemptPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<doc>
<![CDATA[Compute the path where the output of a task attempt is stored until
that task is committed.
@param context the context of the task attempt.
@return the path where a task attempt should be stored.]]>
</doc>
</method>
<method name="getTaskAttemptPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<param name="out" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Compute the path where the output of a task attempt is stored until
that task is committed.
@param context the context of the task attempt.
@param out The output path to put things in.
@return the path where a task attempt should be stored.]]>
</doc>
</method>
<method name="getCommittedTaskPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<doc>
<![CDATA[Compute the path where the output of a committed task is stored until
the entire job is committed.
@param context the context of the task attempt
@return the path where the output of a committed task is stored until
the entire job is committed.]]>
</doc>
</method>
<method name="getCommittedTaskPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<param name="out" type="org.apache.hadoop.fs.Path"/>
</method>
<method name="getCommittedTaskPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="appAttemptId" type="int"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<doc>
<![CDATA[Compute the path where the output of a committed task is stored until the
entire job is committed for a specific application attempt.
@param appAttemptId the id of the application attempt to use
@param context the context of any task.
@return the path where the output of a committed task is stored.]]>
</doc>
</method>
<method name="getWorkPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the directory that the task should write results into.
@return the work directory
@throws IOException]]>
</doc>
</method>
<method name="setupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create the temporary directory that is the root of all of the task
work directories.
@param context the job's context]]>
</doc>
</method>
<method name="commitJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[The job has completed, so do the work in commitJobInternal().
This may be retried on failure if using algorithm 2.
@param context the job's context]]>
</doc>
</method>
<method name="commitJobInternal"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[The job has completed, so perform the following commit steps:
Move all committed tasks to the final output dir (algorithm 1 only).
Delete the temporary directory, including all of the work directories.
Create a _SUCCESS file to mark the job as successful.
@param context the job's context]]>
</doc>
</method>
<method name="cleanupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="abortJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Delete the temporary directory, including all of the work directories.
@param context the job's context]]>
</doc>
</method>
<method name="setupTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[No task setup required.]]>
</doc>
</method>
<method name="commitTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Move the files from the work directory to the job output directory
@param context the task context]]>
</doc>
</method>
<method name="abortTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Delete the work directory
@throws IOException]]>
</doc>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Did this task write any files in the work directory?
@param context the task's context]]>
</doc>
</method>
<method name="isRecoverySupported" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="isCommitJobRepeatable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="recoverTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<field name="PENDING_DIR_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Name of directory where pending data is placed. Data that has not been
committed yet.]]>
</doc>
</field>
<field name="TEMP_DIR_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Temporary directory name.
This static variable is kept for compatibility with M/R 1.x.]]>
</doc>
</field>
<field name="SUCCEEDED_FILE_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SUCCESSFUL_JOB_OUTPUT_DIR_MARKER" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILEOUTPUTCOMMITTER_ALGORITHM_VERSION" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILEOUTPUTCOMMITTER_CLEANUP_SKIPPED" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILEOUTPUTCOMMITTER_CLEANUP_SKIPPED_DEFAULT" type="boolean"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILEOUTPUTCOMMITTER_CLEANUP_FAILURES_IGNORED" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILEOUTPUTCOMMITTER_CLEANUP_FAILURES_IGNORED_DEFAULT" type="boolean"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS_DEFAULT" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILEOUTPUTCOMMITTER_TASK_CLEANUP_ENABLED" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FILEOUTPUTCOMMITTER_TASK_CLEANUP_ENABLED_DEFAULT" type="boolean"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[An {@link OutputCommitter} that commits files specified
in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat -->
<class name="FileOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setCompressOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="compress" type="boolean"/>
<doc>
<![CDATA[Set whether the output of the job is compressed.
@param job the job to modify
@param compress should the output of the job be compressed?]]>
</doc>
</method>
<method name="getCompressOutput" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Is the job output compressed?
@param job the Job to look in
@return <code>true</code> if the job output should be compressed,
<code>false</code> otherwise]]>
</doc>
</method>
<method name="setOutputCompressorClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="codecClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs.
@param job the job to modify
@param codecClass the {@link CompressionCodec} to be used to
compress the job outputs]]>
</doc>
</method>
<method name="getOutputCompressorClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="defaultValue" type="java.lang.Class"/>
<doc>
<![CDATA[Get the {@link CompressionCodec} for compressing the job outputs.
@param job the {@link Job} to look in
@param defaultValue the {@link CompressionCodec} to return if not set
@return the {@link CompressionCodec} to be used to compress the
job outputs
@throws IllegalArgumentException if the class was specified, but not found]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setOutputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="outputDir" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the {@link Path} of the output directory for the map-reduce job.
@param job The job to modify
@param outputDir the {@link Path} of the output directory for
the map-reduce job.]]>
</doc>
</method>
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the {@link Path} to the output directory for the map-reduce job.
@return the {@link Path} to the output directory for the map-reduce job.
@see FileOutputFormat#getWorkOutputPath(TaskInputOutputContext)]]>
</doc>
</method>
<method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the {@link Path} to the task's temporary output directory
for the map-reduce job
<b id="SideEffectFiles">Tasks' Side-Effect Files</b>
<p>Some applications need to create/write-to side-files, which differ from
the actual job-outputs.</p>
<p>In such cases there could be issues with 2 instances of the same TIP
(running simultaneously e.g. speculative tasks) trying to open/write-to the
same file (path) on HDFS. Hence the application-writer will have to pick
unique names per task-attempt (e.g. using the attemptid, say
<tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
<p>To get around this the Map-Reduce framework helps the application-writer
out by maintaining a special
<tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>
sub-directory for each task-attempt on HDFS where the output of the
task-attempt goes. On successful completion of the task-attempt the files
in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only)
are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the
framework discards the sub-directory of unsuccessful task-attempts. This
is completely transparent to the application.</p>
<p>The application-writer can take advantage of this by creating any
side-files required in a work directory during execution
of the task i.e. via
{@link #getWorkOutputPath(TaskInputOutputContext)}, and
the framework will move them out similarly - thus there is no need to pick
unique paths per task-attempt.</p>
<p>The entire discussion holds true for maps of jobs with
reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
goes directly to HDFS.</p>
@return the {@link Path} to the task's temporary output directory
for the map-reduce job.]]>
</doc>
</method>
<method name="getPathForWorkFile" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
<param name="name" type="java.lang.String"/>
<param name="extension" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Helper function to generate a {@link Path} for a file that is unique for
the task within the job output directory.
<p>The path can be used to create custom files from within the map and
reduce tasks. The path name will be unique for each task. The path parent
will be the job output directory.</p>
<p>This method uses the {@link #getUniqueFile} method to make the file name
unique for the task.</p>
@param context the context for the task.
@param name the name for the file.
@param extension the extension for the file
@return a unique path across all tasks of the job.]]>
</doc>
</method>
<method name="getUniqueFile" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<param name="name" type="java.lang.String"/>
<param name="extension" type="java.lang.String"/>
<doc>
<![CDATA[Generate a unique filename, based on the task id, name, and extension
@param context the task that is calling this
@param name the base filename
@param extension the filename extension
@return a string like $name-[mrsct]-$id$extension]]>
</doc>
</method>
<method name="getDefaultWorkFile" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<param name="extension" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the default path and filename for the output format.
@param context the task context
@param extension an extension to add to the filename
@return a full path $output/_temporary/$taskid/part-[mr]-$id
@throws IOException]]>
</doc>
</method>
<method name="getOutputName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the base output name for the output file.]]>
</doc>
</method>
<method name="setOutputName"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Set the base output name for output file to be created.]]>
</doc>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="BASE_OUTPUT_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="PART" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="COMPRESS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Configuration option: should output be compressed? {@value}.]]>
</doc>
</field>
<field name="COMPRESS_CODEC" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[If compression is enabled, name of codec: {@value}.]]>
</doc>
</field>
<field name="COMPRESS_TYPE" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Type of compression {@value}: NONE, RECORD, BLOCK.
Generally only used in {@code SequenceFileOutputFormat}.]]>
</doc>
</field>
<field name="OUTDIR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Destination directory of work: {@value}.]]>
</doc>
</field>
<doc>
<![CDATA[A base class for {@link OutputFormat}s that write to {@link FileSystem}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter -->
<class name="FileOutputFormatCounter" extends="java.lang.Enum"
abstract="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat -->
<class name="FilterOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FilterOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="FilterOutputFormat" type="org.apache.hadoop.mapreduce.OutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a FilterOutputFormat based on the underlying output format.
@param baseOut the underlying OutputFormat]]>
</doc>
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<field name="baseOut" type="org.apache.hadoop.mapreduce.OutputFormat"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[FilterOutputFormat is a convenience class that wraps OutputFormat.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat -->
<class name="LazyOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="LazyOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setOutputFormatClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the underlying output format for LazyOutputFormat.
@param job the {@link Job} to modify
@param theClass the underlying class]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<field name="OUTPUT_FORMAT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A Convenience class that creates output lazily.
Use in conjunction with org.apache.hadoop.mapreduce.lib.output.MultipleOutputs to recreate the
behaviour of org.apache.hadoop.mapred.lib.MultipleTextOutputFormat (etc) of the old Hadoop API.
See {@link MultipleOutputs} documentation for more information.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat -->
<class name="MapFileOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MapFileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="dir" type="org.apache.hadoop.fs.Path"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Open the output generated by this format.]]>
</doc>
</method>
<method name="getEntry" return="org.apache.hadoop.io.Writable"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/>
<param name="partitioner" type="org.apache.hadoop.mapreduce.Partitioner"/>
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get an entry from output generated by this class.]]>
</doc>
</method>
<doc>
<![CDATA[An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes
{@link MapFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.MultipleOutputs -->
<class name="MultipleOutputs" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleOutputs" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Creates and initializes multiple outputs support,
it should be instantiated in the Mapper/Reducer setup method.
@param context the TaskInputOutputContext object]]>
</doc>
</constructor>
<method name="addNamedOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="namedOutput" type="java.lang.String"/>
<param name="outputFormatClass" type="java.lang.Class"/>
<param name="keyClass" type="java.lang.Class"/>
<param name="valueClass" type="java.lang.Class"/>
<doc>
<![CDATA[Adds a named output for the job.
@param job job to add the named output
@param namedOutput named output name, it has to be a word, letters
and numbers only, cannot be the word 'part' as
that is reserved for the default output.
@param outputFormatClass OutputFormat class.
@param keyClass key class
@param valueClass value class]]>
</doc>
</method>
<method name="setCountersEnabled"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="enabled" type="boolean"/>
<doc>
<![CDATA[Enables or disables counters for the named outputs.
The counters group is the {@link MultipleOutputs} class name.
The names of the counters are the same as the named outputs. These
counters count the number of records written to each output name.
By default these counters are disabled.
@param job job to enable counters
@param enabled indicates if the counters will be enabled or not.]]>
</doc>
</method>
<method name="getCountersEnabled" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Returns if the counters for the named outputs are enabled or not.
By default these counters are disabled.
@param job the job
@return TRUE if the counters are enabled, FALSE if they are disabled.]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="namedOutput" type="java.lang.String"/>
<param name="key" type="K"/>
<param name="value" type="V"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Write key and value to the namedOutput.
Output path is a unique file generated for the namedOutput.
For example, {namedOutput}-(m|r)-{part-number}
@param namedOutput the named output name
@param key the key
@param value the value]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="namedOutput" type="java.lang.String"/>
<param name="key" type="K"/>
<param name="value" type="V"/>
<param name="baseOutputPath" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Write key and value to baseOutputPath using the namedOutput.
@param namedOutput the named output name
@param key the key
@param value the value
@param baseOutputPath base-output path to write the record to.
Note: Framework will generate unique filename for the baseOutputPath
<b>Warning</b>: when the baseOutputPath is a path that resolves
outside of the final job output directory, the directory is created
immediately and then persists through subsequent task retries, breaking
the concept of output committing.]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="KEYOUT"/>
<param name="value" type="VALUEOUT"/>
<param name="baseOutputPath" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Write key value to an output file name.
Gets the record writer from job's output format.
Job's output format should be a FileOutputFormat.
@param key the key
@param value the value
@param baseOutputPath base-output path to write the record to.
Note: Framework will generate unique filename for the baseOutputPath
<b>Warning</b>: when the baseOutputPath is a path that resolves
outside of the final job output directory, the directory is created
immediately and then persists through subsequent task retries, breaking
the concept of output committing.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Closes all the opened outputs.
This should be called from cleanup method of map/reduce task.
If overridden subclasses must invoke <code>super.close()</code> at the
end of their <code>close()</code>]]>
</doc>
</method>
<doc>
<![CDATA[The MultipleOutputs class simplifies writing output data
to multiple outputs
<p>
Case one: writing to additional outputs other than the job default output.
Each additional output, or named output, may be configured with its own
<code>OutputFormat</code>, with its own key class and with its own value
class.
</p>
<p>
Case two: to write data to different files provided by user
</p>
<p>
MultipleOutputs supports counters, by default they are disabled. The
counters group is the {@link MultipleOutputs} class name. The names of the
counters are the same as the output name. These count the number of records
written to each output name.
</p>
Usage pattern for job submission:
<pre>
Job job = new Job();
FileInputFormat.setInputPath(job, inDir);
FileOutputFormat.setOutputPath(job, outDir);
job.setMapperClass(MOMap.class);
job.setReducerClass(MOReduce.class);
...
// Defines additional single text based output 'text' for the job
MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class,
LongWritable.class, Text.class);
// Defines additional sequence-file based output 'sequence' for the job
MultipleOutputs.addNamedOutput(job, "seq",
SequenceFileOutputFormat.class,
LongWritable.class, Text.class);
...
job.waitForCompletion(true);
...
</pre>
<p>
Usage in Reducer:
<pre>
&lt;K, V&gt; String generateFileName(K k, V v) {
return k.toString() + "_" + v.toString();
}
public class MOReduce extends
Reducer&lt;WritableComparable, Writable,WritableComparable, Writable&gt; {
private MultipleOutputs mos;
public void setup(Context context) {
...
mos = new MultipleOutputs(context);
}
public void reduce(WritableComparable key, Iterator&lt;Writable&gt; values,
Context context)
throws IOException {
...
mos.write("text", key, new Text("Hello"));
mos.write("seq", new LongWritable(1), new Text("Bye"), "seq_a");
mos.write("seq", new LongWritable(2), new Text("Chau"), "seq_b");
mos.write(key, new Text("value"), generateFileName(key, new Text("value")));
...
}
public void cleanup(Context context) throws IOException {
mos.close();
...
}
}
</pre>
<p>
When used in conjunction with org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat,
MultipleOutputs can mimic the behaviour of MultipleTextOutputFormat and MultipleSequenceFileOutputFormat
from the old Hadoop API - ie, output can be written from the Reducer to more than one location.
</p>
<p>
Use <code>MultipleOutputs.write(KEYOUT key, VALUEOUT value, String baseOutputPath)</code> to write key and
value to a path specified by <code>baseOutputPath</code>, with no need to specify a named output.
<b>Warning</b>: when the baseOutputPath passed to MultipleOutputs.write
is a path that resolves outside of the final job output directory, the
directory is created immediately and then persists through subsequent
task retries, breaking the concept of output committing:
</p>
<pre>
private MultipleOutputs&lt;Text, Text&gt; out;
public void setup(Context context) {
out = new MultipleOutputs&lt;Text, Text&gt;(context);
...
}
public void reduce(Text key, Iterable&lt;Text&gt; values, Context context) throws IOException, InterruptedException {
for (Text t : values) {
out.write(key, t, generateFileName(&lt;<i>parameter list...</i>&gt;));
}
}
protected void cleanup(Context context) throws IOException, InterruptedException {
out.close();
}
</pre>
<p>
Use your own code in <code>generateFileName()</code> to create a custom path to your results.
'/' characters in <code>baseOutputPath</code> will be translated into directory levels in your file system.
Also, append your custom-generated path with "part" or similar, otherwise your output will be -00000, -00001 etc.
No call to <code>context.write()</code> is necessary. See example <code>generateFileName()</code> code below.
</p>
<pre>
private String generateFileName(Text k) {
// expect Text k in format "Surname|Forename"
String[] kStr = k.toString().split("\\|");
String sName = kStr[0];
String fName = kStr[1];
// example for k = Smith|John
// output written to /user/hadoop/path/to/output/Smith/John-r-00000 (etc)
return sName + "/" + fName;
}
</pre>
<p>
Using MultipleOutputs in this way will still create zero-sized default output, eg part-00000.
To prevent this use <code>LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);</code>
instead of <code>job.setOutputFormatClass(TextOutputFormat.class);</code> in your Hadoop job configuration.
</p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.MultipleOutputs -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat -->
<class name="NullOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="NullOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
</method>
<doc>
<![CDATA[Consume all outputs and put them in /dev/null.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.PartialFileOutputCommitter -->
<class name="PartialFileOutputCommitter" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapreduce.lib.output.PartialOutputCommitter"/>
<constructor name="PartialFileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<constructor name="PartialFileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.JobContext"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="getCommittedTaskPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="appAttemptId" type="int"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
</method>
<method name="cleanUpPartialOutputForTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link OutputCommitter} that commits files specified
in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.PartialFileOutputCommitter -->
<!-- start interface org.apache.hadoop.mapreduce.lib.output.PartialOutputCommitter -->
<interface name="PartialOutputCommitter" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="cleanUpPartialOutputForTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Remove all previously committed outputs from prior executions of this task.
@param context Context for cleaning up previously promoted output.
@throws IOException If cleanup fails, then the state of the task may not be
well defined.]]>
</doc>
</method>
<doc>
<![CDATA[Interface for an {@link org.apache.hadoop.mapreduce.OutputCommitter}
implementing partial commit of task output, as during preemption.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapreduce.lib.output.PartialOutputCommitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter -->
<class name="PathOutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="PathOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Constructor for a task attempt.
Subclasses should provide a public constructor with this signature.
@param outputPath output path: may be null
@param context task context
@throws IOException IO problem]]>
</doc>
</constructor>
<constructor name="PathOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.JobContext"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Constructor for a job attempt.
Subclasses should provide a public constructor with this signature.
@param outputPath output path: may be null
@param context job context
@throws IOException IO problem]]>
</doc>
</constructor>
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the final directory where work will be placed once the job
is committed. This may be null, in which case, there is no output
path to write data to.
@return the path where final output of the job should be placed.]]>
</doc>
</method>
<method name="hasOutputPath" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Predicate: is there an output path?
@return true if we have an output path set, else false.]]>
</doc>
</method>
<method name="getWorkPath" return="org.apache.hadoop.fs.Path"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the directory that the task should write results into.
Warning: there's no guarantee that this work path is on the same
FS as the final output, or that it's visible across machines.
May be null.
@return the work directory
@throws IOException IO problem]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[A committer which somehow commits data written to a working directory
to the final directory during the commit process. The reference
implementation of this is the {@link FileOutputCommitter}.
There are two constructors, both of which do nothing but log and
validate their arguments.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat -->
<class name="SequenceFileAsBinaryOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileAsBinaryOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setSequenceFileOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the key class for the {@link SequenceFile}
<p>This allows the user to specify the key class to be different
from the actual class ({@link BytesWritable}) used for writing </p>
@param job the {@link Job} to modify
@param theClass the SequenceFile output key class.]]>
</doc>
</method>
<method name="setSequenceFileOutputValueClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the value class for the {@link SequenceFile}
<p>This allows the user to specify the value class to be different
from the actual class ({@link BytesWritable}) used for writing </p>
@param job the {@link Job} to modify
@param theClass the SequenceFile output value class.]]>
</doc>
</method>
<method name="getSequenceFileOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the key class for the {@link SequenceFile}
@return the key class of the {@link SequenceFile}]]>
</doc>
</method>
<method name="getSequenceFileOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the value class for the {@link SequenceFile}
@return the value class of the {@link SequenceFile}]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="KEY_CLASS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<field name="VALUE_CLASS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes keys,
values to {@link SequenceFile}s in binary(raw) format]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat -->
<class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getSequenceWriter" return="org.apache.hadoop.io.SequenceFile.Writer"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<param name="keyClass" type="java.lang.Class"/>
<param name="valueClass" type="java.lang.Class"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}.
@param job the {@link Job}
@return the {@link CompressionType} for the output {@link SequenceFile},
defaulting to {@link CompressionType#RECORD}]]>
</doc>
</method>
<method name="setOutputCompressionType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/>
<doc>
<![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}.
@param job the {@link Job} to modify
@param style the {@link CompressionType} for the output
{@link SequenceFile}]]>
</doc>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat -->
<class name="TextOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TextOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<field name="SEPARATOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<field name="SEPERATOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="Use {@link #SEPARATOR}">
<doc>
<![CDATA[@deprecated Use {@link #SEPARATOR}]]>
</doc>
</field>
<doc>
<![CDATA[An {@link OutputFormat} that writes plain text files.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.partition">
<!-- start class org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner -->
<class name="BinaryPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<constructor name="BinaryPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setOffsets"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="left" type="int"/>
<param name="right" type="int"/>
<doc>
<![CDATA[Set the subarray to be used for partitioning to
<code>bytes[left:(right+1)]</code> in Python syntax.
@param conf configuration object
@param left left Python-style offset
@param right right Python-style offset]]>
</doc>
</method>
<method name="setLeftOffset"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="offset" type="int"/>
<doc>
<![CDATA[Set the subarray to be used for partitioning to
<code>bytes[offset:]</code> in Python syntax.
@param conf configuration object
@param offset left Python-style offset]]>
</doc>
</method>
<method name="setRightOffset"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="offset" type="int"/>
<doc>
<![CDATA[Set the subarray to be used for partitioning to
<code>bytes[:(offset+1)]</code> in Python syntax.
@param conf configuration object
@param offset right Python-style offset]]>
</doc>
</method>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.BinaryComparable"/>
<param name="value" type="V"/>
<param name="numPartitions" type="int"/>
<doc>
<![CDATA[Use (the specified slice of the array returned by)
{@link BinaryComparable#getBytes()} to partition.]]>
</doc>
</method>
<field name="LEFT_OFFSET_PROPERTY_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="RIGHT_OFFSET_PROPERTY_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[<p>Partition {@link BinaryComparable} keys using a configurable part of
the bytes array returned by {@link BinaryComparable#getBytes()}.</p>
<p>The subarray to be used for the partitioning can be defined by means
of the following properties:
<ul>
<li>
<i>mapreduce.partition.binarypartitioner.left.offset</i>:
left offset in array (0 by default)
</li>
<li>
<i>mapreduce.partition.binarypartitioner.right.offset</i>:
right offset in array (-1 by default)
</li>
</ul>
Like in Python, both negative and positive offsets are allowed, but
the meaning is slightly different. In case of an array of length 5,
for instance, the possible offsets are:
<pre><code>
+---+---+---+---+---+
| B | B | B | B | B |
+---+---+---+---+---+
0 1 2 3 4
-5 -4 -3 -2 -1
</code></pre>
The first row of numbers gives the position of the offsets 0...4 in
the array; the second row gives the corresponding negative offsets.
Contrary to Python, the specified subarray has byte <code>i</code>
and <code>j</code> as first and last element, respectively, when
<code>i</code> and <code>j</code> are the left and right offset.
<p>For Hadoop programs written in Java, it is advisable to use one of
the following static convenience methods for setting the offsets:
<ul>
<li>{@link #setOffsets}</li>
<li>{@link #setLeftOffset}</li>
<li>{@link #setRightOffset}</li>
</ul>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner -->
<!-- start class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner -->
<class name="HashPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="HashPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<param name="numReduceTasks" type="int"/>
<doc>
<![CDATA[Use {@link Object#hashCode()} to partition.]]>
</doc>
</method>
<doc>
<![CDATA[Partition keys by their {@link Object#hashCode()}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner -->
<!-- start class org.apache.hadoop.mapreduce.lib.partition.InputSampler -->
<class name="InputSampler" extends="org.apache.hadoop.conf.Configured"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Tool"/>
<constructor name="InputSampler" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="writePartitionFile"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="sampler" type="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Write a partition file for the given job, using the Sampler provided.
Queries the sampler for a sample keyset, sorts by the output key
comparator, selects the keys for each rank, and writes to the destination
returned from {@link TotalOrderPartitioner#getPartitionFile}.]]>
</doc>
</method>
<method name="run" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
<doc>
<![CDATA[Driver for InputSampler from the command line.
Configures a Job instance and calls {@link #writePartitionFile}.]]>
</doc>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<doc>
<![CDATA[Utility for collecting samples and writing a partition file for
{@link TotalOrderPartitioner}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.partition.InputSampler -->
<!-- start class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator -->
<class name="KeyFieldBasedComparator" extends="org.apache.hadoop.io.WritableComparator"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<constructor name="KeyFieldBasedComparator"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="compare" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="b1" type="byte[]"/>
<param name="s1" type="int"/>
<param name="l1" type="int"/>
<param name="b2" type="byte[]"/>
<param name="s2" type="int"/>
<param name="l2" type="int"/>
</method>
<method name="setKeyFieldComparatorOptions"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="keySpec" type="java.lang.String"/>
<doc>
<![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys.
@param keySpec the key specification of the form -k pos1[,pos2], where,
pos is of the form f[.c][opts], where f is the number
of the key field to use, and c is the number of the first character from
the beginning of the field. Fields and character posns are numbered
starting with 1; a character position of zero in pos2 indicates the
field's last character. If '.c' is omitted from pos1, it defaults to 1
(the beginning of the field); if omitted from pos2, it defaults to 0
(the end of the field). opts are ordering options. The supported options
are:
-n, (Sort numerically)
-r, (Reverse the result of comparison)]]>
</doc>
</method>
<method name="getKeyFieldComparatorOption" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the {@link KeyFieldBasedComparator} options]]>
</doc>
</method>
<field name="COMPARATOR_OPTIONS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This comparator implementation provides a subset of the features provided
by the Unix/GNU Sort. In particular, the supported features are:
-n, (Sort numerically)
-r, (Reverse the result of comparison)
-k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number
of the field to use, and c is the number of the first character from the
beginning of the field. Fields and character posns are numbered starting
with 1; a character position of zero in pos2 indicates the field's last
character. If '.c' is omitted from pos1, it defaults to 1 (the beginning
of the field); if omitted from pos2, it defaults to 0 (the end of the
field). opts are ordering options (any of 'nr' as described above).
We assume that the fields in the key are separated by
{@link JobContext#MAP_OUTPUT_KEY_FIELD_SEPARATOR}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator -->
<!-- start class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner -->
<class name="KeyFieldBasedPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<constructor name="KeyFieldBasedPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K2"/>
<param name="value" type="V2"/>
<param name="numReduceTasks" type="int"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="b" type="byte[]"/>
<param name="start" type="int"/>
<param name="end" type="int"/>
<param name="currentHash" type="int"/>
</method>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="hash" type="int"/>
<param name="numReduceTasks" type="int"/>
</method>
<method name="setKeyFieldPartitionerOptions"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="keySpec" type="java.lang.String"/>
<doc>
<![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for
{@link Partitioner}
@param keySpec the key specification of the form -k pos1[,pos2], where,
pos is of the form f[.c][opts], where f is the number
of the key field to use, and c is the number of the first character from
the beginning of the field. Fields and character posns are numbered
starting with 1; a character position of zero in pos2 indicates the
field's last character. If '.c' is omitted from pos1, it defaults to 1
(the beginning of the field); if omitted from pos2, it defaults to 0
(the end of the field).]]>
</doc>
</method>
<method name="getKeyFieldPartitionerOption" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]>
</doc>
</method>
<field name="PARTITIONER_OPTIONS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Defines a way to partition keys based on certain key fields (also see
{@link KeyFieldBasedComparator}).
The key specification supported is of the form -k pos1[,pos2], where,
pos is of the form f[.c][opts], where f is the number
of the key field to use, and c is the number of the first character from
the beginning of the field. Fields and character posns are numbered
starting with 1; a character position of zero in pos2 indicates the
field's last character. If '.c' is omitted from pos1, it defaults to 1
(the beginning of the field); if omitted from pos2, it defaults to 0
(the end of the field).]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner -->
<!-- start class org.apache.hadoop.mapreduce.lib.partition.RehashPartitioner -->
<class name="RehashPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="RehashPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<param name="numReduceTasks" type="int"/>
<doc>
<![CDATA[Rehash {@link Object#hashCode()} to partition.]]>
</doc>
</method>
<doc>
<![CDATA[This partitioner rehashes values returned by {@link Object#hashCode()}
to get smoother distribution between partitions which may improve
reduce time in some cases and should harm things in no cases.
This partitioner is suggested with Integer and Long keys with simple
patterns in their distributions.
@since 2.0.3]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.partition.RehashPartitioner -->
<!-- start class org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner -->
<class name="TotalOrderPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<constructor name="TotalOrderPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Read in the partition file and build indexing data structures.
If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
<tt>total.order.partitioner.natural.order</tt> is not false, a trie
of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
will be built. Otherwise, keys will be located using a binary search of
the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
defined for this job. The input file must be sorted with the same
comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.]]>
</doc>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="K"/>
<param name="value" type="V"/>
<param name="numPartitions" type="int"/>
</method>
<method name="setPartitionFile"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="p" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the path to the SequenceFile storing the sorted partition keyset.
It must be the case that for <tt>R</tt> reduces, there are <tt>R-1</tt>
keys in the SequenceFile.]]>
</doc>
</method>
<method name="getPartitionFile" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the path to the SequenceFile storing the sorted partition keyset.
@see #setPartitionFile(Configuration, Path)]]>
</doc>
</method>
<field name="DEFAULT_PATH" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="PARTITIONER_PATH" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="MAX_TRIE_DEPTH" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="NATURAL_ORDER" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Partitioner effecting a total order by reading split points from
an externally generated source.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.reduce">
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer -->
<class name="IntSumReducer" extends="org.apache.hadoop.mapreduce.Reducer"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="IntSumReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="Key"/>
<param name="values" type="java.lang.Iterable"/>
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer -->
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer -->
<class name="LongSumReducer" extends="org.apache.hadoop.mapreduce.Reducer"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="LongSumReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="KEY"/>
<param name="values" type="java.lang.Iterable"/>
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer -->
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer -->
<class name="WrappedReducer" extends="org.apache.hadoop.mapreduce.Reducer"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="WrappedReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getReducerContext" return="org.apache.hadoop.mapreduce.Reducer.Context"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="reduceContext" type="org.apache.hadoop.mapreduce.ReduceContext"/>
<doc>
<![CDATA[A wrapped {@link Reducer.Context} for custom implementations.
@param reduceContext <code>ReduceContext</code> to be wrapped
@return a wrapped <code>Reducer.Context</code> for custom implementations]]>
</doc>
</method>
<doc>
<![CDATA[A {@link Reducer} which wraps a given one to allow for custom
{@link Reducer.Context} implementations.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer -->
</package>
<package name="org.apache.hadoop.mapreduce.security">
<!-- start class org.apache.hadoop.mapreduce.security.TokenCache -->
<class name="TokenCache" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TokenCache"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getSecretKey" return="byte[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="credentials" type="org.apache.hadoop.security.Credentials"/>
<param name="alias" type="org.apache.hadoop.io.Text"/>
<doc>
<![CDATA[Auxiliary method to get user's secret keys.
@param alias
@return secret key from the storage]]>
</doc>
</method>
<method name="obtainTokensForNamenodes"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="credentials" type="org.apache.hadoop.security.Credentials"/>
<param name="ps" type="org.apache.hadoop.fs.Path[]"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Convenience method to obtain delegation tokens from namenodes
corresponding to the paths passed.
@param credentials
@param ps array of paths
@param conf configuration
@throws IOException]]>
</doc>
</method>
<method name="cleanUpTokenReferral"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Remove jobtoken referrals which don't make sense in the context
of the task execution.
@param conf]]>
</doc>
</method>
<field name="JOB_TOKEN_HDFS_FILE" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[file name used on HDFS for generated job token]]>
</doc>
</field>
<field name="JOB_TOKENS_FILENAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[conf setting for job tokens cache file name]]>
</doc>
</field>
<doc>
<![CDATA[This class provides user facing APIs for transferring secrets from
the job client to the tasks.
The secrets can be stored just before submission of jobs and read during
the task execution.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.security.TokenCache -->
</package>
<package name="org.apache.hadoop.mapreduce.server.jobtracker">
</package>
<package name="org.apache.hadoop.mapreduce.server.tasktracker">
</package>
<package name="org.apache.hadoop.mapreduce.task.annotation">
<!-- start class org.apache.hadoop.mapreduce.task.annotation.Checkpointable -->
<class name="Checkpointable" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="java.lang.annotation.Annotation"/>
<doc>
<![CDATA[Contract representing to the framework that the task can be safely preempted
and restarted between invocations of the user-defined function.
This is often true when the result of a function does not rely on state
derived from previous elements in the record stream, but the guarantee is
left as an exercise to the implementor.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.task.annotation.Checkpointable -->
</package>
<package name="org.apache.hadoop.mapreduce.tools">
<!-- start class org.apache.hadoop.mapreduce.tools.CLI -->
<class name="CLI" extends="org.apache.hadoop.conf.Configured"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Tool"/>
<constructor name="CLI"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CLI" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="run" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="argv" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="counters" type="org.apache.hadoop.mapreduce.Counters"/>
<param name="counterGroupName" type="java.lang.String"/>
<param name="counterName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getTaskLogURL" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
<param name="baseUrl" type="java.lang.String"/>
</method>
<method name="displayTasks"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="type" type="java.lang.String"/>
<param name="state" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Display the information about a job's tasks, of a particular type and
in a particular state
@param job the job
@param type the type of the task (map/reduce/setup/cleanup)
@param state the state of the task
(pending/running/completed/failed/killed)
@throws IOException when there is an error communicating with the master
@throws InterruptedException
@throws IllegalArgumentException if an invalid type/state is passed]]>
</doc>
</method>
<method name="displayJobList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobs" type="org.apache.hadoop.mapreduce.JobStatus[]"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="argv" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<field name="cluster" type="org.apache.hadoop.mapreduce.Cluster"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="headerPattern" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<field name="dataPattern" type="java.lang.String"
transient="false" volatile="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Interprets the map reduce cli options]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.tools.CLI -->
</package>
<package name="org.apache.hadoop.mapreduce.v2">
</package>
</api>