| <?xml version="1.0" encoding="iso-8859-1" standalone="no"?> |
| <!-- Generated by the JDiff Javadoc doclet --> |
| <!-- (http://www.jdiff.org) --> |
| <!-- on Tue Jan 29 03:58:46 UTC 2019 --> |
| |
| <api |
| xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' |
| xsi:noNamespaceSchemaLocation='api.xsd' |
| name="Apache Hadoop MapReduce Core 3.1.2" |
| jdversion="1.0.9"> |
| |
| <!-- Command line arguments = -doclet org.apache.hadoop.classification.tools.IncludePublicAnnotationsJDiffDoclet -docletpath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/hadoop-annotations.jar:/build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/jdiff.jar -verbose -classpath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/classes:/build/source/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/target/hadoop-yarn-client-3.1.2.jar:/maven/log4j/log4j/1.2.17/log4j-1.2.17.jar:/build/source/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/target/hadoop-yarn-api-3.1.2.jar:/build/source/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/target/hadoop-yarn-common-3.1.2.jar:/build/source/hadoop-common-project/hadoop-auth/target/hadoop-auth-3.1.2.jar:/maven/com/nimbusds/nimbus-jose-jwt/4.41.1/nimbus-jose-jwt-4.41.1.jar:/maven/com/github/stephenc/jcip/jcip-annotations/1.0-1/jcip-annotations-1.0-1.jar:/maven/net/minidev/json-smart/2.3/json-smart-2.3.jar:/maven/net/minidev/accessors-smart/1.2/accessors-smart-1.2.jar:/maven/org/ow2/asm/asm/5.0.4/asm-5.0.4.jar:/maven/org/apache/curator/curator-framework/2.13.0/curator-framework-2.13.0.jar:/maven/javax/xml/bind/jaxb-api/2.2.11/jaxb-api-2.2.11.jar:/maven/org/apache/commons/commons-compress/1.18/commons-compress-1.18.jar:/maven/javax/servlet/javax.servlet-api/3.1.0/javax.servlet-api-3.1.0.jar:/maven/org/eclipse/jetty/jetty-util/9.3.24.v20180605/jetty-util-9.3.24.v20180605.jar:/maven/com/sun/jersey/jersey-core/1.19/jersey-core-1.19.jar:/maven/javax/ws/rs/jsr311-api/1.1.1/jsr311-api-1.1.1.jar:/maven/com/sun/jersey/jersey-client/1.19/jersey-client-1.19.jar:/maven/commons-io/commons-io/2.5/commons-io-2.5.jar:/maven/com/google/inject/guice/4.0/guice-4.0.jar:/maven/javax/inject/javax.inject/1/javax.inject-1.jar:/maven/aopalliance/aopalliance/1.0/aopalliance-1.0.jar:/maven/com/sun/jersey/jers
ey-server/1.19/jersey-server-1.19.jar:/maven/com/sun/jersey/jersey-json/1.19/jersey-json-1.19.jar:/maven/org/codehaus/jettison/jettison/1.1/jettison-1.1.jar:/maven/com/sun/xml/bind/jaxb-impl/2.2.3-1/jaxb-impl-2.2.3-1.jar:/maven/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13.jar:/maven/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13.jar:/maven/com/sun/jersey/contribs/jersey-guice/1.19/jersey-guice-1.19.jar:/maven/com/fasterxml/jackson/core/jackson-core/2.7.8/jackson-core-2.7.8.jar:/maven/com/fasterxml/jackson/module/jackson-module-jaxb-annotations/2.7.8/jackson-module-jaxb-annotations-2.7.8.jar:/maven/com/fasterxml/jackson/jaxrs/jackson-jaxrs-json-provider/2.7.8/jackson-jaxrs-json-provider-2.7.8.jar:/maven/com/fasterxml/jackson/jaxrs/jackson-jaxrs-base/2.7.8/jackson-jaxrs-base-2.7.8.jar:/build/source/hadoop-hdfs-project/hadoop-hdfs-client/target/hadoop-hdfs-client-3.1.2.jar:/maven/com/squareup/okhttp/okhttp/2.7.5/okhttp-2.7.5.jar:/maven/com/squareup/okio/okio/1.6.0/okio-1.6.0.jar:/maven/com/fasterxml/jackson/core/jackson-annotations/2.7.8/jackson-annotations-2.7.8.jar:/maven/org/eclipse/jetty/jetty-server/9.3.24.v20180605/jetty-server-9.3.24.v20180605.jar:/maven/org/eclipse/jetty/jetty-http/9.3.24.v20180605/jetty-http-9.3.24.v20180605.jar:/maven/org/eclipse/jetty/jetty-io/9.3.24.v20180605/jetty-io-9.3.24.v20180605.jar:/maven/org/apache/htrace/htrace-core4/4.1.0-incubating/htrace-core4-4.1.0-incubating.jar:/maven/com/fasterxml/jackson/core/jackson-databind/2.7.8/jackson-databind-2.7.8.jar:/maven/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar:/maven/org/apache/avro/avro/1.7.7/avro-1.7.7.jar:/maven/org/codehaus/jackson/jackson-core-asl/1.9.13/jackson-core-asl-1.9.13.jar:/maven/org/codehaus/jackson/jackson-mapper-asl/1.9.13/jackson-mapper-asl-1.9.13.jar:/maven/com/thoughtworks/paranamer/paranamer/2.3/paranamer-2.3.jar:/maven/org/xerial/snappy/snappy-java/1.0.5/snappy-java-1.0.5.jar:/build/source/hadoop-common-project/hadoop-commo
n/target/hadoop-common-3.1.2.jar:/maven/org/apache/commons/commons-math3/3.1.1/commons-math3-3.1.1.jar:/maven/org/apache/httpcomponents/httpclient/4.5.2/httpclient-4.5.2.jar:/maven/org/apache/httpcomponents/httpcore/4.4.4/httpcore-4.4.4.jar:/maven/commons-net/commons-net/3.6/commons-net-3.6.jar:/maven/org/eclipse/jetty/jetty-servlet/9.3.24.v20180605/jetty-servlet-9.3.24.v20180605.jar:/maven/org/eclipse/jetty/jetty-security/9.3.24.v20180605/jetty-security-9.3.24.v20180605.jar:/maven/org/eclipse/jetty/jetty-webapp/9.3.24.v20180605/jetty-webapp-9.3.24.v20180605.jar:/maven/org/eclipse/jetty/jetty-xml/9.3.24.v20180605/jetty-xml-9.3.24.v20180605.jar:/maven/javax/servlet/jsp/jsp-api/2.1/jsp-api-2.1.jar:/maven/com/sun/jersey/jersey-servlet/1.19/jersey-servlet-1.19.jar:/maven/commons-beanutils/commons-beanutils/1.9.3/commons-beanutils-1.9.3.jar:/maven/org/apache/commons/commons-configuration2/2.1.1/commons-configuration2-2.1.1.jar:/maven/org/apache/commons/commons-lang3/3.4/commons-lang3-3.4.jar:/maven/com/google/re2j/re2j/1.1/re2j-1.1.jar:/maven/com/google/code/gson/gson/2.2.4/gson-2.2.4.jar:/maven/com/jcraft/jsch/0.1.54/jsch-0.1.54.jar:/maven/org/apache/curator/curator-client/2.13.0/curator-client-2.13.0.jar:/maven/org/apache/curator/curator-recipes/2.13.0/curator-recipes-2.13.0.jar:/maven/com/google/code/findbugs/jsr305/3.0.0/jsr305-3.0.0.jar:/maven/org/apache/zookeeper/zookeeper/3.4.13/zookeeper-3.4.13.jar:/maven/org/apache/yetus/audience-annotations/0.5.0/audience-annotations-0.5.0.jar:/maven/org/apache/kerby/kerb-simplekdc/1.0.1/kerb-simplekdc-1.0.1.jar:/maven/org/apache/kerby/kerb-client/1.0.1/kerb-client-1.0.1.jar:/maven/org/apache/kerby/kerby-config/1.0.1/kerby-config-1.0.1.jar:/maven/org/apache/kerby/kerb-core/1.0.1/kerb-core-1.0.1.jar:/maven/org/apache/kerby/kerby-pkix/1.0.1/kerby-pkix-1.0.1.jar:/maven/org/apache/kerby/kerby-asn1/1.0.1/kerby-asn1-1.0.1.jar:/maven/org/apache/kerby/kerby-util/1.0.1/kerby-util-1.0.1.jar:/maven/org/apache/kerby/kerb-common/1.0.1/kerb-
common-1.0.1.jar:/maven/org/apache/kerby/kerb-crypto/1.0.1/kerb-crypto-1.0.1.jar:/maven/org/apache/kerby/kerb-util/1.0.1/kerb-util-1.0.1.jar:/maven/org/apache/kerby/token-provider/1.0.1/token-provider-1.0.1.jar:/maven/org/apache/kerby/kerb-admin/1.0.1/kerb-admin-1.0.1.jar:/maven/org/apache/kerby/kerb-server/1.0.1/kerb-server-1.0.1.jar:/maven/org/apache/kerby/kerb-identity/1.0.1/kerb-identity-1.0.1.jar:/maven/org/apache/kerby/kerby-xdr/1.0.1/kerby-xdr-1.0.1.jar:/maven/org/codehaus/woodstox/stax2-api/3.1.4/stax2-api-3.1.4.jar:/maven/com/fasterxml/woodstox/woodstox-core/5.0.3/woodstox-core-5.0.3.jar:/maven/org/slf4j/slf4j-api/1.7.25/slf4j-api-1.7.25.jar:/maven/org/slf4j/slf4j-log4j12/1.7.25/slf4j-log4j12-1.7.25.jar:/build/source/hadoop-common-project/hadoop-annotations/target/hadoop-annotations-3.1.2.jar:/usr/lib/jvm/java-8-openjdk-amd64/lib/tools.jar:/maven/com/google/inject/extensions/guice-servlet/4.0/guice-servlet-4.0.jar:/maven/io/netty/netty/3.10.5.Final/netty-3.10.5.Final.jar:/maven/commons-logging/commons-logging/1.1.3/commons-logging-1.1.3.jar:/maven/com/google/guava/guava/11.0.2/guava-11.0.2.jar:/maven/commons-codec/commons-codec/1.11/commons-codec-1.11.jar:/maven/commons-cli/commons-cli/1.2/commons-cli-1.2.jar:/maven/commons-lang/commons-lang/2.6/commons-lang-2.6.jar:/maven/commons-collections/commons-collections/3.2.2/commons-collections-3.2.2.jar:/maven/xerces/xercesImpl/2.11.0/xercesImpl-2.11.0.jar:/maven/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar -sourcepath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java -doclet org.apache.hadoop.classification.tools.IncludePublicAnnotationsJDiffDoclet -docletpath /build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/hadoop-annotations.jar:/build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/jdiff.jar -apidir 
/build/source/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/site/jdiff/xml -apiname Apache Hadoop MapReduce Core 3.1.2 --> |
| <package name="org.apache.hadoop.filecache"> |
| <!-- start class org.apache.hadoop.filecache.DistributedCache --> |
| <class name="DistributedCache" extends="org.apache.hadoop.mapreduce.filecache.DistributedCache" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="DistributedCache" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="addLocalArchives" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="str" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Add an archive that has been localized to the conf. Used |
| by internal DistributedCache code. |
| @param conf The conf to modify to contain the localized caches |
| @param str a comma separated list of local archives]]> |
| </doc> |
| </method> |
| <method name="addLocalFiles" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="str" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Add a file that has been localized to the conf. Used |
| by internal DistributedCache code. |
| @param conf The conf to modify to contain the localized caches |
| @param str a comma separated list of local files]]> |
| </doc> |
| </method> |
| <method name="createAllSymlink" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Internal to MapReduce framework. Use DistributedCacheManager |
| instead."> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="jobCacheDir" type="java.io.File"/> |
| <param name="workDir" type="java.io.File"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method creates symlinks for all files in a given dir in another |
| directory. Currently symlinks cannot be disabled. This is a NO-OP. |
| |
| @param conf the configuration |
| @param jobCacheDir the target directory for creating symlinks |
| @param workDir the directory in which the symlinks are created |
| @throws IOException |
| @deprecated Internal to MapReduce framework. Use DistributedCacheManager |
| instead.]]> |
| </doc> |
| </method> |
| <method name="getFileStatus" return="org.apache.hadoop.fs.FileStatus" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="cache" type="java.net.URI"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns {@link FileStatus} of a given cache file on hdfs. Internal to |
| MapReduce. |
| @param conf configuration |
| @param cache cache file |
| @return <code>FileStatus</code> of a given cache file on hdfs |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getTimestamp" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="cache" type="java.net.URI"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns mtime of a given cache file on hdfs. Internal to MapReduce. |
| @param conf configuration |
| @param cache cache file |
| @return mtime of a given cache file on hdfs |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setArchiveTimestamps" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="timestamps" type="java.lang.String"/> |
| <doc> |
| <![CDATA[This is to check the timestamp of the archives to be localized. |
| Used by internal MapReduce code. |
| @param conf Configuration which stores the timestamps |
| @param timestamps comma separated list of timestamps of archives. |
| The order should be the same as the order in which the archives are added.]]> |
| </doc> |
| </method> |
| <method name="setFileTimestamps" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="timestamps" type="java.lang.String"/> |
| <doc> |
| <![CDATA[This is to check the timestamp of the files to be localized. |
| Used by internal MapReduce code. |
| @param conf Configuration which stores the timestamps |
| @param timestamps comma separated list of timestamps of files. |
| The order should be the same as the order in which the files are added.]]> |
| </doc> |
| </method> |
| <method name="setLocalArchives" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="str" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the conf to contain the location for localized archives. Used |
| by internal DistributedCache code. |
| @param conf The conf to modify to contain the localized caches |
| @param str a comma separated list of local archives]]> |
| </doc> |
| </method> |
| <method name="setLocalFiles" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="str" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the conf to contain the location for localized files. Used |
| by internal DistributedCache code. |
| @param conf The conf to modify to contain the localized caches |
| @param str a comma separated list of local files]]> |
| </doc> |
| </method> |
| <field name="CACHE_FILES_SIZES" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Warning: {@link #CACHE_FILES_SIZES} is not a *public* constant. |
| The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#CACHE_FILES_SIZES}]]> |
| </doc> |
| </field> |
| <field name="CACHE_ARCHIVES_SIZES" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Warning: {@link #CACHE_ARCHIVES_SIZES} is not a *public* constant. |
| The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#CACHE_ARCHIVES_SIZES}]]> |
| </doc> |
| </field> |
| <field name="CACHE_ARCHIVES_TIMESTAMPS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Warning: {@link #CACHE_ARCHIVES_TIMESTAMPS} is not a *public* constant. |
| The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#CACHE_ARCHIVES_TIMESTAMPS}]]> |
| </doc> |
| </field> |
| <field name="CACHE_FILES_TIMESTAMPS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Warning: {@link #CACHE_FILES_TIMESTAMPS} is not a *public* constant. |
| The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#CACHE_FILE_TIMESTAMPS}]]> |
| </doc> |
| </field> |
| <field name="CACHE_ARCHIVES" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Warning: {@link #CACHE_ARCHIVES} is not a *public* constant. |
| The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#CACHE_ARCHIVES}]]> |
| </doc> |
| </field> |
| <field name="CACHE_FILES" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Warning: {@link #CACHE_FILES} is not a *public* constant. |
| The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#CACHE_FILES}]]> |
| </doc> |
| </field> |
| <field name="CACHE_LOCALARCHIVES" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Warning: {@link #CACHE_LOCALARCHIVES} is not a *public* constant. |
| The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#CACHE_LOCALARCHIVES}]]> |
| </doc> |
| </field> |
| <field name="CACHE_LOCALFILES" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Warning: {@link #CACHE_LOCALFILES} is not a *public* constant. |
| The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#CACHE_LOCALFILES}]]> |
| </doc> |
| </field> |
| <field name="CACHE_SYMLINK" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Warning: {@link #CACHE_SYMLINK} is not a *public* constant. |
| The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#CACHE_SYMLINK}]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[Distribute application-specific large, read-only files efficiently. |
| |
| <p><code>DistributedCache</code> is a facility provided by the Map-Reduce |
| framework to cache files (text, archives, jars etc.) needed by applications. |
| </p> |
| |
| <p>Applications specify the files, via urls (hdfs:// or http://) to be cached |
| via the {@link org.apache.hadoop.mapred.JobConf}. The |
| <code>DistributedCache</code> assumes that the files specified via urls are |
| already present on the {@link FileSystem} at the path specified by the url |
| and are accessible by every machine in the cluster.</p> |
| |
| <p>The framework will copy the necessary files on to the worker node before |
| any tasks for the job are executed on that node. Its efficiency stems from |
| the fact that the files are only copied once per job and the ability to |
| cache archives which are un-archived on the workers.</p> |
| |
| <p><code>DistributedCache</code> can be used to distribute simple, read-only |
| data/text files and/or more complex types such as archives, jars etc. |
| Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes. |
| Jars may be optionally added to the classpath of the tasks, a rudimentary |
| software distribution mechanism. Files have execution permissions. |
| In older version of Hadoop Map/Reduce users could optionally ask for symlinks |
| to be created in the working directory of the child task. In the current |
| version symlinks are always created. If the URL does not have a fragment |
| the name of the file or directory will be used. If multiple files or |
| directories map to the same link name, the last one added, will be used. All |
| others will not even be downloaded.</p> |
| |
| <p><code>DistributedCache</code> tracks modification timestamps of the cache |
| files. Clearly the cache files should not be modified by the application |
| or externally while the job is executing.</p> |
| |
| <p>Here is an illustrative example on how to use the |
| <code>DistributedCache</code>:</p> |
| <p><blockquote><pre> |
| // Setting up the cache for the application |
| |
| 1. Copy the requisite files to the <code>FileSystem</code>: |
| |
| $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat |
| $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip |
| $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar |
| $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar |
| $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz |
| $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz |
| |
| 2. Setup the application's <code>JobConf</code>: |
| |
| JobConf job = new JobConf(); |
| DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), |
| job); |
| DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job); |
| DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job); |
| DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job); |
| DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job); |
| DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job); |
| |
| 3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper} |
| or {@link org.apache.hadoop.mapred.Reducer}: |
| |
| public static class MapClass extends MapReduceBase |
| implements Mapper<K, V, K, V> { |
| |
| private Path[] localArchives; |
| private Path[] localFiles; |
| |
| public void configure(JobConf job) { |
| // Get the cached archives/files |
| File f = new File("./map.zip/some/file/in/zip.txt"); |
| } |
| |
| public void map(K key, V value, |
| OutputCollector<K, V> output, Reporter reporter) |
| throws IOException { |
| // Use data from the cached archives/files here |
| // ... |
| // ... |
| output.collect(k, v); |
| } |
| } |
| |
| </pre></blockquote> |
| |
| It is also very common to use the DistributedCache by using |
| {@link org.apache.hadoop.util.GenericOptionsParser}. |
| |
| This class includes methods that should be used by users |
| (specifically those mentioned in the example above, as well |
| as {@link DistributedCache#addArchiveToClassPath(Path, Configuration)}), |
| as well as methods intended for use by the MapReduce framework |
| (e.g., {@link org.apache.hadoop.mapred.JobClient}). |
| |
| @see org.apache.hadoop.mapred.JobConf |
| @see org.apache.hadoop.mapred.JobClient |
| @see org.apache.hadoop.mapreduce.Job]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.filecache.DistributedCache --> |
| </package> |
| <package name="org.apache.hadoop.mapred"> |
| <!-- start class org.apache.hadoop.mapred.ClusterStatus --> |
| <class name="ClusterStatus" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <method name="getTaskTrackers" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of task trackers in the cluster. |
| |
| @return the number of task trackers in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getActiveTrackerNames" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the names of task trackers in the cluster. |
| |
| @return the active task trackers in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getBlacklistedTrackerNames" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the names of task trackers in the cluster. |
| |
| @return the blacklisted task trackers in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getGraylistedTrackerNames" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the names of graylisted task trackers in the cluster. |
| |
| The gray list of trackers is no longer available on M/R 2.x. The function |
| is kept to be compatible with M/R 1.x applications. |
| |
| @return an empty collection of graylisted task trackers in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getGraylistedTrackers" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of graylisted task trackers in the cluster. |
| |
| The gray list of trackers is no longer available on M/R 2.x. The function |
| is kept to be compatible with M/R 1.x applications. |
| |
| @return 0 graylisted task trackers in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getBlacklistedTrackers" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of blacklisted task trackers in the cluster. |
| |
| @return the number of blacklisted task trackers in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getNumExcludedNodes" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of excluded hosts in the cluster. |
| @return the number of excluded hosts in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getTTExpiryInterval" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the tasktracker expiry interval for the cluster |
| @return the expiry interval in msec]]> |
| </doc> |
| </method> |
| <method name="getMapTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of currently running map tasks in the cluster. |
| |
| @return the number of currently running map tasks in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getReduceTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of currently running reduce tasks in the cluster. |
| |
| @return the number of currently running reduce tasks in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getMaxMapTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the maximum capacity for running map tasks in the cluster. |
| |
| @return the maximum capacity for running map tasks in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getMaxReduceTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the maximum capacity for running reduce tasks in the cluster. |
| |
| @return the maximum capacity for running reduce tasks in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getJobTrackerStatus" return="org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the JobTracker's status. |
| |
| @return {@link JobTrackerStatus} of the JobTracker]]> |
| </doc> |
| </method> |
| <method name="getMaxMemory" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns UNINITIALIZED_MEMORY_VALUE (-1)]]> |
| </doc> |
| </method> |
| <method name="getUsedMemory" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns UNINITIALIZED_MEMORY_VALUE (-1)]]> |
| </doc> |
| </method> |
| <method name="getBlackListedTrackersInfo" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Gets the list of blacklisted trackers along with reasons for blacklisting. |
| |
| @return the collection of {@link BlackListInfo} objects.]]> |
| </doc> |
| </method> |
| <method name="getJobTrackerState" return="org.apache.hadoop.mapred.JobTracker.State" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the current state of the <code>JobTracker</code>, |
| as {@link JobTracker.State} |
| |
| {@link JobTracker.State} should no longer be used on M/R 2.x. The function |
| is kept to be compatible with M/R 1.x applications. |
| |
| @return the invalid state of the <code>JobTracker</code>.]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="UNINITIALIZED_MEMORY_VALUE" type="long" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Status information on the current state of the Map-Reduce cluster. |
| |
| <p><code>ClusterStatus</code> provides clients with information such as: |
| <ol> |
| <li> |
| Size of the cluster. |
| </li> |
| <li> |
| Name of the trackers. |
| </li> |
| <li> |
| Task capacity of the cluster. |
| </li> |
| <li> |
| The number of currently running map and reduce tasks. |
| </li> |
| <li> |
| State of the <code>JobTracker</code>. |
| </li> |
| <li> |
| Details regarding black listed trackers. |
| </li> |
| </ol> |
| |
| <p>Clients can query for the latest <code>ClusterStatus</code>, via |
| {@link JobClient#getClusterStatus()}.</p> |
| |
| @see JobClient]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.ClusterStatus --> |
| <!-- start class org.apache.hadoop.mapred.Counters --> |
| <class name="Counters" extends="org.apache.hadoop.mapreduce.counters.AbstractCounters" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Counters" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="Counters" type="org.apache.hadoop.mapreduce.Counters" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getGroup" return="org.apache.hadoop.mapred.Counters.Group" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="groupName" type="java.lang.String"/> |
| </method> |
| <method name="getGroupNames" return="java.util.Collection" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="makeCompactString" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="group" type="java.lang.String"/> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #findCounter(String, String)} instead"> |
| <param name="group" type="java.lang.String"/> |
| <param name="id" type="int"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Find a counter by using strings |
| @param group the name of the group |
| @param id the id of the counter within the group (0 to N-1) |
| @param name the internal name of the counter |
| @return the counter for that name |
| @deprecated use {@link #findCounter(String, String)} instead]]> |
| </doc> |
| </method> |
| <method name="incrCounter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Enum"/> |
| <param name="amount" type="long"/> |
| <doc> |
| <![CDATA[Increments the specified counter by the specified amount, creating it if |
| it didn't already exist. |
| @param key identifies a counter |
| @param amount amount by which counter is to be incremented]]> |
| </doc> |
| </method> |
| <method name="incrCounter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="group" type="java.lang.String"/> |
| <param name="counter" type="java.lang.String"/> |
| <param name="amount" type="long"/> |
| <doc> |
| <![CDATA[Increments the specified counter by the specified amount, creating it if |
| it didn't already exist. |
| @param group the name of the group |
| @param counter the internal name of the counter |
| @param amount amount by which counter is to be incremented]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Enum"/> |
| <doc> |
| <![CDATA[Returns current value of the specified counter, or 0 if the counter |
| does not exist. |
| @param key the counter enum to lookup |
| @return the counter value or 0 if counter not found]]> |
| </doc> |
| </method> |
| <method name="incrAllCounters" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="org.apache.hadoop.mapred.Counters"/> |
| <doc> |
| <![CDATA[Increments multiple counters by their amounts in another Counters |
| instance. |
| @param other the other Counters instance]]> |
| </doc> |
| </method> |
| <method name="size" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #countCounters()} instead"> |
| <doc> |
| <![CDATA[@return the total number of counters |
| @deprecated use {@link #countCounters()} instead]]> |
| </doc> |
| </method> |
| <method name="sum" return="org.apache.hadoop.mapred.Counters" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="a" type="org.apache.hadoop.mapred.Counters"/> |
| <param name="b" type="org.apache.hadoop.mapred.Counters"/> |
| <doc> |
| <![CDATA[Convenience method for computing the sum of two sets of counters. |
| @param a the first counters |
| @param b the second counters |
| @return a new summed counters object]]> |
| </doc> |
| </method> |
| <method name="log" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="log" type="org.slf4j.Logger"/> |
| <doc> |
| <![CDATA[Logs the current counter values. |
| @param log The log to use.]]> |
| </doc> |
| </method> |
| <method name="makeEscapedCompactString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Represent the counter in a textual format that can be converted back to |
| its object form |
| @return the string in the following format |
| {(groupName)(group-displayName)[(counterName)(displayName)(value)][]*}*]]> |
| </doc> |
| </method> |
| <method name="fromEscapedCompactString" return="org.apache.hadoop.mapred.Counters" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="compactString" type="java.lang.String"/> |
| <exception name="ParseException" type="java.text.ParseException"/> |
| <doc> |
      <![CDATA[Convert a stringified (by {@link #makeEscapedCompactString()}) counter
| representation into a counter object. |
| @param compactString to parse |
| @return a new counters object |
| @throws ParseException]]> |
| </doc> |
| </method> |
| <field name="MAX_COUNTER_LIMIT" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="MAX_GROUP_LIMIT" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A set of named counters. |
| |
| <p><code>Counters</code> represent global counters, defined either by the |
| Map-Reduce framework or applications. Each <code>Counter</code> can be of |
| any {@link Enum} type.</p> |
| |
 <p><code>Counters</code> are bunched into {@link Group}s, each comprising
| counters from a particular <code>Enum</code> class.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.Counters --> |
| <!-- start class org.apache.hadoop.mapred.Counters.Counter --> |
| <class name="Counters.Counter" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.Counter"/> |
| <constructor name="Counter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setDisplayName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="displayName" type="java.lang.String"/> |
| </method> |
| <method name="getName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getDisplayName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getValue" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="value" type="long"/> |
| </method> |
| <method name="increment" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="incr" type="long"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="makeEscapedCompactString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the compact stringified version of the counter in the format |
| [(actual-name)(display-name)(value)] |
| @return the stringified result]]> |
| </doc> |
| </method> |
| <method name="contentEquals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="deprecated, no comment"> |
| <param name="counter" type="org.apache.hadoop.mapred.Counters.Counter"/> |
| <doc> |
| <![CDATA[Checks for (content) equality of two (basic) counters |
| @param counter to compare |
| @return true if content equals |
| @deprecated]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the value of the counter]]> |
| </doc> |
| </method> |
| <method name="getUnderlyingCounter" return="org.apache.hadoop.mapreduce.Counter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericRight" type="java.lang.Object"/> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[A counter record, comprising its name and value.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.Counters.Counter --> |
| <!-- start class org.apache.hadoop.mapred.Counters.Group --> |
| <class name="Counters.Group" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.counters.CounterGroupBase"/> |
| <constructor name="Group" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getCounter" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counterName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[@param counterName the name of the counter |
| @return the value of the specified counter, or 0 if the counter does |
| not exist.]]> |
| </doc> |
| </method> |
| <method name="makeEscapedCompactString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the compact stringified version of the group in the format |
| {(actual-name)(display-name)(value)[][][]} where [] are compact strings |
| for the counters within.]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #findCounter(String)} instead"> |
| <param name="id" type="int"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Get the counter for the given id and create it if it doesn't exist. |
| @param id the numeric id of the counter within the group |
| @param name the internal counter name |
| @return the counter |
| @deprecated use {@link #findCounter(String)} instead]]> |
| </doc> |
| </method> |
| <method name="getCounterForName" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Get the counter for the given name and create it if it doesn't exist. |
| @param name the internal counter name |
| @return the counter]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="iterator" return="java.util.Iterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getDisplayName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setDisplayName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="displayName" type="java.lang.String"/> |
| </method> |
| <method name="addCounter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counter" type="org.apache.hadoop.mapred.Counters.Counter"/> |
| </method> |
| <method name="addCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| <param name="displayName" type="java.lang.String"/> |
| <param name="value" type="long"/> |
| </method> |
| <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counterName" type="java.lang.String"/> |
| <param name="displayName" type="java.lang.String"/> |
| </method> |
| <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counterName" type="java.lang.String"/> |
| <param name="create" type="boolean"/> |
| </method> |
| <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counterName" type="java.lang.String"/> |
| </method> |
| <method name="size" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="incrAllCounters" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="rightGroup" type="org.apache.hadoop.mapreduce.counters.CounterGroupBase"/> |
| </method> |
| <method name="getUnderlyingGroup" return="org.apache.hadoop.mapreduce.counters.CounterGroupBase" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericRight" type="java.lang.Object"/> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
      <![CDATA[<code>Group</code> of counters, comprising counters from a particular
| counter {@link Enum} class. |
| |
 <p><code>Group</code> handles localization of the class name and the
| counter names.</p>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.Counters.Group --> |
| <!-- start class org.apache.hadoop.mapred.FileAlreadyExistsException --> |
| <class name="FileAlreadyExistsException" extends="java.io.IOException" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FileAlreadyExistsException" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="FileAlreadyExistsException" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[Used when target file already exists for any operation and |
| is not configured to be overwritten.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FileAlreadyExistsException --> |
| <!-- start class org.apache.hadoop.mapred.FileInputFormat --> |
| <class name="FileInputFormat" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputFormat"/> |
| <constructor name="FileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setMinSplitSize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="minSplitSize" type="long"/> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="filename" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Is the given filename splittable? Usually, true, but if the file is |
| stream compressed, it will not be. |
| |
| The default implementation in <code>FileInputFormat</code> always returns |
| true. Implementations that may deal with non-splittable files <i>must</i> |
| override this method. |
| |
| <code>FileInputFormat</code> implementations can override this and return |
| <code>false</code> to ensure that individual input files are never split-up |
| so that {@link Mapper}s process entire files. |
| |
| @param fs the file system that the file is on |
| @param filename the file name to check |
 @return is this file splittable?]]>
| </doc> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setInputPathFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="filter" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job. |
| |
| @param filter the PathFilter class use for filtering the input paths.]]> |
| </doc> |
| </method> |
| <method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get a PathFilter instance of the filter set for the input paths. |
| |
| @return the PathFilter instance set for the job, NULL if none has been set.]]> |
| </doc> |
| </method> |
| <method name="addInputPathRecursively" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="result" type="java.util.List"/> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <param name="inputFilter" type="org.apache.hadoop.fs.PathFilter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add files in the input path recursively into the results. |
| @param result |
| The List to store all files. |
| @param fs |
| The FileSystem. |
| @param path |
| The input path. |
| @param inputFilter |
| The input filter that can be used to filter files/dirs. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[List input directories. |
| Subclasses may override to, e.g., select only files matching a regular |
| expression. |
| |
| @param job the job to list input paths for |
| @return array of FileStatus objects |
| @throws IOException if zero items.]]> |
| </doc> |
| </method> |
| <method name="makeSplit" return="org.apache.hadoop.mapred.FileSplit" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| <param name="start" type="long"/> |
| <param name="length" type="long"/> |
| <param name="hosts" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[A factory that makes the split for this class. It can be overridden |
| by sub-classes to make sub-types]]> |
| </doc> |
| </method> |
| <method name="makeSplit" return="org.apache.hadoop.mapred.FileSplit" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| <param name="start" type="long"/> |
| <param name="length" type="long"/> |
| <param name="hosts" type="java.lang.String[]"/> |
| <param name="inMemoryHosts" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[A factory that makes the split for this class. It can be overridden |
| by sub-classes to make sub-types]]> |
| </doc> |
| </method> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Splits files returned by {@link #listStatus(JobConf)} when |
| they're too big.]]> |
| </doc> |
| </method> |
| <method name="computeSplitSize" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="goalSize" type="long"/> |
| <param name="minSize" type="long"/> |
| <param name="blockSize" type="long"/> |
| </method> |
| <method name="getBlockIndex" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/> |
| <param name="offset" type="long"/> |
| </method> |
| <method name="setInputPaths" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="commaSeparatedPaths" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Sets the given comma separated paths as the list of inputs |
| for the map-reduce job. |
| |
| @param conf Configuration of the job |
| @param commaSeparatedPaths Comma separated paths to be set as |
| the list of inputs for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="addInputPaths" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="commaSeparatedPaths" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Add the given comma separated paths to the list of inputs for |
| the map-reduce job. |
| |
| @param conf The configuration of the job |
| @param commaSeparatedPaths Comma separated paths to be added to |
| the list of inputs for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="setInputPaths" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/> |
| <doc> |
| <![CDATA[Set the array of {@link Path}s as the list of inputs |
| for the map-reduce job. |
| |
| @param conf Configuration of the job. |
| @param inputPaths the {@link Path}s of the input directories/files |
| for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="addInputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job. |
| |
| @param conf The configuration of the job |
| @param path {@link Path} to be added to the list of inputs for |
| the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getInputPaths" return="org.apache.hadoop.fs.Path[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the list of input {@link Path}s for the map-reduce job. |
| |
| @param conf The configuration of the job |
| @return the list of input {@link Path}s for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getSplitHosts" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/> |
| <param name="offset" type="long"/> |
| <param name="splitSize" type="long"/> |
| <param name="clusterMap" type="org.apache.hadoop.net.NetworkTopology"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This function identifies and returns the hosts that contribute |
| most for a given split. For calculating the contribution, rack |
| locality is treated on par with host locality, so hosts from racks |
| that contribute the most are preferred over hosts on racks that |
| contribute less |
| @param blkLocations The list of block locations |
| @param offset |
| @param splitSize |
| @return an array of hosts that contribute most to this split |
| @throws IOException]]> |
| </doc> |
| </method> |
| <field name="LOG" type="org.slf4j.Logger" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="NUM_INPUT_FILES" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="INPUT_DIR_RECURSIVE" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="INPUT_DIR_NONRECURSIVE_IGNORE_SUBDIRS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A base class for file-based {@link InputFormat}. |
| |
| <p><code>FileInputFormat</code> is the base class for all file-based |
| <code>InputFormat</code>s. This provides a generic implementation of |
| {@link #getSplits(JobConf, int)}. |
| |
| Implementations of <code>FileInputFormat</code> can also override the |
| {@link #isSplitable(FileSystem, Path)} method to prevent input files |
| from being split-up in certain situations. Implementations that may |
| deal with non-splittable files <i>must</i> override this method, since |
| the default implementation assumes splitting is always possible.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FileInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.FileOutputCommitter --> |
| <class name="FileOutputCommitter" extends="org.apache.hadoop.mapred.OutputCommitter" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FileOutputCommitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getWorkPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <param name="outputPath" type="org.apache.hadoop.fs.Path"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="commitJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="cleanupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="abortJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.JobContext"/> |
| <param name="runState" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setupTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="commitTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="abortTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="needsTaskCommit" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="isRecoverySupported" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="isCommitJobRepeatable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="isRecoverySupported" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="recoverTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="LOG" type="org.slf4j.Logger" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TEMP_DIR_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Temporary directory name]]> |
| </doc> |
| </field> |
| <field name="SUCCEEDED_FILE_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[An {@link OutputCommitter} that commits files specified |
| in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FileOutputCommitter --> |
| <!-- start class org.apache.hadoop.mapred.FileOutputFormat --> |
| <class name="FileOutputFormat" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.OutputFormat"/> |
| <constructor name="FileOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setCompressOutput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="compress" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the output of the job is compressed. |
| @param conf the {@link JobConf} to modify |
| @param compress should the output of the job be compressed?]]> |
| </doc> |
| </method> |
| <method name="getCompressOutput" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Is the job output compressed? |
| @param conf the {@link JobConf} to look in |
| @return <code>true</code> if the job output should be compressed, |
| <code>false</code> otherwise]]> |
| </doc> |
| </method> |
| <method name="setOutputCompressorClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="codecClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs. |
| @param conf the {@link JobConf} to modify |
| @param codecClass the {@link CompressionCodec} to be used to |
| compress the job outputs]]> |
| </doc> |
| </method> |
| <method name="getOutputCompressorClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="defaultValue" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Get the {@link CompressionCodec} for compressing the job outputs. |
| @param conf the {@link JobConf} to look in |
| @param defaultValue the {@link CompressionCodec} to return if not set |
| @return the {@link CompressionCodec} to be used to compress the |
| job outputs |
| @throws IllegalArgumentException if the class was specified, but not found]]> |
| </doc> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/> |
| <exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setOutputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="outputDir" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Set the {@link Path} of the output directory for the map-reduce job. |
| |
| @param conf The configuration of the job. |
| @param outputDir the {@link Path} of the output directory for |
| the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the {@link Path} to the output directory for the map-reduce job. |
| |
| @return the {@link Path} to the output directory for the map-reduce job. |
| @see FileOutputFormat#getWorkOutputPath(JobConf)]]> |
| </doc> |
| </method> |
| <method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the {@link Path} to the task's temporary output directory |
| for the map-reduce job |
| |
| <b id="SideEffectFiles">Tasks' Side-Effect Files</b> |
| |
| <p><i>Note:</i> The following is valid only if the {@link OutputCommitter} |
| is {@link FileOutputCommitter}. If <code>OutputCommitter</code> is not |
| a <code>FileOutputCommitter</code>, the task's temporary output |
| directory is same as {@link #getOutputPath(JobConf)} i.e. |
| <tt>${mapreduce.output.fileoutputformat.outputdir}</tt></p> |
| |
| <p>Some applications need to create/write-to side-files, which differ from |
| the actual job-outputs. |
| |
| <p>In such cases there could be issues with 2 instances of the same TIP |
| (running simultaneously e.g. speculative tasks) trying to open/write-to the |
| same file (path) on HDFS. Hence the application-writer will have to pick |
| unique names per task-attempt (e.g. using the attemptid, say |
| <tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p> |
| |
| <p>To get around this the Map-Reduce framework helps the application-writer |
| out by maintaining a special |
| <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> |
| sub-directory for each task-attempt on HDFS where the output of the |
| task-attempt goes. On successful completion of the task-attempt the files |
| in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only) |
| are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the |
| framework discards the sub-directory of unsuccessful task-attempts. This |
| is completely transparent to the application.</p> |
| |
| <p>The application-writer can take advantage of this by creating any |
| side-files required in <tt>${mapreduce.task.output.dir}</tt> during execution |
| of his reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the |
| framework will move them out similarly - thus she doesn't have to pick |
| unique paths per task-attempt.</p> |
| |
| <p><i>Note</i>: the value of <tt>${mapreduce.task.output.dir}</tt> during |
| execution of a particular task-attempt is actually |
| <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>, and this value is |
| set by the map-reduce framework. So, just create any side-files in the |
| path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce |
| task to take advantage of this feature.</p> |
| |
| <p>The entire discussion holds true for maps of jobs with |
| reducer=NONE (i.e. 0 reduces) since output of the map, in that case, |
| goes directly to HDFS.</p> |
| |
| @return the {@link Path} to the task's temporary output directory |
| for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getTaskOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Helper function to create the task's temporary output directory and |
| return the path to the task's output file. |
| |
| @param conf job-configuration |
| @param name temporary task-output filename |
| @return path to the task's temporary output file |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getUniqueName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Helper function to generate a name that is unique for the task. |
| |
| <p>The generated name can be used to create custom files from within the |
| different tasks for the job, the names for different tasks will not collide |
| with each other.</p> |
| |
| <p>The given name is postfixed with the task type, 'm' for maps, 'r' for |
| reduces and the task partition number. For example, given a name 'test' |
| running on the first map of the job the generated name will be |
| 'test-m-00000'.</p> |
| |
| @param conf the configuration for the job. |
| @param name the name to make unique. |
| @return a unique name across all tasks of the job.]]> |
| </doc> |
| </method> |
| <method name="getPathForCustomFile" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Helper function to generate a {@link Path} for a file that is unique for |
| the task within the job output directory. |
| |
| <p>The path can be used to create custom files from within the map and |
| reduce tasks. The path name will be unique for each task. The path parent |
| will be the job output directory.</p> |
| |
| <p>This method uses the {@link #getUniqueName} method to make the file name |
| unique for the task.</p> |
| |
| @param conf the configuration for the job. |
| @param name the name for the file. |
| @return a unique path across all tasks of the job.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A base class for {@link OutputFormat}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FileOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.FileSplit --> |
| <class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputSplitWithLocationInfo"/> |
| <constructor name="FileSplit" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="deprecated, no comment"> |
| <doc> |
| <![CDATA[Constructs a split. |
| @deprecated |
| @param file the file name |
| @param start the position of the first byte in the file to process |
| @param length the number of bytes in the file to process]]> |
| </doc> |
| </constructor> |
| <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a split with host information |
| |
| @param file the file name |
| @param start the position of the first byte in the file to process |
| @param length the number of bytes in the file to process |
| @param hosts the list of hosts containing the block, possibly null]]> |
| </doc> |
| </constructor> |
| <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[], java.lang.String[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a split with host information |
| |
| @param file the file name |
| @param start the position of the first byte in the file to process |
| @param length the number of bytes in the file to process |
| @param hosts the list of hosts containing the block, possibly null |
| @param inMemoryHosts the list of hosts containing the block in memory]]> |
| </doc> |
| </constructor> |
| <constructor name="FileSplit" type="org.apache.hadoop.mapreduce.lib.input.FileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The file containing this split's data.]]> |
| </doc> |
| </method> |
| <method name="getStart" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The position of the first byte in the file to process.]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The number of bytes in the file to process.]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getLocationInfo" return="org.apache.hadoop.mapred.SplitLocationInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A section of an input file. Returned by {@link |
| InputFormat#getSplits(JobConf, int)} and passed to |
| {@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FileSplit --> |
| <!-- start class org.apache.hadoop.mapred.FixedLengthInputFormat --> |
| <class name="FixedLengthInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="FixedLengthInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setRecordLength" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="recordLength" type="int"/> |
| <doc> |
| <![CDATA[Set the length of each record |
| @param conf configuration |
| @param recordLength the length of a record]]> |
| </doc> |
| </method> |
| <method name="getRecordLength" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get record length value |
| @param conf configuration |
| @return the record length, zero means none was set]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <field name="FIXED_RECORD_LENGTH" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[FixedLengthInputFormat is an input format used to read input files |
| which contain fixed length records. The content of a record need not be |
| text. It can be arbitrary binary data. Users must configure the record |
| length property by calling: |
| FixedLengthInputFormat.setRecordLength(conf, recordLength);<br><br> or |
| conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, recordLength); |
| <br><br> |
| @see FixedLengthRecordReader]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FixedLengthInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.ID --> |
| <class name="ID" extends="org.apache.hadoop.mapreduce.ID" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ID" type="int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[constructs an ID object from the given int]]> |
| </doc> |
| </constructor> |
| <constructor name="ID" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[A general identifier, which internally stores the id |
| as an integer. This is the super class of {@link JobID}, |
| {@link TaskID} and {@link TaskAttemptID}. |
| |
| @see JobID |
| @see TaskID |
| @see TaskAttemptID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.ID --> |
| <!-- start interface org.apache.hadoop.mapred.InputFormat --> |
| <interface name="InputFormat" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Logically split the set of input files for the job. |
| |
| <p>Each {@link InputSplit} is then assigned to an individual {@link Mapper} |
| for processing.</p> |
| |
| <p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the |
| input files are not physically split into chunks. For e.g. a split could |
| be <i><input-file-path, start, offset></i> tuple. |
| |
| @param job job configuration. |
| @param numSplits the desired number of splits, a hint. |
| @return an array of {@link InputSplit}s for the job.]]> |
| </doc> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the {@link RecordReader} for the given {@link InputSplit}. |
| |
| <p>It is the responsibility of the <code>RecordReader</code> to respect |
| record boundaries while processing the logical split to present a |
| record-oriented view to the individual task.</p> |
| |
| @param split the {@link InputSplit} |
| @param job the job that this split belongs to |
| @return a {@link RecordReader}]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>InputFormat</code> describes the input-specification for a |
| Map-Reduce job. |
| |
| <p>The Map-Reduce framework relies on the <code>InputFormat</code> of the |
| job to:</p> |
| <ol> |
| <li> |
| Validate the input-specification of the job. |
| </li> |
| <li> |
| Split-up the input file(s) into logical {@link InputSplit}s, each of |
| which is then assigned to an individual {@link Mapper}. |
| </li> |
| <li> |
| Provide the {@link RecordReader} implementation to be used to glean |
| input records from the logical <code>InputSplit</code> for processing by |
| the {@link Mapper}. |
| </li> |
| </ol> |
| |
| <p>The default behavior of file-based {@link InputFormat}s, typically |
| sub-classes of {@link FileInputFormat}, is to split the |
| input into <i>logical</i> {@link InputSplit}s based on the total size, in |
| bytes, of the input files. However, the {@link FileSystem} blocksize of |
| the input files is treated as an upper bound for input splits. A lower bound |
| on the split size can be set via |
| <a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.input.fileinputformat.split.minsize"> |
| mapreduce.input.fileinputformat.split.minsize</a>.</p> |
| |
| <p>Clearly, logical splits based on input-size is insufficient for many |
| applications since record boundaries are to be respected. In such cases, the |
| application has to also implement a {@link RecordReader} on whom lies the |
| responsibility to respect record-boundaries and present a record-oriented |
| view of the logical <code>InputSplit</code> to the individual task. |
| |
| @see InputSplit |
| @see RecordReader |
| @see JobClient |
| @see FileInputFormat]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.InputFormat --> |
| <!-- start interface org.apache.hadoop.mapred.InputSplit --> |
| <interface name="InputSplit" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <method name="getLength" return="long" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the total number of bytes in the data of the <code>InputSplit</code>. |
| |
| @return the number of bytes in the input split. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the list of hostnames where the input split is located. |
| |
| @return list of hostnames where data of the <code>InputSplit</code> is |
| located as an array of <code>String</code>s. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>InputSplit</code> represents the data to be processed by an |
| individual {@link Mapper}. |
| |
| <p>Typically, it presents a byte-oriented view on the input and is the |
| responsibility of {@link RecordReader} of the job to process this and present |
| a record-oriented view. |
| |
| @see InputFormat |
| @see RecordReader]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.InputSplit --> |
| <!-- start interface org.apache.hadoop.mapred.InputSplitWithLocationInfo --> |
| <interface name="InputSplitWithLocationInfo" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputSplit"/> |
| <method name="getLocationInfo" return="org.apache.hadoop.mapred.SplitLocationInfo[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets info about which nodes the input split is stored on and how it is |
| stored at each location. |
| |
| @return list of <code>SplitLocationInfo</code>s describing how the split |
| data is stored at each location. A null value indicates that all the |
| locations have the data stored on disk. |
| @throws IOException]]> |
| </doc> |
| </method> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.InputSplitWithLocationInfo --> |
| <!-- start class org.apache.hadoop.mapred.InvalidFileTypeException --> |
| <class name="InvalidFileTypeException" extends="java.io.IOException" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InvalidFileTypeException" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="InvalidFileTypeException" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
      <![CDATA[Used when file type differs from the desired file type, like
 getting a file when a directory is expected, or a wrong file type.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.InvalidFileTypeException --> |
| <!-- start class org.apache.hadoop.mapred.InvalidInputException --> |
| <class name="InvalidInputException" extends="java.io.IOException" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InvalidInputException" type="java.util.List" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create the exception with the given list. |
| @param probs the list of problems to report. this list is not copied.]]> |
| </doc> |
| </constructor> |
| <method name="getProblems" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the complete list of the problems reported. |
| @return the list of problems, which must not be modified]]> |
| </doc> |
| </method> |
| <method name="getMessage" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get a summary message of the problems found. |
| @return the concatenated messages from all of the problems.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class wraps a list of problems with the input, so that the user |
| can get a list of problems together instead of finding and fixing them one |
| by one.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.InvalidInputException --> |
| <!-- start class org.apache.hadoop.mapred.InvalidJobConfException --> |
| <class name="InvalidJobConfException" extends="java.io.IOException" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InvalidJobConfException" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="InvalidJobConfException" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="InvalidJobConfException" type="java.lang.String, java.lang.Throwable" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="InvalidJobConfException" type="java.lang.Throwable" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
      <![CDATA[This exception is thrown when jobconf misses some mandatory attributes
| or value of some attributes is invalid.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.InvalidJobConfException --> |
| <!-- start class org.apache.hadoop.mapred.JobClient --> |
| <class name="JobClient" extends="org.apache.hadoop.mapreduce.tools.CLI" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="java.lang.AutoCloseable"/> |
| <constructor name="JobClient" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job client.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobClient" type="org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
      <![CDATA[Build a job client with the given {@link JobConf}, and connect to the
 default cluster.
| |
| @param conf the job configuration. |
| @throws IOException]]> |
| </doc> |
| </constructor> |
| <constructor name="JobClient" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
      <![CDATA[Build a job client with the given {@link Configuration},
 and connect to the default cluster.
| |
| @param conf the configuration. |
| @throws IOException]]> |
| </doc> |
| </constructor> |
| <constructor name="JobClient" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Build a job client, connect to the indicated job tracker. |
| |
| @param jobTrackAddr the job tracker to connect to. |
| @param conf configuration.]]> |
| </doc> |
| </constructor> |
| <method name="init" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Connect to the default cluster |
| @param conf the job configuration. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close the <code>JobClient</code>.]]> |
| </doc> |
| </method> |
| <method name="getFs" return="org.apache.hadoop.fs.FileSystem" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get a filesystem handle. We need this to prepare jobs |
| for submission to the MapReduce system. |
| |
| @return the filesystem handle.]]> |
| </doc> |
| </method> |
| <method name="getClusterHandle" return="org.apache.hadoop.mapreduce.Cluster" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get a handle to the Cluster]]> |
| </doc> |
| </method> |
| <method name="submitJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobFile" type="java.lang.String"/> |
| <exception name="FileNotFoundException" type="java.io.FileNotFoundException"/> |
| <exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Submit a job to the MR system. |
| |
| This returns a handle to the {@link RunningJob} which can be used to track |
| the running-job. |
| |
| @param jobFile the job configuration. |
| @return a handle to the {@link RunningJob} which can be used to track the |
| running-job. |
| @throws FileNotFoundException |
| @throws InvalidJobConfException |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="submitJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="FileNotFoundException" type="java.io.FileNotFoundException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Submit a job to the MR system. |
| This returns a handle to the {@link RunningJob} which can be used to track |
| the running-job. |
| |
| @param conf the job configuration. |
| @return a handle to the {@link RunningJob} which can be used to track the |
| running-job. |
| @throws FileNotFoundException |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getJobInner" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
      <![CDATA[Get a {@link RunningJob} object to track an ongoing job.  Returns
| null if the id does not correspond to any known job. |
| |
| @param jobid the jobid of the job. |
| @return the {@link RunningJob} handle to track the job, null if the |
| <code>jobid</code> doesn't correspond to any known job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Applications should rather use {@link #getJob(JobID)}."> |
| <param name="jobid" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Applications should rather use {@link #getJob(JobID)}.]]> |
| </doc> |
| </method> |
| <method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the information of the current state of the map tasks of a job. |
| |
| @param jobId the job to query. |
| @return the list of all of the map tips. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Applications should rather use {@link #getMapTaskReports(JobID)}"> |
| <param name="jobId" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}]]> |
| </doc> |
| </method> |
| <method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the information of the current state of the reduce tasks of a job. |
| |
| @param jobId the job to query. |
| @return the list of all of the reduce tips. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the information of the current state of the cleanup tasks of a job. |
| |
| @param jobId the job to query. |
| @return the list of all of the cleanup tips. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the information of the current state of the setup tasks of a job. |
| |
| @param jobId the job to query. |
| @return the list of all of the setup tips. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Applications should rather use {@link #getReduceTaskReports(JobID)}"> |
| <param name="jobId" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}]]> |
| </doc> |
| </method> |
| <method name="displayTasks" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="type" type="java.lang.String"/> |
| <param name="state" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Display the information about a job's tasks, of a particular type and |
| in a particular state |
| |
| @param jobId the ID of the job |
| @param type the type of the task (map/reduce/setup/cleanup) |
| @param state the state of the task |
| (pending/running/completed/failed/killed) |
| @throws IOException when there is an error communicating with the master |
| @throws IllegalArgumentException if an invalid type/state is passed]]> |
| </doc> |
| </method> |
| <method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get status information about the Map-Reduce cluster. |
| |
| @return the status information about the Map-Reduce cluster as an object |
| of {@link ClusterStatus}. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="detailed" type="boolean"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get status information about the Map-Reduce cluster. |
| |
| @param detailed if true then get a detailed status including the |
| tracker names |
| @return the status information about the Map-Reduce cluster as an object |
| of {@link ClusterStatus}. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the jobs that are not completed and not failed. |
| |
| @return array of {@link JobStatus} for the running/to-be-run jobs. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the jobs that are submitted. |
| |
| @return array of {@link JobStatus} for the submitted jobs. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="runJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Utility that submits a job, then polls for progress until the job is |
| complete. |
| |
| @param job the job configuration. |
| @throws IOException if the job fails]]> |
| </doc> |
| </method> |
| <method name="monitorAndPrintJob" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="job" type="org.apache.hadoop.mapred.RunningJob"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Monitor a job and print status in real-time as progress is made and tasks |
| fail. |
| @param conf the job's configuration |
| @param job the job to track |
| @return true if the job succeeded |
| @throws IOException if communication to the JobTracker fails]]> |
| </doc> |
| </method> |
| <method name="setTaskOutputFilter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/> |
| <doc> |
| <![CDATA[Sets the output filter for tasks. only those tasks are printed whose |
| output matches the filter. |
| @param newValue task filter.]]> |
| </doc> |
| </method> |
| <method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the task output filter out of the JobConf. |
| |
| @param job the JobConf to examine. |
| @return the filter level.]]> |
| </doc> |
| </method> |
| <method name="setTaskOutputFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/> |
| <doc> |
| <![CDATA[Modify the JobConf to set the task output filter. |
| |
| @param job the JobConf to modify. |
| @param newValue the value to set.]]> |
| </doc> |
| </method> |
| <method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns task output filter. |
| @return task filter.]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="cntrs" type="org.apache.hadoop.mapreduce.Counters"/> |
| <param name="counterGroupName" type="java.lang.String"/> |
| <param name="counterName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getDefaultMaps" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get status information about the max available Maps in the cluster. |
| |
| @return the max available Maps in the cluster |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getDefaultReduces" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get status information about the max available Reduces in the cluster. |
| |
| @return the max available Reduces in the cluster |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getSystemDir" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Grab the jobtracker system directory path where job-specific files are to be placed. |
| |
| @return the system directory where job-specific files are to be placed.]]> |
| </doc> |
| </method> |
| <method name="isJobDirValid" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobDirPath" type="org.apache.hadoop.fs.Path"/> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Checks if the job directory is clean and has all the required components |
| for (re) starting the job]]> |
| </doc> |
| </method> |
| <method name="getStagingAreaDir" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Fetch the staging area directory for the application |
| |
| @return path to staging area directory |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getRootQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns an array of queue information objects about root level queues |
| configured |
| |
| @return the array of root level JobQueueInfo objects |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getChildQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns an array of queue information objects about immediate children |
| of queue queueName. |
| |
| @param queueName |
| @return the array of immediate children JobQueueInfo objects |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return an array of queue information objects about all the Job Queues |
| configured. |
| |
| @return Array of JobQueueInfo objects |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets all the jobs which were added to particular Job Queue |
| |
| @param queueName name of the Job Queue |
| @return Array of jobs present in the job queue |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the queue information associated to a particular Job Queue |
| |
| @param queueName name of the job queue. |
| @return Queue information associated to particular queue. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getQueueAclsForCurrentUser" return="org.apache.hadoop.mapred.QueueAclsInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the Queue ACLs for current user |
| @return array of QueueAclsInfo object for current user. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getDelegationToken" return="org.apache.hadoop.security.token.Token" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="renewer" type="org.apache.hadoop.io.Text"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get a delegation token for the user from the JobTracker. |
| @param renewer the user who can renew the token |
| @return the new token |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="renewDelegationToken" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link Token#renew} instead"> |
| <param name="token" type="org.apache.hadoop.security.token.Token"/> |
| <exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Renew a delegation token |
| @param token the token to renew |
| @return true if the renewal went well |
| @throws InvalidToken |
| @throws IOException |
| @deprecated Use {@link Token#renew} instead]]> |
| </doc> |
| </method> |
| <method name="cancelDelegationToken" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link Token#cancel} instead"> |
| <param name="token" type="org.apache.hadoop.security.token.Token"/> |
| <exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Cancel a delegation token from the JobTracker |
| @param token the token to cancel |
| @throws IOException |
| @deprecated Use {@link Token#cancel} instead]]> |
| </doc> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="argv" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| </method> |
| <field name="MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_KEY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="MAPREDUCE_CLIENT_RETRY_POLICY_ENABLED_DEFAULT" type="boolean" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_KEY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="MAPREDUCE_CLIENT_RETRY_POLICY_SPEC_DEFAULT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[<code>JobClient</code> is the primary interface for the user-job to interact |
| with the cluster. |
| |
| <code>JobClient</code> provides facilities to submit jobs, track their |
| progress, access component-tasks' reports/logs, get the Map-Reduce cluster |
| status information etc. |
| |
| <p>The job submission process involves: |
| <ol> |
| <li> |
| Checking the input and output specifications of the job. |
| </li> |
| <li> |
| Computing the {@link InputSplit}s for the job. |
| </li> |
| <li> |
| Setup the requisite accounting information for the {@link DistributedCache} |
| of the job, if necessary. |
| </li> |
| <li> |
| Copying the job's jar and configuration to the map-reduce system directory |
| on the distributed file-system. |
| </li> |
| <li> |
| Submitting the job to the cluster and optionally monitoring |
 its status.
| </li> |
| </ol> |
| |
| Normally the user creates the application, describes various facets of the |
| job via {@link JobConf} and then uses the <code>JobClient</code> to submit |
| the job and monitor its progress. |
| |
| <p>Here is an example on how to use <code>JobClient</code>:</p> |
| <p><blockquote><pre> |
| // Create a new JobConf |
| JobConf job = new JobConf(new Configuration(), MyJob.class); |
| |
| // Specify various job-specific parameters |
| job.setJobName("myjob"); |
| |
| job.setInputPath(new Path("in")); |
| job.setOutputPath(new Path("out")); |
| |
| job.setMapperClass(MyJob.MyMapper.class); |
| job.setReducerClass(MyJob.MyReducer.class); |
| |
| // Submit the job, then poll for progress until the job is complete |
| JobClient.runJob(job); |
| </pre></blockquote> |
| |
| <b id="JobControl">Job Control</b> |
| |
| <p>At times clients would chain map-reduce jobs to accomplish complex tasks |
| which cannot be done via a single map-reduce job. This is fairly easy since |
| the output of the job, typically, goes to distributed file-system and that |
| can be used as the input for the next job.</p> |
| |
| <p>However, this also means that the onus on ensuring jobs are complete |
| (success/failure) lies squarely on the clients. In such situations the |
| various job-control options are: |
| <ol> |
| <li> |
| {@link #runJob(JobConf)} : submits the job and returns only after |
| the job has completed. |
| </li> |
| <li> |
| {@link #submitJob(JobConf)} : only submits the job, then poll the |
| returned handle to the {@link RunningJob} to query status and make |
| scheduling decisions. |
| </li> |
| <li> |
| {@link JobConf#setJobEndNotificationURI(String)} : setup a notification |
| on job-completion, thus avoiding polling. |
| </li> |
| </ol> |
| |
| @see JobConf |
| @see ClusterStatus |
| @see Tool |
| @see DistributedCache]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobClient --> |
| <!-- start class org.apache.hadoop.mapred.JobConf --> |
| <class name="JobConf" extends="org.apache.hadoop.conf.Configuration" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce job configuration.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce job configuration. |
| |
| @param exampleClass a class whose containing jar is used as the job's jar.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce job configuration. |
| |
| @param conf a Configuration whose settings will be inherited.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="org.apache.hadoop.conf.Configuration, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce job configuration. |
| |
| @param conf a Configuration whose settings will be inherited. |
| @param exampleClass a class whose containing jar is used as the job's jar.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce configuration. |
| |
| @param config a Configuration-format XML job description file.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="org.apache.hadoop.fs.Path" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce configuration. |
| |
| @param config a Configuration-format XML job description file.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="boolean" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[A new map/reduce configuration where the behavior of reading from the |
| default resources can be turned off. |
| <p> |
| If the parameter {@code loadDefaults} is false, the new instance |
| will not load resources from the default files. |
| |
| @param loadDefaults specifies whether to load from the default files]]> |
| </doc> |
| </constructor> |
| <method name="getCredentials" return="org.apache.hadoop.security.Credentials" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get credentials for the job. |
| @return credentials for the job]]> |
| </doc> |
| </method> |
| <method name="getJar" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user jar for the map-reduce job. |
| |
| @return the user jar for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="setJar" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jar" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the user jar for the map-reduce job. |
| |
| @param jar the user jar for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getJarUnpackPattern" return="java.util.regex.Pattern" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the pattern for jar contents to unpack on the tasktracker]]> |
| </doc> |
| </method> |
| <method name="setJarByClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the job's jar file by finding an example class location. |
| |
| @param cls the example class.]]> |
| </doc> |
| </method> |
| <method name="getLocalDirs" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="deleteLocalFiles" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Use MRAsyncDiskService.moveAndDeleteAllVolumes instead.]]> |
| </doc> |
| </method> |
| <method name="deleteLocalFiles" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="subdir" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getLocalPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="pathString" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Constructs a local file name. Files are distributed among configured |
| local directories.]]> |
| </doc> |
| </method> |
| <method name="getUser" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the reported username for this job. |
| |
| @return the username]]> |
| </doc> |
| </method> |
| <method name="setUser" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="user" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the reported username for this job. |
| |
| @param user the username for this job.]]> |
| </doc> |
| </method> |
| <method name="setKeepFailedTaskFiles" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="keep" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the framework should keep the intermediate files for |
| failed tasks. |
| |
| @param keep <code>true</code> if framework should keep the intermediate files |
| for failed tasks, <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="getKeepFailedTaskFiles" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should the temporary files for failed tasks be kept? |
| |
| @return should the files be kept?]]> |
| </doc> |
| </method> |
| <method name="setKeepTaskFilesPattern" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="pattern" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set a regular expression for task names that should be kept. |
| The regular expression ".*_m_000123_0" would keep the files |
| for the first instance of map 123 that ran. |
| |
| @param pattern the java.util.regex.Pattern to match against the |
| task names.]]> |
| </doc> |
| </method> |
| <method name="getKeepTaskFilesPattern" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the regular expression that is matched against the task names |
| to see if we need to keep the files. |
| |
| @return the pattern as a string, if it was set, otherwise null.]]> |
| </doc> |
| </method> |
| <method name="setWorkingDirectory" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="dir" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Set the current working directory for the default file system. |
| |
| @param dir the new current working directory.]]> |
| </doc> |
| </method> |
| <method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the current working directory for the default file system. |
| |
| @return the directory name.]]> |
| </doc> |
| </method> |
| <method name="setNumTasksToExecutePerJvm" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="numTasks" type="int"/> |
| <doc> |
| <![CDATA[Sets the number of tasks that a spawned task JVM should run |
| before it exits |
| @param numTasks the number of tasks to execute; defaults to 1; |
| -1 signifies no limit]]> |
| </doc> |
| </method> |
| <method name="getNumTasksToExecutePerJvm" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of tasks that a spawned JVM should execute]]> |
| </doc> |
| </method> |
| <method name="getInputFormat" return="org.apache.hadoop.mapred.InputFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link InputFormat} implementation for the map-reduce job, |
| defaults to {@link TextInputFormat} if not specified explicitly. |
| |
| @return the {@link InputFormat} implementation for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="setInputFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link InputFormat} implementation for the map-reduce job. |
| |
| @param theClass the {@link InputFormat} implementation for the map-reduce |
| job.]]> |
| </doc> |
| </method> |
| <method name="getOutputFormat" return="org.apache.hadoop.mapred.OutputFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link OutputFormat} implementation for the map-reduce job, |
| defaults to {@link TextOutputFormat} if not specified explicitly. |
| |
| @return the {@link OutputFormat} implementation for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getOutputCommitter" return="org.apache.hadoop.mapred.OutputCommitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link OutputCommitter} implementation for the map-reduce job, |
| defaults to {@link FileOutputCommitter} if not specified explicitly. |
| |
| @return the {@link OutputCommitter} implementation for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="setOutputCommitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link OutputCommitter} implementation for the map-reduce job. |
| |
| @param theClass the {@link OutputCommitter} implementation for the map-reduce |
| job.]]> |
| </doc> |
| </method> |
| <method name="setOutputFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link OutputFormat} implementation for the map-reduce job. |
| |
| @param theClass the {@link OutputFormat} implementation for the map-reduce |
| job.]]> |
| </doc> |
| </method> |
| <method name="setCompressMapOutput" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="compress" type="boolean"/> |
| <doc> |
| <![CDATA[Should the map outputs be compressed before transfer? |
| |
| @param compress should the map outputs be compressed?]]> |
| </doc> |
| </method> |
| <method name="getCompressMapOutput" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Are the outputs of the maps to be compressed? |
| |
| @return <code>true</code> if the outputs of the maps are to be compressed, |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setMapOutputCompressorClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="codecClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the given class as the {@link CompressionCodec} for the map outputs. |
| |
| @param codecClass the {@link CompressionCodec} class that will compress |
| the map outputs.]]> |
| </doc> |
| </method> |
| <method name="getMapOutputCompressorClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="defaultValue" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Get the {@link CompressionCodec} for compressing the map outputs. |
| |
| @param defaultValue the {@link CompressionCodec} to return if not set |
| @return the {@link CompressionCodec} class that should be used to compress the |
| map outputs. |
| @throws IllegalArgumentException if the class was specified, but not found]]> |
| </doc> |
| </method> |
| <method name="getMapOutputKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the key class for the map output data. If it is not set, use the |
| (final) output key class. This allows the map output key class to be |
| different than the final output key class. |
| |
| @return the map output key class.]]> |
| </doc> |
| </method> |
| <method name="setMapOutputKeyClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the key class for the map output data. This allows the user to |
| specify the map output key class to be different than the final output |
| key class. |
| |
| @param theClass the map output key class.]]> |
| </doc> |
| </method> |
| <method name="getMapOutputValueClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the value class for the map output data. If it is not set, use the |
| (final) output value class. This allows the map output value class to be |
| different than the final output value class. |
| |
| @return the map output value class.]]> |
| </doc> |
| </method> |
| <method name="setMapOutputValueClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the value class for the map output data. This allows the user to |
| specify the map output value class to be different than the final output |
| value class. |
| |
| @param theClass the map output value class.]]> |
| </doc> |
| </method> |
| <method name="getOutputKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the key class for the job output data. |
| |
| @return the key class for the job output data.]]> |
| </doc> |
| </method> |
| <method name="setOutputKeyClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the key class for the job output data. |
| |
| @param theClass the key class for the job output data.]]> |
| </doc> |
| </method> |
| <method name="getOutputKeyComparator" return="org.apache.hadoop.io.RawComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link RawComparator} comparator used to compare keys. |
| |
| @return the {@link RawComparator} comparator used to compare keys.]]> |
| </doc> |
| </method> |
| <method name="setOutputKeyComparatorClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link RawComparator} comparator used to compare keys. |
| |
| @param theClass the {@link RawComparator} comparator used to |
| compare keys. |
| @see #setOutputValueGroupingComparator(Class)]]> |
| </doc> |
| </method> |
| <method name="setKeyFieldComparatorOptions" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="keySpec" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys. |
| |
| @param keySpec the key specification of the form -k pos1[,pos2], where, |
| pos is of the form f[.c][opts], where f is the number |
| of the key field to use, and c is the number of the first character from |
| the beginning of the field. Fields and character posns are numbered |
| starting with 1; a character position of zero in pos2 indicates the |
| field's last character. If '.c' is omitted from pos1, it defaults to 1 |
| (the beginning of the field); if omitted from pos2, it defaults to 0 |
| (the end of the field). opts are ordering options. The supported options |
| are: |
| -n, (Sort numerically) |
| -r, (Reverse the result of comparison)]]> |
| </doc> |
| </method> |
| <method name="getKeyFieldComparatorOption" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link KeyFieldBasedComparator} options]]> |
| </doc> |
| </method> |
| <method name="setKeyFieldPartitionerOptions" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="keySpec" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for |
| {@link Partitioner} |
| |
| @param keySpec the key specification of the form -k pos1[,pos2], where, |
| pos is of the form f[.c][opts], where f is the number |
| of the key field to use, and c is the number of the first character from |
| the beginning of the field. Fields and character posns are numbered |
| starting with 1; a character position of zero in pos2 indicates the |
| field's last character. If '.c' is omitted from pos1, it defaults to 1 |
| (the beginning of the field); if omitted from pos2, it defaults to 0 |
| (the end of the field).]]> |
| </doc> |
| </method> |
| <method name="getKeyFieldPartitionerOption" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]> |
| </doc> |
| </method> |
| <method name="getCombinerKeyGroupingComparator" return="org.apache.hadoop.io.RawComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user defined {@link WritableComparable} comparator for |
| grouping keys of inputs to the combiner. |
| |
| @return comparator set by the user for grouping values. |
| @see #setCombinerKeyGroupingComparator(Class) for details.]]> |
| </doc> |
| </method> |
| <method name="getOutputValueGroupingComparator" return="org.apache.hadoop.io.RawComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user defined {@link WritableComparable} comparator for |
| grouping keys of inputs to the reduce. |
| |
| @return comparator set by the user for grouping values. |
| @see #setOutputValueGroupingComparator(Class) for details.]]> |
| </doc> |
| </method> |
| <method name="setCombinerKeyGroupingComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the user defined {@link RawComparator} comparator for |
| grouping keys in the input to the combiner. |
| |
| <p>This comparator should be provided if the equivalence rules for keys |
| for sorting the intermediates are different from those for grouping keys |
| before each call to |
| {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p> |
| |
| <p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed |
| in a single call to the reduce function if K1 and K2 compare as equal.</p> |
| |
| <p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control |
| how keys are sorted, this can be used in conjunction to simulate |
| <i>secondary sort on values</i>.</p> |
| |
| <p><i>Note</i>: This is not a guarantee of the combiner sort being |
| <i>stable</i> in any sense. (In any case, with the order of available |
| map-outputs to the combiner being non-deterministic, it wouldn't make |
| that much sense.)</p> |
| |
| @param theClass the comparator class to be used for grouping keys for the |
| combiner. It should implement <code>RawComparator</code>. |
| @see #setOutputKeyComparatorClass(Class)]]> |
| </doc> |
| </method> |
| <method name="setOutputValueGroupingComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the user defined {@link RawComparator} comparator for |
| grouping keys in the input to the reduce. |
| |
| <p>This comparator should be provided if the equivalence rules for keys |
| for sorting the intermediates are different from those for grouping keys |
| before each call to |
| {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p> |
| |
| <p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed |
| in a single call to the reduce function if K1 and K2 compare as equal.</p> |
| |
| <p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control |
| how keys are sorted, this can be used in conjunction to simulate |
| <i>secondary sort on values</i>.</p> |
| |
| <p><i>Note</i>: This is not a guarantee of the reduce sort being |
| <i>stable</i> in any sense. (In any case, with the order of available |
| map-outputs to the reduce being non-deterministic, it wouldn't make |
| that much sense.)</p> |
| |
| @param theClass the comparator class to be used for grouping keys. |
| It should implement <code>RawComparator</code>. |
| @see #setOutputKeyComparatorClass(Class) |
| @see #setCombinerKeyGroupingComparator(Class)]]> |
| </doc> |
| </method> |
| <method name="getUseNewMapper" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should the framework use the new context-object code for running |
| the mapper? |
| @return true, if the new api should be used]]> |
| </doc> |
| </method> |
| <method name="setUseNewMapper" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="flag" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the framework should use the new api for the mapper. |
| This is the default for jobs submitted with the new Job api. |
| @param flag true, if the new api should be used]]> |
| </doc> |
| </method> |
| <method name="getUseNewReducer" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should the framework use the new context-object code for running |
| the reducer? |
| @return true, if the new api should be used]]> |
| </doc> |
| </method> |
| <method name="setUseNewReducer" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="flag" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the framework should use the new api for the reducer. |
| This is the default for jobs submitted with the new Job api. |
| @param flag true, if the new api should be used]]> |
| </doc> |
| </method> |
| <method name="getOutputValueClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the value class for job outputs. |
| |
| @return the value class for job outputs.]]> |
| </doc> |
| </method> |
| <method name="setOutputValueClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the value class for job outputs. |
| |
| @param theClass the value class for job outputs.]]> |
| </doc> |
| </method> |
| <method name="getMapperClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link Mapper} class for the job. |
| |
| @return the {@link Mapper} class for the job.]]> |
| </doc> |
| </method> |
| <method name="setMapperClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link Mapper} class for the job. |
| |
| @param theClass the {@link Mapper} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getMapRunnerClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link MapRunnable} class for the job. |
| |
| @return the {@link MapRunnable} class for the job.]]> |
| </doc> |
| </method> |
| <method name="setMapRunnerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Expert: Set the {@link MapRunnable} class for the job. |
| |
| Typically used to exert greater control on {@link Mapper}s. |
| |
| @param theClass the {@link MapRunnable} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getPartitionerClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link Partitioner} used to partition {@link Mapper}-outputs |
| to be sent to the {@link Reducer}s. |
| |
| @return the {@link Partitioner} used to partition map-outputs.]]> |
| </doc> |
| </method> |
| <method name="setPartitionerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link Partitioner} class used to partition |
| {@link Mapper}-outputs to be sent to the {@link Reducer}s. |
| |
| @param theClass the {@link Partitioner} used to partition map-outputs.]]> |
| </doc> |
| </method> |
| <method name="getReducerClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link Reducer} class for the job. |
| |
| @return the {@link Reducer} class for the job.]]> |
| </doc> |
| </method> |
| <method name="setReducerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link Reducer} class for the job. |
| |
| @param theClass the {@link Reducer} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getCombinerClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user-defined <i>combiner</i> class used to combine map-outputs |
| before being sent to the reducers. Typically the combiner is the same as |
| the {@link Reducer} for the job i.e. {@link #getReducerClass()}. |
| |
| @return the user-defined combiner class used to combine map-outputs.]]> |
| </doc> |
| </method> |
| <method name="setCombinerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the user-defined <i>combiner</i> class used to combine map-outputs |
| before being sent to the reducers. |
| |
| <p>The combiner is an application-specified aggregation operation, which |
| can help cut down the amount of data transferred between the |
| {@link Mapper} and the {@link Reducer}, leading to better performance.</p> |
| |
| <p>The framework may invoke the combiner 0, 1, or multiple times, in both |
| the mapper and reducer tasks. In general, the combiner is called as the |
| sort/merge result is written to disk. The combiner must: |
| <ul> |
| <li> be side-effect free</li> |
| <li> have the same input and output key types and the same input and |
| output value types</li> |
| </ul> |
| |
| <p>Typically the combiner is the same as the <code>Reducer</code> for the |
| job i.e. {@link #setReducerClass(Class)}.</p> |
| |
| @param theClass the user-defined combiner class used to combine |
| map-outputs.]]> |
| </doc> |
| </method> |
| <method name="getSpeculativeExecution" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should speculative execution be used for this job? |
| Defaults to <code>true</code>. |
| |
| @return <code>true</code> if speculative execution is to be used for this |
| job, <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setSpeculativeExecution" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="speculativeExecution" type="boolean"/> |
| <doc> |
| <![CDATA[Turn speculative execution on or off for this job. |
| |
| @param speculativeExecution <code>true</code> if speculative execution |
| should be turned on, else <code>false</code>.]]> |
| </doc> |
| </method> |
| <method name="getMapSpeculativeExecution" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should speculative execution be used for this job for map tasks? |
| Defaults to <code>true</code>. |
| |
| @return <code>true</code> if speculative execution is to be |
| used for this job for map tasks, |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setMapSpeculativeExecution" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="speculativeExecution" type="boolean"/> |
| <doc> |
| <![CDATA[Turn speculative execution on or off for this job for map tasks. |
| |
| @param speculativeExecution <code>true</code> if speculative execution |
| should be turned on for map tasks, |
| else <code>false</code>.]]> |
| </doc> |
| </method> |
| <method name="getReduceSpeculativeExecution" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should speculative execution be used for this job for reduce tasks? |
| Defaults to <code>true</code>. |
| |
| @return <code>true</code> if speculative execution is to be used |
| for reduce tasks for this job, |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setReduceSpeculativeExecution" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="speculativeExecution" type="boolean"/> |
| <doc> |
| <![CDATA[Turn speculative execution on or off for this job for reduce tasks. |
| |
| @param speculativeExecution <code>true</code> if speculative execution |
| should be turned on for reduce tasks, |
| else <code>false</code>.]]> |
| </doc> |
| </method> |
| <method name="getNumMapTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configured number of map tasks for this job. |
| Defaults to <code>1</code>. |
| |
| @return the number of map tasks for this job.]]> |
| </doc> |
| </method> |
| <method name="setNumMapTasks" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[Set the number of map tasks for this job. |
| |
| <p><i>Note</i>: This is only a <i>hint</i> to the framework. The actual |
| number of spawned map tasks depends on the number of {@link InputSplit}s |
| generated by the job's {@link InputFormat#getSplits(JobConf, int)}. |
| |
| A custom {@link InputFormat} is typically used to accurately control |
| the number of map tasks for the job.</p> |
| |
| <b id="NoOfMaps">How many maps?</b> |
| |
| <p>The number of maps is usually driven by the total size of the inputs |
| i.e. total number of blocks of the input files.</p> |
| |
| <p>The right level of parallelism for maps seems to be around 10-100 maps |
| per-node, although it has been set up to 300 or so for very cpu-light map |
| tasks. Task setup takes a while, so it is best if the maps take at least a |
| minute to execute.</p> |
| |
| <p>The default behavior of file-based {@link InputFormat}s is to split the |
| input into <i>logical</i> {@link InputSplit}s based on the total size, in |
| bytes, of input files. However, the {@link FileSystem} blocksize of the |
| input files is treated as an upper bound for input splits. A lower bound |
| on the split size can be set via |
| <a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.input.fileinputformat.split.minsize"> |
| mapreduce.input.fileinputformat.split.minsize</a>.</p> |
| |
| <p>Thus, if you expect 10TB of input data and have a blocksize of 128MB, |
| you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is |
| used to set it even higher.</p> |
| |
| @param n the number of map tasks for this job. |
| @see InputFormat#getSplits(JobConf, int) |
| @see FileInputFormat |
| @see FileSystem#getDefaultBlockSize() |
| @see FileStatus#getBlockSize()]]> |
| </doc> |
| </method> |
| <method name="getNumReduceTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configured number of reduce tasks for this job. Defaults to |
| <code>1</code>. |
| |
| @return the number of reduce tasks for this job.]]> |
| </doc> |
| </method> |
| <method name="setNumReduceTasks" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[Set the requisite number of reduce tasks for this job. |
| |
| <b id="NoOfReduces">How many reduces?</b> |
| |
| <p>The right number of reduces seems to be <code>0.95</code> or |
| <code>1.75</code> multiplied by ( |
| <i>available memory for reduce tasks</i> |
| (The value of this should be smaller than |
| numNodes * yarn.nodemanager.resource.memory-mb |
| since the resource of memory is shared by map tasks and other |
| applications) / |
| <a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.reduce.memory.mb"> |
| mapreduce.reduce.memory.mb</a>). |
| </p> |
| |
| <p>With <code>0.95</code> all of the reduces can launch immediately and |
| start transferring map outputs as the maps finish. With <code>1.75</code> |
| the faster nodes will finish their first round of reduces and launch a |
| second wave of reduces doing a much better job of load balancing.</p> |
| |
| <p>Increasing the number of reduces increases the framework overhead, but |
| increases load balancing and lowers the cost of failures.</p> |
| |
| <p>The scaling factors above are slightly less than whole numbers to |
| reserve a few reduce slots in the framework for speculative-tasks, failures |
| etc.</p> |
| |
| <b id="ReducerNone">Reducer NONE</b> |
| |
| <p>It is legal to set the number of reduce-tasks to <code>zero</code>.</p> |
| |
| <p>In this case the output of the map-tasks goes directly to the |
| distributed file-system, to the path set by |
| {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the |
| framework doesn't sort the map-outputs before writing it out to HDFS.</p> |
| |
| @param n the number of reduce tasks for this job.]]> |
| </doc> |
| </method> |
| <method name="getMaxMapAttempts" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configured number of maximum attempts that will be made to run a |
| map task, as specified by the <code>mapreduce.map.maxattempts</code> |
| property. If this property is not already set, the default is 4 attempts. |
| |
| @return the max number of attempts per map task.]]> |
| </doc> |
| </method> |
| <method name="setMaxMapAttempts" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[Expert: Set the number of maximum attempts that will be made to run a |
| map task. |
| |
| @param n the number of attempts per map task.]]> |
| </doc> |
| </method> |
| <method name="getMaxReduceAttempts" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configured number of maximum attempts that will be made to run a |
| reduce task, as specified by the <code>mapreduce.reduce.maxattempts</code> |
| property. If this property is not already set, the default is 4 attempts. |
| |
| @return the max number of attempts per reduce task.]]> |
| </doc> |
| </method> |
| <method name="setMaxReduceAttempts" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[Expert: Set the number of maximum attempts that will be made to run a |
| reduce task. |
| |
| @param n the number of attempts per reduce task.]]> |
| </doc> |
| </method> |
| <method name="getJobName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user-specified job name. This is only used to identify the |
| job to the user. |
| |
| @return the job's name, defaulting to "".]]> |
| </doc> |
| </method> |
| <method name="setJobName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the user-specified job name. |
| |
| @param name the job's new name.]]> |
| </doc> |
| </method> |
| <method name="getSessionId" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user-specified session identifier. The default is the empty string. |
| |
| The session identifier is used to tag metric data that is reported to some |
| performance metrics system via the org.apache.hadoop.metrics API. The |
| session identifier is intended, in particular, for use by Hadoop-On-Demand |
| (HOD) which allocates a virtual Hadoop cluster dynamically and transiently. |
| HOD will set the session identifier by modifying the mapred-site.xml file |
| before starting the cluster. |
| |
| When not running under HOD, this identifier is expected to remain set to |
| the empty string. |
| |
| @return the session identifier, defaulting to "".]]> |
| </doc> |
| </method> |
| <method name="setSessionId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="sessionId" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the user-specified session identifier. |
| |
| @param sessionId the new session id.]]> |
| </doc> |
| </method> |
| <method name="setMaxTaskFailuresPerTracker" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="noFailures" type="int"/> |
| <doc> |
| <![CDATA[Set the maximum no. of failures of a given job per tasktracker. |
| If the no. of task failures exceeds <code>noFailures</code>, the |
| tasktracker is <i>blacklisted</i> for this job. |
| |
| @param noFailures maximum no. of failures of a given job per tasktracker.]]> |
| </doc> |
| </method> |
| <method name="getMaxTaskFailuresPerTracker" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Expert: Get the maximum no. of failures of a given job per tasktracker. |
| If the no. of task failures exceeds this, the tasktracker is |
| <i>blacklisted</i> for this job. |
| |
| @return the maximum no. of failures of a given job per tasktracker.]]> |
| </doc> |
| </method> |
| <method name="getMaxMapTaskFailuresPercent" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the maximum percentage of map tasks that can fail without |
| the job being aborted. |
| |
| Each map task is executed a minimum of {@link #getMaxMapAttempts()} |
| attempts before being declared as <i>failed</i>. |
| |
| Defaults to <code>zero</code>, i.e. <i>any</i> failed map-task results in |
| the job being declared as {@link JobStatus#FAILED}. |
| |
| @return the maximum percentage of map tasks that can fail without |
| the job being aborted.]]> |
| </doc> |
| </method> |
| <method name="setMaxMapTaskFailuresPercent" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="percent" type="int"/> |
| <doc> |
| <![CDATA[Expert: Set the maximum percentage of map tasks that can fail without the |
| job being aborted. |
| |
| Each map task is executed a minimum of {@link #getMaxMapAttempts} attempts |
| before being declared as <i>failed</i>. |
| |
| @param percent the maximum percentage of map tasks that can fail without |
| the job being aborted.]]> |
| </doc> |
| </method> |
| <method name="getMaxReduceTaskFailuresPercent" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the maximum percentage of reduce tasks that can fail without |
| the job being aborted. |
| |
| Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()} |
| attempts before being declared as <i>failed</i>. |
| |
| Defaults to <code>zero</code>, i.e. <i>any</i> failed reduce-task results |
| in the job being declared as {@link JobStatus#FAILED}. |
| |
| @return the maximum percentage of reduce tasks that can fail without |
| the job being aborted.]]> |
| </doc> |
| </method> |
| <method name="setMaxReduceTaskFailuresPercent" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="percent" type="int"/> |
| <doc> |
| <![CDATA[Set the maximum percentage of reduce tasks that can fail without the job |
| being aborted. |
| |
| Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()} |
| attempts before being declared as <i>failed</i>. |
| |
| @param percent the maximum percentage of reduce tasks that can fail without |
| the job being aborted.]]> |
| </doc> |
| </method> |
| <method name="setJobPriority" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="prio" type="org.apache.hadoop.mapred.JobPriority"/> |
| <doc> |
| <![CDATA[Set {@link JobPriority} for this job. |
| |
| @param prio the {@link JobPriority} for this job.]]> |
| </doc> |
| </method> |
| <method name="setJobPriorityAsInteger" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="prio" type="int"/> |
| <doc> |
| <![CDATA[Set {@link JobPriority} for this job. |
| |
| @param prio the {@link JobPriority} for this job.]]> |
| </doc> |
| </method> |
| <method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link JobPriority} for this job. |
| |
| @return the {@link JobPriority} for this job.]]> |
| </doc> |
| </method> |
| <method name="getJobPriorityAsInteger" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the priority for this job. |
| |
| @return the priority for this job.]]> |
| </doc> |
| </method> |
| <method name="getProfileEnabled" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get whether the task profiling is enabled. |
| @return true if some tasks will be profiled]]> |
| </doc> |
| </method> |
| <method name="setProfileEnabled" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="newValue" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the system should collect profiler information for some of |
| the tasks in this job. The information is stored in the user log |
| directory. |
| @param newValue true means it should be gathered]]> |
| </doc> |
| </method> |
| <method name="getProfileParams" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the profiler configuration arguments. |
| |
| The default value for this property is |
| "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s" |
| |
| @return the parameters to pass to the task child to configure profiling]]> |
| </doc> |
| </method> |
| <method name="setProfileParams" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="value" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it |
| will be replaced with the name of the profiling output file when the task |
| runs. |
| |
| This value is passed to the task child JVM on the command line. |
| |
| @param value the configuration string]]> |
| </doc> |
| </method> |
| <method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="isMap" type="boolean"/> |
| <doc> |
| <![CDATA[Get the range of maps or reduces to profile. |
| @param isMap is the task a map? |
| @return the task ranges]]> |
| </doc> |
| </method> |
| <method name="setProfileTaskRange" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="isMap" type="boolean"/> |
| <param name="newValue" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true) |
| must also be called. |
| @param newValue a set of integer ranges of the map ids]]> |
| </doc> |
| </method> |
| <method name="setMapDebugScript" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="mDbgScript" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the debug script to run when the map tasks fail. |
| |
| <p>The debug script can aid debugging of failed map tasks. The script is |
| given task's stdout, stderr, syslog, jobconf files as arguments.</p> |
| |
| <p>The debug command, run on the node where the map failed, is:</p> |
| <p><blockquote><pre> |
| $script $stdout $stderr $syslog $jobconf. |
| </pre></blockquote> |
| |
| <p> The script file is distributed through {@link DistributedCache} |
| APIs. The script needs to be symlinked. </p> |
| |
| <p>Here is an example on how to submit a script |
| <p><blockquote><pre> |
| job.setMapDebugScript("./myscript"); |
| DistributedCache.createSymlink(job); |
| DistributedCache.addCacheFile("/debug/scripts/myscript#myscript"); |
| </pre></blockquote> |
| |
| @param mDbgScript the script name]]> |
| </doc> |
| </method> |
| <method name="getMapDebugScript" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the map task's debug script. |
| |
| @return the debug Script for the mapred job for failed map tasks. |
| @see #setMapDebugScript(String)]]> |
| </doc> |
| </method> |
| <method name="setReduceDebugScript" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="rDbgScript" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the debug script to run when the reduce tasks fail. |
| |
| <p>The debug script can aid debugging of failed reduce tasks. The script |
| is given task's stdout, stderr, syslog, jobconf files as arguments.</p> |
| |
| <p>The debug command, run on the node where the reduce failed, is:</p> |
| <p><blockquote><pre> |
| $script $stdout $stderr $syslog $jobconf. |
| </pre></blockquote> |
| |
| <p> The script file is distributed through {@link DistributedCache} |
| APIs. The script file needs to be symlinked </p> |
| |
| <p>Here is an example on how to submit a script |
| <p><blockquote><pre> |
| job.setReduceDebugScript("./myscript"); |
| DistributedCache.createSymlink(job); |
| DistributedCache.addCacheFile("/debug/scripts/myscript#myscript"); |
| </pre></blockquote> |
| |
| @param rDbgScript the script name]]> |
| </doc> |
| </method> |
| <method name="getReduceDebugScript" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the reduce task's debug script. |
| |
| @return the debug script for the mapred job for failed reduce tasks. |
| @see #setReduceDebugScript(String)]]> |
| </doc> |
| </method> |
| <method name="getJobEndNotificationURI" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the uri to be invoked in-order to send a notification after the job |
| has completed (success/failure). |
| |
| @return the job end notification uri, <code>null</code> if it hasn't |
| been set. |
| @see #setJobEndNotificationURI(String)]]> |
| </doc> |
| </method> |
| <method name="setJobEndNotificationURI" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="uri" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the uri to be invoked in-order to send a notification after the job |
| has completed (success/failure). |
| |
| <p>The uri can contain 2 special parameters: <tt>$jobId</tt> and |
| <tt>$jobStatus</tt>. Those, if present, are replaced by the job's |
| identifier and completion-status respectively.</p> |
| |
| <p>This is typically used by application-writers to implement chaining of |
| Map-Reduce jobs in an <i>asynchronous manner</i>.</p> |
| |
| @param uri the job end notification uri |
| @see JobStatus]]> |
| </doc> |
| </method> |
| <method name="getJobLocalDir" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get job-specific shared directory for use as scratch space |
| |
| <p> |
| When a job starts, a shared directory is created at location |
| <code> |
| ${mapreduce.cluster.local.dir}/taskTracker/$user/jobcache/$jobid/work/ </code>. |
| This directory is exposed to the users through |
| <code>mapreduce.job.local.dir </code>. |
| So, the tasks can use this space |
| as scratch space and share files among them. </p> |
| This value is available as System property also. |
| |
| @return The localized job specific shared directory]]> |
| </doc> |
| </method> |
| <method name="getMemoryForMapTask" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get memory required to run a map task of the job, in MB. |
| |
| If a value is specified in the configuration, it is returned. |
| Else, it returns {@link JobContext#DEFAULT_MAP_MEMORY_MB}. |
| <p> |
| For backward compatibility, if the job configuration sets the |
| key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different |
| from {@link #DISABLED_MEMORY_LIMIT}, that value will be used |
| after converting it from bytes to MB. |
| @return memory required to run a map task of the job, in MB.]]> |
| </doc> |
| </method> |
| <method name="setMemoryForMapTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="mem" type="long"/> |
| </method> |
| <method name="getMemoryForReduceTask" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get memory required to run a reduce task of the job, in MB. |
| |
| If a value is specified in the configuration, it is returned. |
| Else, it returns {@link JobContext#DEFAULT_REDUCE_MEMORY_MB}. |
| <p> |
| For backward compatibility, if the job configuration sets the |
| key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different |
| from {@link #DISABLED_MEMORY_LIMIT}, that value will be used |
| after converting it from bytes to MB. |
| @return memory required to run a reduce task of the job, in MB.]]> |
| </doc> |
| </method> |
| <method name="setMemoryForReduceTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="mem" type="long"/> |
| </method> |
| <method name="getQueueName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the name of the queue to which this job is submitted. |
| Defaults to 'default'. |
| |
| @return name of the queue]]> |
| </doc> |
| </method> |
| <method name="setQueueName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the name of the queue to which this job should be submitted. |
| |
| @param queueName Name of the queue]]> |
| </doc> |
| </method> |
| <method name="normalizeMemoryConfigValue" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="long"/> |
| <doc> |
| <![CDATA[Normalize the negative values in configuration |
| |
| @param val |
| @return normalized value]]> |
| </doc> |
| </method> |
| <method name="findContainingJar" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="my_class" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Find a jar that contains a class of the same name, if any. |
| It will return a jar file, even if that is not the first thing |
| on the class path that has a class with the same name. |
| |
| @param my_class the class to find. |
| @return a jar file that contains the class, or null.]]> |
| </doc> |
| </method> |
| <method name="getMaxVirtualMemoryForTask" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #getMemoryForMapTask()} and |
| {@link #getMemoryForReduceTask()}"> |
| <doc> |
| <![CDATA[Get the memory required to run a task of this job, in bytes. See |
| {@link #MAPRED_TASK_MAXVMEM_PROPERTY} |
| <p> |
| This method is deprecated. Now, different memory limits can be |
| set for map and reduce tasks of a job, in MB. |
| <p> |
| For backward compatibility, if the job configuration sets the |
| key {@link #MAPRED_TASK_MAXVMEM_PROPERTY}, that value is returned. |
| Otherwise, this method will return the larger of the values returned by |
| {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()} |
| after converting them into bytes. |
| |
| @return Memory required to run a task of this job, in bytes. |
| @see #setMaxVirtualMemoryForTask(long) |
| @deprecated Use {@link #getMemoryForMapTask()} and |
| {@link #getMemoryForReduceTask()}]]> |
| </doc> |
| </method> |
| <method name="setMaxVirtualMemoryForTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #setMemoryForMapTask(long mem)} and |
| Use {@link #setMemoryForReduceTask(long mem)}"> |
| <param name="vmem" type="long"/> |
| <doc> |
| <![CDATA[Set the maximum amount of memory any task of this job can use. See |
| {@link #MAPRED_TASK_MAXVMEM_PROPERTY} |
| <p> |
| mapred.task.maxvmem is split into |
| mapreduce.map.memory.mb |
| and mapreduce.reduce.memory.mb; |
| each of the new keys is set |
| to mapred.task.maxvmem / 1024, |
| as the new values are in MB |
| |
| @param vmem Maximum amount of virtual memory in bytes any task of this job |
| can use. |
| @see #getMaxVirtualMemoryForTask() |
| @deprecated |
| Use {@link #setMemoryForMapTask(long mem)} and |
| Use {@link #setMemoryForReduceTask(long mem)}]]> |
| </doc> |
| </method> |
| <method name="getMaxPhysicalMemoryForTask" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="this variable is deprecated and no longer in use."> |
| <doc> |
| <![CDATA[@deprecated this variable is deprecated and no longer in use.]]> |
| </doc> |
| </method> |
| <method name="setMaxPhysicalMemoryForTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="mem" type="long"/> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| </method> |
| <field name="MAPRED_TASK_MAXVMEM_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="Use {@link #MAPREDUCE_JOB_MAP_MEMORY_MB_PROPERTY} and |
| {@link #MAPREDUCE_JOB_REDUCE_MEMORY_MB_PROPERTY}"> |
| <doc> |
| <![CDATA[@deprecated Use {@link #MAPREDUCE_JOB_MAP_MEMORY_MB_PROPERTY} and |
| {@link #MAPREDUCE_JOB_REDUCE_MEMORY_MB_PROPERTY}]]> |
| </doc> |
| </field> |
| <field name="UPPER_LIMIT_ON_TASK_VMEM_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="deprecated, no comment"> |
| <doc> |
| <![CDATA[@deprecated]]> |
| </doc> |
| </field> |
| <field name="MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="deprecated, no comment"> |
| <doc> |
| <![CDATA[@deprecated]]> |
| </doc> |
| </field> |
| <field name="MAPRED_TASK_MAXPMEM_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="deprecated, no comment"> |
| <doc> |
| <![CDATA[@deprecated]]> |
| </doc> |
| </field> |
| <field name="DISABLED_MEMORY_LIMIT" type="long" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[A value which if set for memory related configuration options, |
| indicates that the options are turned off. |
| Deprecated because it makes no sense in the context of MR2.]]> |
| </doc> |
| </field> |
| <field name="MAPRED_LOCAL_DIR_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Property name for the configuration property mapreduce.cluster.local.dir]]> |
| </doc> |
| </field> |
| <field name="DEFAULT_QUEUE_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Name of the queue to which jobs will be submitted, if no queue |
| name is mentioned.]]> |
| </doc> |
| </field> |
| <field name="MAPRED_JOB_MAP_MEMORY_MB_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The variable is kept for M/R 1.x applications, while M/R 2.x applications |
| should use {@link #MAPREDUCE_JOB_MAP_MEMORY_MB_PROPERTY}]]> |
| </doc> |
| </field> |
| <field name="MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The variable is kept for M/R 1.x applications, while M/R 2.x applications |
| should use {@link #MAPREDUCE_JOB_REDUCE_MEMORY_MB_PROPERTY}]]> |
| </doc> |
| </field> |
| <field name="UNPACK_JAR_PATTERN_DEFAULT" type="java.util.regex.Pattern" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Pattern for the default unpacking behavior for job jars]]> |
| </doc> |
| </field> |
| <field name="MAPRED_TASK_JAVA_OPTS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="Use {@link #MAPRED_MAP_TASK_JAVA_OPTS} or |
| {@link #MAPRED_REDUCE_TASK_JAVA_OPTS}"> |
| <doc> |
| <![CDATA[Configuration key to set the java command line options for the child |
| map and reduce tasks. |
| |
| Java opts for the task tracker child processes. |
| The following symbol, if present, will be interpolated: @taskid@. |
| It is replaced by current TaskID. Any other occurrences of '@' will go |
| unchanged. |
| For example, to enable verbose gc logging to a file named for the taskid in |
| /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of: |
| -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc |
| |
| The configuration variable {@link #MAPRED_TASK_ENV} can be used to pass |
| other environment variables to the child processes. |
| |
| @deprecated Use {@link #MAPRED_MAP_TASK_JAVA_OPTS} or |
| {@link #MAPRED_REDUCE_TASK_JAVA_OPTS}]]> |
| </doc> |
| </field> |
| <field name="MAPRED_MAP_TASK_JAVA_OPTS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Configuration key to set the java command line options for the map tasks. |
| |
| Java opts for the task tracker child map processes. |
| The following symbol, if present, will be interpolated: @taskid@. |
| It is replaced by current TaskID. Any other occurrences of '@' will go |
| unchanged. |
| For example, to enable verbose gc logging to a file named for the taskid in |
| /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of: |
| -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc |
| |
| The configuration variable {@link #MAPRED_MAP_TASK_ENV} can be used to pass |
| other environment variables to the map processes.]]> |
| </doc> |
| </field> |
| <field name="MAPRED_REDUCE_TASK_JAVA_OPTS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Configuration key to set the java command line options for the reduce tasks. |
| |
| Java opts for the task tracker child reduce processes. |
| The following symbol, if present, will be interpolated: @taskid@. |
| It is replaced by current TaskID. Any other occurrences of '@' will go |
| unchanged. |
| For example, to enable verbose gc logging to a file named for the taskid in |
| /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of: |
| -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc |
| |
| The configuration variable {@link #MAPRED_REDUCE_TASK_ENV} can be used to |
| pass process environment variables to the reduce processes.]]> |
| </doc> |
| </field> |
| <field name="DEFAULT_MAPRED_TASK_JAVA_OPTS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="MAPRED_TASK_ULIMIT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="Configuration key to set the maximum virtual memory available to the child |
| map and reduce tasks (in kilo-bytes). This has been deprecated and will no |
| longer have any effect."> |
| <doc> |
| <![CDATA[@deprecated |
| Configuration key to set the maximum virtual memory available to the child |
| map and reduce tasks (in kilo-bytes). This has been deprecated and will no |
| longer have any effect.]]> |
| </doc> |
| </field> |
| <field name="MAPRED_MAP_TASK_ULIMIT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="Configuration key to set the maximum virtual memory available to the |
| map tasks (in kilo-bytes). This has been deprecated and will no |
| longer have any effect."> |
| <doc> |
| <![CDATA[@deprecated |
| Configuration key to set the maximum virtual memory available to the |
| map tasks (in kilo-bytes). This has been deprecated and will no |
| longer have any effect.]]> |
| </doc> |
| </field> |
| <field name="MAPRED_REDUCE_TASK_ULIMIT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="Configuration key to set the maximum virtual memory available to the |
| reduce tasks (in kilo-bytes). This has been deprecated and will no |
| longer have any effect."> |
| <doc> |
| <![CDATA[@deprecated |
| Configuration key to set the maximum virtual memory available to the |
| reduce tasks (in kilo-bytes). This has been deprecated and will no |
| longer have any effect.]]> |
| </doc> |
| </field> |
| <field name="MAPRED_TASK_ENV" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="Use {@link #MAPRED_MAP_TASK_ENV} or |
| {@link #MAPRED_REDUCE_TASK_ENV}"> |
| <doc> |
| <![CDATA[Configuration key to set the environment of the child map/reduce tasks. |
| |
| The format of the value is <code>k1=v1,k2=v2</code>. Further it can |
| reference existing environment variables via <code>$key</code> on |
| Linux or <code>%key%</code> on Windows. |
| |
| Example: |
| <ul> |
| <li> A=foo - This will set the env variable A to foo. </li> |
| </ul> |
| |
| @deprecated Use {@link #MAPRED_MAP_TASK_ENV} or |
| {@link #MAPRED_REDUCE_TASK_ENV}]]> |
| </doc> |
| </field> |
| <field name="MAPRED_MAP_TASK_ENV" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Configuration key to set the environment of the child map tasks. |
| |
| The format of the value is <code>k1=v1,k2=v2</code>. Further it can |
| reference existing environment variables via <code>$key</code> on |
| Linux or <code>%key%</code> on Windows. |
| |
| Example: |
| <ul> |
| <li> A=foo - This will set the env variable A to foo. </li> |
| </ul>]]> |
| </doc> |
| </field> |
| <field name="MAPRED_REDUCE_TASK_ENV" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Configuration key to set the environment of the child reduce tasks. |
| |
| The format of the value is <code>k1=v1,k2=v2</code>. Further it can |
| reference existing environment variables via <code>$key</code> on |
| Linux or <code>%key%</code> on Windows. |
| |
| Example: |
| <ul> |
| <li> A=foo - This will set the env variable A to foo. </li> |
| </ul>]]> |
| </doc> |
| </field> |
| <field name="MAPRED_MAP_TASK_LOG_LEVEL" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Configuration key to set the logging level for the map task. |
| |
| The allowed logging levels are: |
| OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and ALL.]]> |
| </doc> |
| </field> |
| <field name="MAPRED_REDUCE_TASK_LOG_LEVEL" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Configuration key to set the logging level for the reduce task. |
| |
| The allowed logging levels are: |
| OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and ALL.]]> |
| </doc> |
| </field> |
| <field name="DEFAULT_LOG_LEVEL" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Default logging level for map/reduce tasks.]]> |
| </doc> |
| </field> |
| <field name="WORKFLOW_ID" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#WORKFLOW_ID} instead]]> |
| </doc> |
| </field> |
| <field name="WORKFLOW_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#WORKFLOW_NAME} instead]]> |
| </doc> |
| </field> |
| <field name="WORKFLOW_NODE_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#WORKFLOW_NODE_NAME} instead]]> |
| </doc> |
| </field> |
| <field name="WORKFLOW_ADJACENCY_PREFIX_STRING" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#WORKFLOW_ADJACENCY_PREFIX_STRING} instead]]> |
| </doc> |
| </field> |
| <field name="WORKFLOW_ADJACENCY_PREFIX_PATTERN" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#WORKFLOW_ADJACENCY_PREFIX_PATTERN} instead]]> |
| </doc> |
| </field> |
| <field name="WORKFLOW_TAGS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| use {@link MRJobConfig#WORKFLOW_TAGS} instead]]> |
| </doc> |
| </field> |
| <field name="MAPREDUCE_RECOVER_JOB" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| not use it]]> |
| </doc> |
| </field> |
| <field name="DEFAULT_MAPREDUCE_RECOVER_JOB" type="boolean" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The variable is kept for M/R 1.x applications, M/R 2.x applications should |
| not use it]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[A map/reduce job configuration. |
| |
| <p><code>JobConf</code> is the primary interface for a user to describe a |
| map-reduce job to the Hadoop framework for execution. The framework tries to |
| faithfully execute the job as-is described by <code>JobConf</code>, however: |
| <ol> |
| <li> |
| Some configuration parameters might have been marked as |
| <a href="{@docRoot}/org/apache/hadoop/conf/Configuration.html#FinalParams"> |
| final</a> by administrators and hence cannot be altered. |
| </li> |
| <li> |
| While some job parameters are straight-forward to set |
| (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly |
| with the rest of the framework and/or job-configuration and is relatively |
| more complex for the user to control finely |
| (e.g. {@link #setNumMapTasks(int)}). |
| </li> |
| </ol> |
| |
| <p><code>JobConf</code> typically specifies the {@link Mapper}, combiner |
| (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and |
| {@link OutputFormat} implementations to be used etc. |
| |
| <p>Optionally <code>JobConf</code> is used to specify other advanced facets |
| of the job such as <code>Comparator</code>s to be used, files to be put in |
| the {@link DistributedCache}, whether or not intermediate and/or job outputs |
are to be compressed (and how), debuggability via user-provided scripts
( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}) |
for doing post-processing on task logs, task's stdout, stderr, syslog, |
etc.</p>
| |
| <p>Here is an example on how to configure a job via <code>JobConf</code>:</p> |
| <p><blockquote><pre> |
| // Create a new JobConf |
| JobConf job = new JobConf(new Configuration(), MyJob.class); |
| |
| // Specify various job-specific parameters |
| job.setJobName("myjob"); |
| |
| FileInputFormat.setInputPaths(job, new Path("in")); |
| FileOutputFormat.setOutputPath(job, new Path("out")); |
| |
| job.setMapperClass(MyJob.MyMapper.class); |
| job.setCombinerClass(MyJob.MyReducer.class); |
| job.setReducerClass(MyJob.MyReducer.class); |
| |
| job.setInputFormat(SequenceFileInputFormat.class); |
| job.setOutputFormat(SequenceFileOutputFormat.class); |
| </pre></blockquote> |
| |
| @see JobClient |
| @see ClusterStatus |
| @see Tool |
| @see DistributedCache]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobConf --> |
| <!-- start interface org.apache.hadoop.mapred.JobConfigurable --> |
| <interface name="JobConfigurable" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="configure" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Initializes a new instance from a {@link JobConf}. |
| |
| @param job the configuration]]> |
| </doc> |
| </method> |
| <doc> |
<![CDATA[Something that may be configured.]]>
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.JobConfigurable --> |
| <!-- start interface org.apache.hadoop.mapred.JobContext --> |
| <interface name="JobContext" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.JobContext"/> |
| <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the job Configuration |
| |
| @return JobConf]]> |
| </doc> |
| </method> |
| <method name="getProgressible" return="org.apache.hadoop.util.Progressable" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the progress mechanism for reporting progress. |
| |
| @return progress mechanism]]> |
| </doc> |
| </method> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.JobContext --> |
| <!-- start class org.apache.hadoop.mapred.JobID --> |
| <class name="JobID" extends="org.apache.hadoop.mapreduce.JobID" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobID" type="java.lang.String, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a JobID object |
| @param jtIdentifier jobTracker identifier |
| @param id job number]]> |
| </doc> |
| </constructor> |
| <constructor name="JobID" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="downgrade" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="old" type="org.apache.hadoop.mapreduce.JobID"/> |
| <doc> |
| <![CDATA[Downgrade a new JobID to an old one |
| @param old a new or old JobID |
| @return either old or a new JobID build to match old]]> |
| </doc> |
| </method> |
| <method name="read" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="forName" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="str" type="java.lang.String"/> |
| <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> |
| <doc> |
| <![CDATA[Construct a JobId object from given string |
| @return constructed JobId object or null if the given String is null |
| @throws IllegalArgumentException if the given string is malformed]]> |
| </doc> |
| </method> |
| <method name="getJobIDsPattern" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jtIdentifier" type="java.lang.String"/> |
| <param name="jobId" type="java.lang.Integer"/> |
| <doc> |
<![CDATA[Returns a regex pattern which matches job IDs. Arguments can
be given null, in which case that part of the regex will be generic. |
For example to obtain a regex matching <i>any job</i> |
run on the jobtracker started at <i>200707121733</i>, we would use: |
<pre> |
JobID.getJobIDsPattern("200707121733", null);
| </pre> |
| which will return : |
| <pre> "job_200707121733_[0-9]*" </pre> |
| @param jtIdentifier jobTracker identifier, or null |
| @param jobId job number, or null |
| @return a regex pattern matching JobIDs]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[JobID represents the immutable and unique identifier for |
| the job. JobID consists of two parts. First part |
| represents the jobtracker identifier, so that jobID to jobtracker map |
| is defined. For cluster setup this string is the jobtracker |
| start time, for local setting, it is "local". |
| Second part of the JobID is the job number. <br> |
| An example JobID is : |
| <code>job_200707121733_0003</code> , which represents the third job |
| running at the jobtracker started at <code>200707121733</code>. |
| <p> |
| Applications should never construct or parse JobID strings, but rather |
| use appropriate constructors or {@link #forName(String)} method. |
| |
| @see TaskID |
| @see TaskAttemptID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobID --> |
| <!-- start class org.apache.hadoop.mapred.JobPriority --> |
| <class name="JobPriority" extends="java.lang.Enum" |
| abstract="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.JobPriority[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.JobPriority" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <doc> |
| <![CDATA[Used to describe the priority of the running job. |
| DEFAULT : While submitting a job, if the user is not specifying priority, |
| YARN has the capability to pick the default priority as per its config. |
| Hence MapReduce can indicate such cases with this new enum. |
| UNDEFINED_PRIORITY : YARN supports priority as an integer. Hence other than |
| the five defined enums, YARN can consider other integers also. To generalize |
| such cases, this specific enum is used.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobPriority --> |
| <!-- start class org.apache.hadoop.mapred.JobQueueInfo --> |
| <class name="JobQueueInfo" extends="org.apache.hadoop.mapreduce.QueueInfo" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobQueueInfo" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Default constructor for Job Queue Info.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobQueueInfo" type="java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a new JobQueueInfo object using the queue name and the |
| scheduling information passed. |
| |
| @param queueName Name of the job queue |
| @param schedulingInfo Scheduling Information associated with the job |
| queue]]> |
| </doc> |
| </constructor> |
| <method name="getQueueState" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Use getState() instead]]> |
| </doc> |
| </method> |
| <method name="getChildren" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[Class that contains the information regarding the Job Queues which are |
| maintained by the Hadoop Map/Reduce framework.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobQueueInfo --> |
| <!-- start class org.apache.hadoop.mapred.JobStatus --> |
| <class name="JobStatus" extends="org.apache.hadoop.mapreduce.JobStatus" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobStatus" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param runState The current state of the job]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, org.apache.hadoop.mapred.JobPriority" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param runState The current state of the job |
| @param jp Priority of the job.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, java.lang.String, java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on cleanup |
| @param runState The current state of the job |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, int, java.lang.String, java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param runState The current state of the job |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job. |
| @param isUber Whether job running in uber mode]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job. |
| @param isUber Whether job running in uber mode |
| @param historyFile history file]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param queue job queue name. |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param queue job queue name. |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job. |
| @param isUber Whether job running in uber mode]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param queue job queue name. |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job. |
| @param isUber Whether job running in uber mode |
| @param historyFile history file]]> |
| </doc> |
| </constructor> |
| <method name="getJobRunState" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="state" type="int"/> |
| <doc> |
| <![CDATA[Helper method to get human-readable state of the job. |
| @param state job state |
| @return human-readable state of the job]]> |
| </doc> |
| </method> |
| <method name="downgrade" return="org.apache.hadoop.mapred.JobStatus" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="stat" type="org.apache.hadoop.mapreduce.JobStatus"/> |
| </method> |
| <method name="getJobId" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use getJobID instead"> |
| <doc> |
| <![CDATA[@deprecated use getJobID instead]]> |
| </doc> |
| </method> |
| <method name="getJobID" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return The jobid of the Job]]> |
| </doc> |
| </method> |
| <method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the priority of the job |
| @return job priority]]> |
| </doc> |
| </method> |
| <method name="setMapProgress" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="p" type="float"/> |
| <doc> |
| <![CDATA[Sets the map progress of this job |
| @param p The value of map progress to set to]]> |
| </doc> |
| </method> |
| <method name="setCleanupProgress" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="p" type="float"/> |
| <doc> |
| <![CDATA[Sets the cleanup progress of this job |
| @param p The value of cleanup progress to set to]]> |
| </doc> |
| </method> |
| <method name="setSetupProgress" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="p" type="float"/> |
| <doc> |
| <![CDATA[Sets the setup progress of this job |
| @param p The value of setup progress to set to]]> |
| </doc> |
| </method> |
| <method name="setReduceProgress" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="p" type="float"/> |
| <doc> |
| <![CDATA[Sets the reduce progress of this Job |
| @param p The value of reduce progress to set to]]> |
| </doc> |
| </method> |
| <method name="setFinishTime" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="finishTime" type="long"/> |
| <doc> |
| <![CDATA[Set the finish time of the job |
| @param finishTime The finishTime of the job]]> |
| </doc> |
| </method> |
| <method name="setHistoryFile" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="historyFile" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the job history file url for a completed job]]> |
| </doc> |
| </method> |
| <method name="setTrackingUrl" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="trackingUrl" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the link to the web-ui for details of the job.]]> |
| </doc> |
| </method> |
| <method name="setRetired" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Set the job retire flag to true.]]> |
| </doc> |
| </method> |
| <method name="getRunState" return="int" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return running state of the job]]> |
| </doc> |
| </method> |
| <method name="setStartTime" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="startTime" type="long"/> |
| <doc> |
| <![CDATA[Set the start time of the job |
| @param startTime The startTime of the job]]> |
| </doc> |
| </method> |
| <method name="setUsername" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="userName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[@param userName The username of the job]]> |
| </doc> |
| </method> |
| <method name="setJobACLs" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="acls" type="java.util.Map"/> |
| </method> |
| <method name="setFailureInfo" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="failureInfo" type="java.lang.String"/> |
| </method> |
| <method name="setJobPriority" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jp" type="org.apache.hadoop.mapred.JobPriority"/> |
| <doc> |
| <![CDATA[Set the priority of the job, defaulting to NORMAL. |
| @param jp new job priority]]> |
| </doc> |
| </method> |
| <method name="mapProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in maps]]> |
| </doc> |
| </method> |
| <method name="cleanupProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in cleanup]]> |
| </doc> |
| </method> |
| <method name="setupProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in setup]]> |
| </doc> |
| </method> |
| <method name="reduceProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in reduce]]> |
| </doc> |
| </method> |
| <field name="RUNNING" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SUCCEEDED" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FAILED" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="PREP" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="KILLED" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Describes the current status of a job. This is |
| not intended to be a comprehensive piece of data. |
| For that, look at JobProfile.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobStatus --> |
| <!-- start class org.apache.hadoop.mapred.KeyValueLineRecordReader --> |
| <class name="KeyValueLineRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="getKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createKey" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="findSeparator" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="utf" type="byte[]"/> |
| <param name="start" type="int"/> |
| <param name="length" type="int"/> |
| <param name="sep" type="byte"/> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="value" type="org.apache.hadoop.io.Text"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Read key/value pair in a line.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[This class treats a line in the input as a key/value pair separated by a |
| separator character. The separator can be specified in config file |
| under the attribute name mapreduce.input.keyvaluelinerecordreader.key.value.separator. The default |
| separator is the tab character ('\t').]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.KeyValueLineRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.KeyValueTextInputFormat --> |
| <class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="KeyValueTextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines. |
| Either linefeed or carriage-return are used to signal end of line. Each line |
| is divided into key and value parts by a separator byte. If no such a byte |
| exists, the key will be the entire line and value will be empty.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.KeyValueTextInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.MapFileOutputFormat --> |
| <class name="MapFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MapFileOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="dir" type="org.apache.hadoop.fs.Path"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Open the output generated by this format.]]> |
| </doc> |
| </method> |
| <method name="getEntry" return="org.apache.hadoop.io.Writable" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/> |
| <param name="partitioner" type="org.apache.hadoop.mapred.Partitioner"/> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get an entry from output generated by this class.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[An {@link OutputFormat} that writes {@link MapFile}s.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MapFileOutputFormat --> |
| <!-- start interface org.apache.hadoop.mapred.Mapper --> |
| <interface name="Mapper" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <implements name="org.apache.hadoop.io.Closeable"/> |
| <method name="map" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K1"/> |
| <param name="value" type="V1"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Maps a single input key/value pair into an intermediate key/value pair. |
| |
| <p>Output pairs need not be of the same types as input pairs. A given |
| input pair may map to zero or many output pairs. Output pairs are |
| collected with calls to |
| {@link OutputCollector#collect(Object,Object)}.</p> |
| |
| <p>Applications can use the {@link Reporter} provided to report progress |
| or just indicate that they are alive. In scenarios where the application |
| takes significant amount of time to process individual key/value |
| pairs, this is crucial since the framework might assume that the task has |
| timed-out and kill that task. The other way of avoiding this is to set |
| <a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.task.timeout"> |
| mapreduce.task.timeout</a> to a high-enough value (or even zero for no |
| time-outs).</p> |
| |
| @param key the input key. |
| @param value the input value. |
| @param output collects mapped keys and values. |
| @param reporter facility to report progress.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs. |
| |
| <p>Maps are the individual tasks which transform input records into |
| intermediate records. The transformed intermediate records need not be of |
| the same type as the input records. A given input pair may map to zero or |
| many output pairs.</p> |
| |
| <p>The Hadoop Map-Reduce framework spawns one map task for each |
| {@link InputSplit} generated by the {@link InputFormat} for the job. |
| <code>Mapper</code> implementations can access the {@link JobConf} for the |
| job via the {@link JobConfigurable#configure(JobConf)} and initialize |
| themselves. Similarly they can use the {@link Closeable#close()} method for |
| de-initialization.</p> |
| |
| <p>The framework then calls |
| {@link #map(Object, Object, OutputCollector, Reporter)} |
| for each key/value pair in the <code>InputSplit</code> for that task.</p> |
| |
| <p>All intermediate values associated with a given output key are |
| subsequently grouped by the framework, and passed to a {@link Reducer} to |
| determine the final output. Users can control the grouping by specifying |
| a <code>Comparator</code> via |
| {@link JobConf#setOutputKeyComparatorClass(Class)}.</p> |
| |
| <p>The grouped <code>Mapper</code> outputs are partitioned per |
| <code>Reducer</code>. Users can control which keys (and hence records) go to |
| which <code>Reducer</code> by implementing a custom {@link Partitioner}. |
| |
| <p>Users can optionally specify a <code>combiner</code>, via |
| {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the |
| intermediate outputs, which helps to cut down the amount of data transferred |
| from the <code>Mapper</code> to the <code>Reducer</code>. |
| |
| <p>The intermediate, grouped outputs are always stored in |
| {@link SequenceFile}s. Applications can specify if and how the intermediate |
| outputs are to be compressed and which {@link CompressionCodec}s are to be |
| used via the <code>JobConf</code>.</p> |
| |
| <p>If the job has |
| <a href="{@docRoot}/org/apache/hadoop/mapred/JobConf.html#ReducerNone">zero |
| reduces</a> then the output of the <code>Mapper</code> is directly written |
| to the {@link FileSystem} without grouping by keys.</p> |
| |
| <p>Example:</p> |
| <p><blockquote><pre> |
| public class MyMapper<K extends WritableComparable, V extends Writable> |
| extends MapReduceBase implements Mapper<K, V, K, V> { |
| |
| static enum MyCounters { NUM_RECORDS } |
| |
| private String mapTaskId; |
| private String inputFile; |
| private int noRecords = 0; |
| |
| public void configure(JobConf job) { |
| mapTaskId = job.get(JobContext.TASK_ATTEMPT_ID); |
| inputFile = job.get(JobContext.MAP_INPUT_FILE); |
| } |
| |
| public void map(K key, V val, |
| OutputCollector<K, V> output, Reporter reporter) |
| throws IOException { |
| // Process the <key, value> pair (assume this takes a while) |
| // ... |
| // ... |
| |
| // Let the framework know that we are alive, and kicking! |
| // reporter.progress(); |
| |
| // Process some more |
| // ... |
| // ... |
| |
| // Increment the no. of <key, value> pairs processed |
| ++noRecords; |
| |
| // Increment counters |
| reporter.incrCounter(NUM_RECORDS, 1); |
| |
| // Every 100 records update application-level status |
| if ((noRecords%100) == 0) { |
| reporter.setStatus(mapTaskId + " processed " + noRecords + |
| " from input-file: " + inputFile); |
| } |
| |
| // Output the result |
| output.collect(key, val); |
| } |
| } |
| </pre></blockquote> |
| |
| <p>Applications may write a custom {@link MapRunnable} to exert greater |
| control on map processing e.g. multi-threaded <code>Mapper</code>s etc.</p> |
| |
| @see JobConf |
| @see InputFormat |
| @see Partitioner |
| @see Reducer |
| @see MapReduceBase |
| @see MapRunnable |
| @see SequenceFile]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.Mapper --> |
| <!-- start class org.apache.hadoop.mapred.MapReduceBase --> |
| <class name="MapReduceBase" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Closeable"/> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="MapReduceBase" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Default implementation that does nothing.]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Default implementation that does nothing.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Base class for {@link Mapper} and {@link Reducer} implementations. |
| |
| <p>Provides default no-op implementations for a few methods, most non-trivial |
| applications need to override some of them.</p>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MapReduceBase --> |
| <!-- start interface org.apache.hadoop.mapred.MapRunnable --> |
| <interface name="MapRunnable" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <method name="run" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="input" type="org.apache.hadoop.mapred.RecordReader"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Start mapping input <tt><key, value></tt> pairs. |
| |
| <p>Mapping of input records to output records is complete when this method |
| returns.</p> |
| |
| @param input the {@link RecordReader} to read the input records. |
| @param output the {@link OutputCollector} to collect the output records. |
| @param reporter {@link Reporter} to report progress, status-updates etc. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Expert: Generic interface for {@link Mapper}s. |
| |
| <p>Custom implementations of <code>MapRunnable</code> can exert greater |
| control on map processing e.g. multi-threaded, asynchronous mappers etc.</p> |
| |
| @see Mapper]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.MapRunnable --> |
| <!-- start class org.apache.hadoop.mapred.MapRunner --> |
| <class name="MapRunner" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.MapRunnable"/> |
| <constructor name="MapRunner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="input" type="org.apache.hadoop.mapred.RecordReader"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getMapper" return="org.apache.hadoop.mapred.Mapper" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[Default {@link MapRunnable} implementation.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MapRunner --> |
| <!-- start class org.apache.hadoop.mapred.MultiFileInputFormat --> |
| <class name="MultiFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultiFileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An abstract {@link InputFormat} that returns {@link MultiFileSplit}'s |
| in {@link #getSplits(JobConf, int)} method. Splits are constructed from |
| the files under the input paths. Each split returned contains <i>nearly</i> |
| equal content length. <br> |
| Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} |
| to construct <code>RecordReader</code>'s for <code>MultiFileSplit</code>'s. |
| @see MultiFileSplit]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MultiFileInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.MultiFileSplit --> |
| <class name="MultiFileSplit" extends="org.apache.hadoop.mapred.lib.CombineFileSplit" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultiFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[A sub-collection of input files. Unlike {@link FileSplit}, MultiFileSplit |
| class does not represent a split of a file, but a split of input files |
| into smaller sets. The atomic unit of split is a file. <br> |
| MultiFileSplit can be used to implement {@link RecordReader}'s, with |
| reading one record per file. |
| @see FileSplit |
| @see MultiFileInputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MultiFileSplit --> |
| <!-- start interface org.apache.hadoop.mapred.OutputCollector --> |
| <interface name="OutputCollector" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="collect" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Adds a key/value pair to the output. |
| |
| @param key the key to collect. |
| @param value the value to collect. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Collects the <code><key, value></code> pairs output by {@link Mapper}s |
| and {@link Reducer}s. |
| |
| <p><code>OutputCollector</code> is the generalization of the facility |
| provided by the Map-Reduce framework to collect data output by either the |
| <code>Mapper</code> or the <code>Reducer</code> i.e. intermediate outputs |
| or the output of the job.</p>]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.OutputCollector --> |
| <!-- start class org.apache.hadoop.mapred.OutputCommitter --> |
| <class name="OutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="OutputCommitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setupJob" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For the framework to setup the job output during initialization. This is |
| called from the application master process for the entire job. This will be |
| called multiple times, once per job attempt. |
| |
| @param jobContext Context of the job whose output is being written. |
| @throws IOException if temporary output could not be created]]> |
| </doc> |
| </method> |
| <method name="cleanupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #commitJob(JobContext)} or |
| {@link #abortJob(JobContext, int)} instead."> |
| <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For cleaning up the job's output after job completion. This is called |
| from the application master process for the entire job. This may be called |
| multiple times. |
| |
| @param jobContext Context of the job whose output is being written. |
| @throws IOException |
| @deprecated Use {@link #commitJob(JobContext)} or |
| {@link #abortJob(JobContext, int)} instead.]]> |
| </doc> |
| </method> |
| <method name="commitJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For committing job's output after successful job completion. Note that this |
| is invoked for jobs with final runstate as SUCCESSFUL. This is called |
| from the application master process for the entire job. This is guaranteed |
| to only be called once. If it throws an exception the entire job will |
| fail. |
| |
| @param jobContext Context of the job whose output is being written. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="abortJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> |
| <param name="status" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For aborting an unsuccessful job's output. Note that this is invoked for |
| jobs with final runstate as {@link JobStatus#FAILED} or |
| {@link JobStatus#KILLED}. This is called from the application |
| master process for the entire job. This may be called multiple times. |
| |
| @param jobContext Context of the job whose output is being written. |
| @param status final runstate of the job |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setupTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Sets up output for the task. This is called from each individual task's |
| process that will output to HDFS, and it is called just for that task. This |
| may be called multiple times for the same task, but for different task |
| attempts. |
| |
| @param taskContext Context of the task whose output is being written. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="needsTaskCommit" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check whether task needs a commit. This is called from each individual |
| task's process that will output to HDFS, and it is called just for that |
| task. |
| |
| @param taskContext |
| @return true/false |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="commitTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[To promote the task's temporary output to final output location. |
| If {@link #needsTaskCommit(TaskAttemptContext)} returns true and this |
| task is the task that the AM determines finished first, this method |
| is called to commit an individual task's output. This is to mark |
| that task's output as complete, as {@link #commitJob(JobContext)} will |
| also be called later on if the entire job finished successfully. This |
| is called from a task's process. This may be called multiple times for the |
| same task, but different task attempts. It should be very rare for this to |
| be called multiple times and requires odd networking failures to make this |
| happen. In the future the Hadoop framework may eliminate this race. |
| |
| @param taskContext Context of the task whose output is being written. |
| @throws IOException if commit is not successful]]> |
| </doc> |
| </method> |
| <method name="abortTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Discard the task output. This is called from a task's process to clean |
| up a single task's output that can not yet been committed. This may be |
| called multiple times for the same task, but for different task attempts. |
| |
| @param taskContext |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isRecoverySupported" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #isRecoverySupported(JobContext)} instead."> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this is |
| a bridge between the two. |
| |
| @deprecated Use {@link #isRecoverySupported(JobContext)} instead.]]> |
| </doc> |
| </method> |
| <method name="isRecoverySupported" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Is task output recovery supported for restarting jobs? |
| |
| If task output recovery is supported, job restart can be done more |
| efficiently. |
| |
| @param jobContext |
| Context of the job whose output is being written. |
| @return <code>true</code> if task output recovery is supported, |
| <code>false</code> otherwise |
| @throws IOException |
| @see #recoverTask(TaskAttemptContext)]]> |
| </doc> |
| </method> |
| <method name="isCommitJobRepeatable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns true if an in-progress job commit can be retried. If the MR AM is |
| re-run then it will check this value to determine if it can retry an |
| in-progress commit that was started by a previous version. |
| Note that in rare scenarios, the previous AM version might still be running |
| at that time, due to system anomalies. Hence if this method returns true |
| then the retry commit operation should be able to run concurrently with |
| the previous operation. |
| |
| If repeatable job commit is supported, job restart can tolerate previous |
| AM failures during job commit. |
| |
| By default, it is not supported. Extended classes (like: |
| FileOutputCommitter) should explicitly override it if they provide support. |
| |
| @param jobContext |
| Context of the job whose output is being written. |
| @return <code>true</code> repeatable job commit is supported, |
| <code>false</code> otherwise |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isCommitJobRepeatable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="recoverTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Recover the task output. |
| |
| The retry-count for the job will be passed via the |
| {@link MRConstants#APPLICATION_ATTEMPT_ID} key in |
| {@link TaskAttemptContext#getConfiguration()} for the |
| <code>OutputCommitter</code>. This is called from the application master |
| process, but it is called individually for each task. |
| |
| If an exception is thrown the task will be attempted again. |
| |
| @param taskContext Context of the task whose output is being recovered |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="cleanupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="Use {@link #commitJob(org.apache.hadoop.mapreduce.JobContext)} |
| or {@link #abortJob(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.mapreduce.JobStatus.State)} |
| instead."> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two. |
| @deprecated Use {@link #commitJob(org.apache.hadoop.mapreduce.JobContext)} |
| or {@link #abortJob(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.mapreduce.JobStatus.State)} |
| instead.]]> |
| </doc> |
| </method> |
| <method name="commitJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="abortJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="runState" type="org.apache.hadoop.mapreduce.JobStatus.State"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="setupTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="needsTaskCommit" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="commitTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="abortTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="recoverTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="isRecoverySupported" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this is |
| a bridge between the two.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>OutputCommitter</code> describes the commit of task output for a |
| Map-Reduce job. |
| |
| <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of |
| the job to:</p> |
| <ol> |
| <li> |
| Setup the job during initialization. For example, create the temporary |
| output directory for the job during the initialization of the job. |
| </li> |
| <li> |
| Cleanup the job after the job completion. For example, remove the |
| temporary output directory after the job completion. |
| </li> |
| <li> |
| Setup the task temporary output. |
| </li> |
| <li> |
| Check whether a task needs a commit. This is to avoid the commit |
| procedure if a task does not need commit. |
| </li> |
| <li> |
| Commit of the task output. |
| </li> |
| <li> |
| Discard the task commit. |
| </li> |
| </ol> |
| The methods in this class can be called from several different processes and |
| from several different contexts. It is important to know which process and |
| which context each is called from. Each method should be marked accordingly |
| in its documentation. It is also important to note that not all methods are |
| guaranteed to be called once and only once. If a method is not guaranteed to |
| have this property the output committer needs to handle this appropriately. |
| Also note it will only be in rare situations where they may be called |
| multiple times for the same task. |
| |
| @see FileOutputCommitter |
| @see JobContext |
| @see TaskAttemptContext]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.OutputCommitter --> |
| <!-- start interface org.apache.hadoop.mapred.OutputFormat --> |
| <interface name="OutputFormat" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the {@link RecordWriter} for the given job. |
| |
| @param ignored |
| @param job configuration for the job whose output is being written. |
| @param name the unique name for this part of the output. |
| @param progress mechanism for reporting progress while writing to file. |
| @return a {@link RecordWriter} to write the output for the job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check for validity of the output-specification for the job. |
| |
| <p>This is to validate the output specification for the job when the |
| job is submitted. Typically checks that it does not already exist, |
| throwing an exception when it already exists, so that output is not |
| overwritten.</p> |
| |
| @param ignored |
| @param job job configuration. |
| @throws IOException when output should not be attempted]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>OutputFormat</code> describes the output-specification for a |
| Map-Reduce job. |
| |
| <p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the |
| job to:</p> |
| <ol> |
| <li> |
| Validate the output-specification of the job. For e.g. check that the |
| output directory doesn't already exist. |
| <li> |
| Provide the {@link RecordWriter} implementation to be used to write out |
| the output files of the job. Output files are stored in a |
| {@link FileSystem}. |
| </li> |
| </ol> |
| |
| @see RecordWriter |
| @see JobConf]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.OutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.OutputLogFilter --> |
| <class name="OutputLogFilter" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.fs.PathFilter"/> |
| <constructor name="OutputLogFilter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="accept" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <doc> |
| <![CDATA[This class filters log files from directory given |
| It doesn't accept paths having _logs. |
| This can be used to list paths of output directory as follows: |
| Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir, |
| new OutputLogFilter()));]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.OutputLogFilter --> |
| <!-- start interface org.apache.hadoop.mapred.Partitioner --> |
| <interface name="Partitioner" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <method name="getPartition" return="int" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K2"/> |
| <param name="value" type="V2"/> |
| <param name="numPartitions" type="int"/> |
| <doc> |
| <![CDATA[Get the partition number for a given key (hence record) given the total |
| number of partitions i.e. number of reduce-tasks for the job. |
| |
| <p>Typically a hash function on all or a subset of the key.</p> |
| |
| @param key the key to be partitioned. |
| @param value the entry value. |
| @param numPartitions the total number of partitions. |
| @return the partition number for the <code>key</code>.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Partitions the key space. |
| |
| <p><code>Partitioner</code> controls the partitioning of the keys of the |
| intermediate map-outputs. The key (or a subset of the key) is used to derive |
| the partition, typically by a hash function. The total number of partitions |
| is the same as the number of reduce tasks for the job. Hence this controls |
| which of the <code>m</code> reduce tasks the intermediate key (and hence the |
| record) is sent for reduction.</p> |
| |
| <p>Note: A <code>Partitioner</code> is created only when there are multiple |
| reducers.</p> |
| |
| @see Reducer]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.Partitioner --> |
| <!-- start interface org.apache.hadoop.mapred.RecordReader --> |
| <interface name="RecordReader" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="java.io.Closeable"/> |
| <method name="next" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Reads the next key/value pair from the input for processing. |
| |
| @param key the key to read data into |
| @param value the value to read data into |
| @return true iff a key/value was read, false if at EOF]]> |
| </doc> |
| </method> |
| <method name="createKey" return="K" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create an object of the appropriate type to be used as a key. |
| |
| @return a new key object.]]> |
| </doc> |
| </method> |
| <method name="createValue" return="V" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create an object of the appropriate type to be used as a value. |
| |
| @return a new value object.]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns the current position in the input. |
| |
| @return the current position in the input. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close this {@link InputSplit} to future operations. |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[How much of the input has the {@link RecordReader} consumed i.e. |
| has been processed by? |
| |
| @return progress from <code>0.0</code> to <code>1.0</code>. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>RecordReader</code> reads <key, value> pairs from an |
| {@link InputSplit}. |
| |
| <p><code>RecordReader</code>, typically, converts the byte-oriented view of |
| the input, provided by the <code>InputSplit</code>, and presents a |
| record-oriented view for the {@link Mapper} and {@link Reducer} tasks for |
| processing. It thus assumes the responsibility of processing record |
| boundaries and presenting the tasks with keys and values.</p> |
| |
| @see InputSplit |
| @see InputFormat]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.RecordReader --> |
| <!-- start interface org.apache.hadoop.mapred.RecordWriter --> |
| <interface name="RecordWriter" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="write" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Writes a key/value pair. |
| |
| @param key the key to write. |
| @param value the value to write. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close this <code>RecordWriter</code> to future operations. |
| |
| @param reporter facility to report progress. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>RecordWriter</code> writes the output <key, value> pairs |
| to an output file. |
| |
| <p><code>RecordWriter</code> implementations write the job outputs to the |
| {@link FileSystem}. |
| |
| @see OutputFormat]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.RecordWriter --> |
| <!-- start interface org.apache.hadoop.mapred.Reducer --> |
| <interface name="Reducer" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <implements name="org.apache.hadoop.io.Closeable"/> |
| <method name="reduce" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K2"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[<i>Reduces</i> values for a given key. |
| |
| <p>The framework calls this method for each |
| <code><key, (list of values)></code> pair in the grouped inputs. |
| Output values must be of the same type as input values. Input keys must |
| not be altered. The framework will <b>reuse</b> the key and value objects |
| that are passed into the reduce, therefore the application should clone |
| the objects they want to keep a copy of. In many cases, all values are |
| combined into zero or one value. |
| </p> |
| |
| <p>Output pairs are collected with calls to |
| {@link OutputCollector#collect(Object,Object)}.</p> |
| |
| <p>Applications can use the {@link Reporter} provided to report progress |
| or just indicate that they are alive. In scenarios where the application |
| takes a significant amount of time to process individual key/value |
| pairs, this is crucial since the framework might assume that the task has |
| timed-out and kill that task. The other way of avoiding this is to set |
| <a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.task.timeout"> |
| mapreduce.task.timeout</a> to a high-enough value (or even zero for no |
| time-outs).</p> |
| |
| @param key the key. |
| @param values the list of values to reduce. |
| @param output to collect keys and combined values. |
| @param reporter facility to report progress.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Reduces a set of intermediate values which share a key to a smaller set of |
| values. |
| |
| <p>The number of <code>Reducer</code>s for the job is set by the user via |
| {@link JobConf#setNumReduceTasks(int)}. <code>Reducer</code> implementations |
| can access the {@link JobConf} for the job via the |
| {@link JobConfigurable#configure(JobConf)} method and initialize themselves. |
| Similarly they can use the {@link Closeable#close()} method for |
| de-initialization.</p> |
| |
| <p><code>Reducer</code> has 3 primary phases:</p> |
| <ol> |
| <li> |
| |
| <b id="Shuffle">Shuffle</b> |
| |
| <p><code>Reducer</code> is input the grouped output of a {@link Mapper}. |
| In this phase the framework, for each <code>Reducer</code>, fetches the |
| relevant partition of the output of all the <code>Mapper</code>s, via HTTP. |
| </p> |
| </li> |
| |
| <li> |
| <b id="Sort">Sort</b> |
| |
| <p>The framework groups <code>Reducer</code> inputs by <code>key</code>s |
| (since different <code>Mapper</code>s may have output the same key) in this |
| stage.</p> |
| |
| <p>The shuffle and sort phases occur simultaneously i.e. while outputs are |
| being fetched they are merged.</p> |
| |
| <b id="SecondarySort">SecondarySort</b> |
| |
| <p>If equivalence rules for keys while grouping the intermediates are |
| different from those for grouping keys before reduction, then one may |
| specify a <code>Comparator</code> via |
| {@link JobConf#setOutputValueGroupingComparator(Class)}. Since |
| {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to |
| control how intermediate keys are grouped, these can be used in conjunction |
| to simulate <i>secondary sort on values</i>.</p> |
| |
| |
| For example, say that you want to find duplicate web pages and tag them |
| all with the url of the "best" known example. You would set up the job |
| like: |
| <ul> |
| <li>Map Input Key: url</li> |
| <li>Map Input Value: document</li> |
| <li>Map Output Key: document checksum, url pagerank</li> |
| <li>Map Output Value: url</li> |
| <li>Partitioner: by checksum</li> |
| <li>OutputKeyComparator: by checksum and then decreasing pagerank</li> |
| <li>OutputValueGroupingComparator: by checksum</li> |
| </ul> |
| </li> |
| |
| <li> |
| <b id="Reduce">Reduce</b> |
| |
| <p>In this phase the |
| {@link #reduce(Object, Iterator, OutputCollector, Reporter)} |
| method is called for each <code><key, (list of values)></code> pair in |
| the grouped inputs.</p> |
| <p>The output of the reduce task is typically written to the |
| {@link FileSystem} via |
| {@link OutputCollector#collect(Object, Object)}.</p> |
| </li> |
| </ol> |
| |
| <p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p> |
| |
| <p>Example:</p> |
| <p><blockquote><pre> |
| public class MyReducer<K extends WritableComparable, V extends Writable> |
| extends MapReduceBase implements Reducer<K, V, K, V> { |
| |
| static enum MyCounters { NUM_RECORDS } |
| |
| private String reduceTaskId; |
| private int noKeys = 0; |
| |
| public void configure(JobConf job) { |
| reduceTaskId = job.get(JobContext.TASK_ATTEMPT_ID); |
| } |
| |
| public void reduce(K key, Iterator<V> values, |
| OutputCollector<K, V> output, |
| Reporter reporter) |
| throws IOException { |
| |
| // Process |
| int noValues = 0; |
| while (values.hasNext()) { |
| V value = values.next(); |
| |
| // Increment the no. of values for this key |
| ++noValues; |
| |
| // Process the <key, value> pair (assume this takes a while) |
| // ... |
| // ... |
| |
| // Let the framework know that we are alive, and kicking! |
| if ((noValues%10) == 0) { |
| reporter.progress(); |
| } |
| |
| // Process some more |
| // ... |
| // ... |
| |
| // Output the <key, value> |
| output.collect(key, value); |
| } |
| |
| // Increment the no. of <key, list of values> pairs processed |
| ++noKeys; |
| |
| // Increment counters |
| reporter.incrCounter(NUM_RECORDS, 1); |
| |
| // Every 100 keys update application-level status |
| if ((noKeys%100) == 0) { |
| reporter.setStatus(reduceTaskId + " processed " + noKeys); |
| } |
| } |
| } |
| </pre></blockquote> |
| |
| @see Mapper |
| @see Partitioner |
| @see Reporter |
| @see MapReduceBase]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.Reducer --> |
| <!-- start interface org.apache.hadoop.mapred.Reporter --> |
| <interface name="Reporter" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.util.Progressable"/> |
| <method name="setStatus" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="status" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the status description for the task. |
| |
| @param status brief description of the current status.]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.Enum"/> |
| <doc> |
| <![CDATA[Get the {@link Counter} of the given group with the given name. |
| |
| @param name counter name |
| @return the <code>Counter</code> of the given group/name.]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="group" type="java.lang.String"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Get the {@link Counter} of the given group with the given name. |
| |
| @param group counter group |
| @param name counter name |
| @return the <code>Counter</code> of the given group/name.]]> |
| </doc> |
| </method> |
| <method name="incrCounter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Enum"/> |
| <param name="amount" type="long"/> |
| <doc> |
| <![CDATA[Increments the counter identified by the key, which can be of |
| any {@link Enum} type, by the specified amount. |
| |
| @param key key to identify the counter to be incremented. The key can |
| be any <code>Enum</code>. |
| @param amount A non-negative amount by which the counter is to |
| be incremented.]]> |
| </doc> |
| </method> |
| <method name="incrCounter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="group" type="java.lang.String"/> |
| <param name="counter" type="java.lang.String"/> |
| <param name="amount" type="long"/> |
| <doc> |
| <![CDATA[Increments the counter identified by the group and counter name |
| by the specified amount. |
| |
| @param group name to identify the group of the counter to be incremented. |
| @param counter name to identify the counter within the group. |
| @param amount A non-negative amount by which the counter is to |
| be incremented.]]> |
| </doc> |
| </method> |
| <method name="getInputSplit" return="org.apache.hadoop.mapred.InputSplit" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="UnsupportedOperationException" type="java.lang.UnsupportedOperationException"/> |
| <doc> |
| <![CDATA[Get the {@link InputSplit} object for a map. |
| |
| @return the <code>InputSplit</code> that the map is reading from. |
| @throws UnsupportedOperationException if called outside a mapper]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the progress of the task. Progress is represented as a number between |
| 0 and 1 (inclusive).]]> |
| </doc> |
| </method> |
| <field name="NULL" type="org.apache.hadoop.mapred.Reporter" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[A constant of Reporter type that does nothing.]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[A facility for Map-Reduce applications to report progress and update |
| counters, status information etc. |
| |
| <p>{@link Mapper} and {@link Reducer} can use the <code>Reporter</code> |
| provided to report progress or just indicate that they are alive. In |
| scenarios where the application takes significant amount of time to |
| process individual key/value pairs, this is crucial since the framework |
| might assume that the task has timed-out and kill that task. |
| |
| <p>Applications can also update {@link Counters} via the provided |
| <code>Reporter</code>.</p> |
| |
| @see Progressable |
| @see Counters]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.Reporter --> |
| <!-- start interface org.apache.hadoop.mapred.RunningJob --> |
| <interface name="RunningJob" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getConfiguration" return="org.apache.hadoop.conf.Configuration" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the underlying job configuration |
| |
| @return the configuration of the job.]]> |
| </doc> |
| </method> |
| <method name="getID" return="org.apache.hadoop.mapred.JobID" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the job identifier. |
| |
| @return the job identifier.]]> |
| </doc> |
| </method> |
| <method name="getJobID" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="This method is deprecated and will be removed. Applications should |
| rather use {@link #getID()}."> |
| <doc> |
| <![CDATA[@deprecated This method is deprecated and will be removed. Applications should |
| rather use {@link #getID()}.]]> |
| </doc> |
| </method> |
| <method name="getJobName" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the name of the job. |
| |
| @return the name of the job.]]> |
| </doc> |
| </method> |
| <method name="getJobFile" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the path of the submitted job configuration. |
| |
| @return the path of the submitted job configuration.]]> |
| </doc> |
| </method> |
| <method name="getTrackingURL" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the URL where some job progress information will be displayed. |
| |
| @return the URL where some job progress information will be displayed.]]> |
| </doc> |
| </method> |
| <method name="mapProgress" return="float" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0 |
| and 1.0. When all map tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's map-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="reduceProgress" return="float" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0 |
| and 1.0. When all reduce tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's reduce-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="cleanupProgress" return="float" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0 |
| and 1.0. When all cleanup tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's cleanup-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setupProgress" return="float" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0 |
| and 1.0. When all setup tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's setup-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isComplete" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check if the job is finished or not. |
| This is a non-blocking call. |
| |
| @return <code>true</code> if the job is complete, else <code>false</code>. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isSuccessful" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check if the job completed successfully. |
| |
| @return <code>true</code> if the job succeeded, else <code>false</code>. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="waitForCompletion" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Blocks until the job is complete. |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getJobState" return="int" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns the current state of the Job. |
| {@link JobStatus} |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getJobStatus" return="org.apache.hadoop.mapred.JobStatus" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns a snapshot of the current status, {@link JobStatus}, of the Job. |
| Need to call again for latest information. |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="killJob" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Kill the running job. Blocks until all job tasks have been killed as well. |
| If the job is no longer running, it simply returns. |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setJobPriority" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="priority" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Set the priority of a running job. |
| @param priority the new priority for the job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="startFrom" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get events indicating completion (success/failure) of component tasks. |
| |
| @param startFrom index to start fetching events from |
| @return an array of {@link TaskCompletionEvent}s |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="killTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="shouldFail" type="boolean"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Kill indicated task attempt. |
| |
| @param taskId the id of the task to be terminated. |
| @param shouldFail if true the task is failed and added to failed tasks |
| list, otherwise it is just killed, w/o affecting |
| job failure status. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="killTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Applications should rather use {@link #killTask(TaskAttemptID, boolean)}"> |
| <param name="taskId" type="java.lang.String"/> |
| <param name="shouldFail" type="boolean"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Applications should rather use {@link #killTask(TaskAttemptID, boolean)}]]> |
| </doc> |
| </method> |
| <method name="getCounters" return="org.apache.hadoop.mapred.Counters" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the counters for this job. |
| |
| @return the counters for this job or null if the job has been retired. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getTaskDiagnostics" return="java.lang.String[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the diagnostic messages for a given task attempt. |
| @param taskid |
| @return the list of diagnostic messages for the task |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getHistoryUrl" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the url where history file is archived. Returns empty string if |
| history file is not available yet. |
| |
| @return the url where history file is archived |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isRetired" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check whether the job has been removed from JobTracker memory and retired. |
| On retire, the job history file is copied to a location known by |
| {@link #getHistoryUrl()} |
| @return <code>true</code> if the job retired, else <code>false</code>. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getFailureInfo" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get failure info for the job. |
| @return the failure info for the job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>RunningJob</code> is the user-interface to query for details on a |
| running Map-Reduce job. |
| |
| <p>Clients can get hold of <code>RunningJob</code> via the {@link JobClient} |
| and then query the running-job for details such as name, configuration, |
| progress etc.</p> |
| |
| @see JobClient]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.RunningJob --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat --> |
| <class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileAsBinaryInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw) |
| format.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat --> |
| <class name="SequenceFileAsBinaryOutputFormat" extends="org.apache.hadoop.mapred.SequenceFileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileAsBinaryOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setSequenceFileOutputKeyClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the key class for the {@link SequenceFile} |
| <p>This allows the user to specify the key class to be different |
| from the actual class ({@link BytesWritable}) used for writing </p> |
| |
| @param conf the {@link JobConf} to modify |
| @param theClass the SequenceFile output key class.]]> |
| </doc> |
| </method> |
| <method name="setSequenceFileOutputValueClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the value class for the {@link SequenceFile} |
| <p>This allows the user to specify the value class to be different |
| from the actual class ({@link BytesWritable}) used for writing </p> |
| |
| @param conf the {@link JobConf} to modify |
| @param theClass the SequenceFile output value class.]]> |
| </doc> |
| </method> |
| <method name="getSequenceFileOutputKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the key class for the {@link SequenceFile} |
| |
| @return the key class of the {@link SequenceFile}]]> |
| </doc> |
| </method> |
| <method name="getSequenceFileOutputValueClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the value class for the {@link SequenceFile} |
| |
| @return the value class of the {@link SequenceFile}]]> |
| </doc> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link OutputFormat} that writes keys, values to |
| {@link SequenceFile}s in binary (raw) format]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat --> |
| <class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileAsTextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[This class is similar to SequenceFileInputFormat, |
| except it generates SequenceFileAsTextRecordReader |
| which converts the input keys and values to their |
| String forms by calling toString() method.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader --> |
| <class name="SequenceFileAsTextRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="SequenceFileAsTextRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="createKey" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="value" type="org.apache.hadoop.io.Text"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Read key/value pair in a line.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[This class converts the input keys and values to their String forms by calling the |
| toString() method. This class is to SequenceFileAsTextInputFormat what the |
| LineRecordReader class is to TextInputFormat.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter --> |
| <class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileInputFilter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create a record reader for the given split |
| @param split file split |
| @param job job configuration |
| @param reporter reporter who sends report to task tracker |
| @return RecordReader]]> |
| </doc> |
| </method> |
| <method name="setFilterClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="filterClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[set the filter class |
| |
| @param conf application configuration |
| @param filterClass filter class]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A class that allows a map/red job to work on a sample of sequence files. |
| The sample is decided by the filter class set by the job.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileInputFormat --> |
| <class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link InputFormat} for {@link SequenceFile}s.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileOutputFormat --> |
| <class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getReaders" return="org.apache.hadoop.io.SequenceFile.Reader[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="dir" type="org.apache.hadoop.fs.Path"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Open the output generated by this format.]]> |
| </doc> |
| </method> |
| <method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}. |
| @param conf the {@link JobConf} |
| @return the {@link CompressionType} for the output {@link SequenceFile}, |
| defaulting to {@link CompressionType#RECORD}]]> |
| </doc> |
| </method> |
| <method name="setOutputCompressionType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/> |
| <doc> |
| <![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}. |
| @param conf the {@link JobConf} to modify |
| @param style the {@link CompressionType} for the output |
| {@link SequenceFile}]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileRecordReader --> |
| <class name="SequenceFileRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="SequenceFileRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="getKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The class of key that must be passed to {@link |
#next(Object, Object)}.]]>
| </doc> |
| </method> |
| <method name="getValueClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The class of value that must be passed to {@link |
#next(Object, Object)}.]]>
| </doc> |
| </method> |
| <method name="createKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getCurrentValue" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return the progress within the input split |
| @return 0.0 to 1.0 of the input byte range]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="seek" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="pos" type="long"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="conf" type="org.apache.hadoop.conf.Configuration" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[An {@link RecordReader} for {@link SequenceFile}s.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.SkipBadRecords --> |
| <class name="SkipBadRecords" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SkipBadRecords" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getAttemptsToStartSkipping" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
<![CDATA[Get the number of Task attempts AFTER which skip mode
will be kicked off. When skip mode is kicked off, the
task reports the range of records which it will process
next to the TaskTracker, so that on failures the TaskTracker
knows which records are possibly bad. On further executions,
those are skipped.
| Default value is 2. |
| |
| @param conf the configuration |
| @return attemptsToStartSkipping no of task attempts]]> |
| </doc> |
| </method> |
| <method name="setAttemptsToStartSkipping" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="attemptsToStartSkipping" type="int"/> |
| <doc> |
<![CDATA[Set the number of Task attempts AFTER which skip mode
will be kicked off. When skip mode is kicked off, the
task reports the range of records which it will process
next to the TaskTracker, so that on failures the TaskTracker
knows which records are possibly bad. On further executions,
those are skipped.
| Default value is 2. |
| |
| @param conf the configuration |
| @param attemptsToStartSkipping no of task attempts]]> |
| </doc> |
| </method> |
| <method name="getAutoIncrMapperProcCount" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the flag which if set to true, |
| {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented |
| by MapRunner after invoking the map function. This value must be set to |
| false for applications which process the records asynchronously |
| or buffer the input records. For example streaming. |
| In such cases applications should increment this counter on their own. |
| Default value is true. |
| |
| @param conf the configuration |
| @return <code>true</code> if auto increment |
| {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}. |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setAutoIncrMapperProcCount" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="autoIncr" type="boolean"/> |
| <doc> |
| <![CDATA[Set the flag which if set to true, |
| {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented |
| by MapRunner after invoking the map function. This value must be set to |
| false for applications which process the records asynchronously |
| or buffer the input records. For example streaming. |
| In such cases applications should increment this counter on their own. |
| Default value is true. |
| |
| @param conf the configuration |
| @param autoIncr whether to auto increment |
| {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.]]> |
| </doc> |
| </method> |
| <method name="getAutoIncrReducerProcCount" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the flag which if set to true, |
| {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented |
| by framework after invoking the reduce function. This value must be set to |
| false for applications which process the records asynchronously |
| or buffer the input records. For example streaming. |
| In such cases applications should increment this counter on their own. |
| Default value is true. |
| |
| @param conf the configuration |
| @return <code>true</code> if auto increment |
| {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setAutoIncrReducerProcCount" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="autoIncr" type="boolean"/> |
| <doc> |
| <![CDATA[Set the flag which if set to true, |
| {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented |
| by framework after invoking the reduce function. This value must be set to |
| false for applications which process the records asynchronously |
| or buffer the input records. For example streaming. |
| In such cases applications should increment this counter on their own. |
| Default value is true. |
| |
| @param conf the configuration |
| @param autoIncr whether to auto increment |
| {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.]]> |
| </doc> |
| </method> |
| <method name="getSkipOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the directory to which skipped records are written. By default it is |
| the sub directory of the output _logs directory. |
| User can stop writing skipped records by setting the value null. |
| |
| @param conf the configuration. |
| @return path skip output directory. Null is returned if this is not set |
| and output directory is also not set.]]> |
| </doc> |
| </method> |
| <method name="setSkipOutputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Set the directory to which skipped records are written. By default it is |
| the sub directory of the output _logs directory. |
| User can stop writing skipped records by setting the value null. |
| |
| @param conf the configuration. |
| @param path skip output directory path]]> |
| </doc> |
| </method> |
| <method name="getMapperMaxSkipRecords" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the number of acceptable skip records surrounding the bad record PER |
| bad record in mapper. The number includes the bad record as well. |
| To turn the feature of detection/skipping of bad records off, set the |
| value to 0. |
| The framework tries to narrow down the skipped range by retrying |
| until this threshold is met OR all attempts get exhausted for this task. |
| Set the value to Long.MAX_VALUE to indicate that framework need not try to |
narrow down. Whatever records (depends on application) get skipped are
| acceptable. |
| Default value is 0. |
| |
| @param conf the configuration |
| @return maxSkipRecs acceptable skip records.]]> |
| </doc> |
| </method> |
| <method name="setMapperMaxSkipRecords" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="maxSkipRecs" type="long"/> |
| <doc> |
| <![CDATA[Set the number of acceptable skip records surrounding the bad record PER |
| bad record in mapper. The number includes the bad record as well. |
| To turn the feature of detection/skipping of bad records off, set the |
| value to 0. |
| The framework tries to narrow down the skipped range by retrying |
| until this threshold is met OR all attempts get exhausted for this task. |
| Set the value to Long.MAX_VALUE to indicate that framework need not try to |
narrow down. Whatever records (depends on application) get skipped are
| acceptable. |
| Default value is 0. |
| |
| @param conf the configuration |
| @param maxSkipRecs acceptable skip records.]]> |
| </doc> |
| </method> |
| <method name="getReducerMaxSkipGroups" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the number of acceptable skip groups surrounding the bad group PER |
| bad group in reducer. The number includes the bad group as well. |
| To turn the feature of detection/skipping of bad groups off, set the |
| value to 0. |
| The framework tries to narrow down the skipped range by retrying |
| until this threshold is met OR all attempts get exhausted for this task. |
| Set the value to Long.MAX_VALUE to indicate that framework need not try to |
narrow down. Whatever groups (depends on application) get skipped are
| acceptable. |
| Default value is 0. |
| |
| @param conf the configuration |
| @return maxSkipGrps acceptable skip groups.]]> |
| </doc> |
| </method> |
| <method name="setReducerMaxSkipGroups" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="maxSkipGrps" type="long"/> |
| <doc> |
| <![CDATA[Set the number of acceptable skip groups surrounding the bad group PER |
| bad group in reducer. The number includes the bad group as well. |
| To turn the feature of detection/skipping of bad groups off, set the |
| value to 0. |
| The framework tries to narrow down the skipped range by retrying |
| until this threshold is met OR all attempts get exhausted for this task. |
| Set the value to Long.MAX_VALUE to indicate that framework need not try to |
narrow down. Whatever groups (depends on application) get skipped are
| acceptable. |
| Default value is 0. |
| |
| @param conf the configuration |
| @param maxSkipGrps acceptable skip groups.]]> |
| </doc> |
| </method> |
| <field name="COUNTER_GROUP" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Special counters which are written by the application and are |
| used by the framework for detecting bad records. For detecting bad records |
| these counters must be incremented by the application.]]> |
| </doc> |
| </field> |
| <field name="COUNTER_MAP_PROCESSED_RECORDS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Number of processed map records. |
| @see SkipBadRecords#getAutoIncrMapperProcCount(Configuration)]]> |
| </doc> |
| </field> |
| <field name="COUNTER_REDUCE_PROCESSED_GROUPS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Number of processed reduce groups. |
| @see SkipBadRecords#getAutoIncrReducerProcCount(Configuration)]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[Utility class for skip bad records functionality. It contains various |
| settings related to skipping of bad records. |
| |
| <p>Hadoop provides an optional mode of execution in which the bad records |
| are detected and skipped in further attempts. |
| |
<p>This feature can be used when map/reduce tasks crash deterministically on
| certain input. This happens due to bugs in the map/reduce function. The usual |
| course would be to fix these bugs. But sometimes this is not possible; |
| perhaps the bug is in third party libraries for which the source code is |
not available. Due to this, the task never reaches completion even with
| multiple attempts and complete data for that task is lost.</p> |
| |
| <p>With this feature, only a small portion of data is lost surrounding |
| the bad record, which may be acceptable for some user applications. |
| see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}</p> |
| |
| <p>The skipping mode gets kicked off after certain no of failures |
| see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}</p> |
| |
| <p>In the skipping mode, the map/reduce task maintains the record range which |
| is getting processed at all times. Before giving the input to the |
| map/reduce function, it sends this record range to the Task tracker. |
| If task crashes, the Task tracker knows which one was the last reported |
range. On further attempts that range gets skipped.</p>]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SkipBadRecords --> |
| <!-- start class org.apache.hadoop.mapred.SplitLocationInfo --> |
| <class name="SplitLocationInfo" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SplitLocationInfo" type="java.lang.String, boolean" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="isOnDisk" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="isInMemory" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getLocation" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SplitLocationInfo --> |
| <!-- start interface org.apache.hadoop.mapred.TaskAttemptContext --> |
| <interface name="TaskAttemptContext" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <method name="getTaskAttemptID" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getProgressible" return="org.apache.hadoop.util.Progressable" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.TaskAttemptContext --> |
| <!-- start class org.apache.hadoop.mapred.TaskAttemptID --> |
| <class name="TaskAttemptID" extends="org.apache.hadoop.mapreduce.TaskAttemptID" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskAttemptID" type="org.apache.hadoop.mapred.TaskID, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}. |
| @param taskId TaskID that this task belongs to |
| @param id the task attempt number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #TaskAttemptID(String, int, TaskType, int, int)}."> |
| <doc> |
| <![CDATA[Constructs a TaskId object from given parts. |
| @param jtIdentifier jobTracker identifier |
| @param jobId job number |
| @param isMap whether the tip is a map |
| @param taskId taskId number |
| @param id the task attempt number |
| @deprecated Use {@link #TaskAttemptID(String, int, TaskType, int, int)}.]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskAttemptID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskId object from given parts. |
| @param jtIdentifier jobTracker identifier |
| @param jobId job number |
| @param type the TaskType |
| @param taskId taskId number |
| @param id the task attempt number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskAttemptID" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="downgrade" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="old" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> |
| <doc> |
| <![CDATA[Downgrade a new TaskAttemptID to an old one |
| @param old the new id |
| @return either old or a new TaskAttemptID constructed to match old]]> |
| </doc> |
| </method> |
| <method name="getTaskID" return="org.apache.hadoop.mapred.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getJobID" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="read" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="forName" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="str" type="java.lang.String"/> |
| <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> |
| <doc> |
| <![CDATA[Construct a TaskAttemptID object from given string |
| @return constructed TaskAttemptID object or null if the given String is null |
| @throws IllegalArgumentException if the given string is malformed]]> |
| </doc> |
| </method> |
| <method name="getTaskAttemptIDsPattern" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jtIdentifier" type="java.lang.String"/> |
| <param name="jobId" type="java.lang.Integer"/> |
| <param name="isMap" type="java.lang.Boolean"/> |
| <param name="taskId" type="java.lang.Integer"/> |
| <param name="attemptId" type="java.lang.Integer"/> |
| <doc> |
| <![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can |
| be given null, in which case that part of the regex will be generic. |
| For example to obtain a regex matching <i>all task attempt IDs</i> |
| of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first |
| map task</i>, we would use : |
| <pre> |
| TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null); |
| </pre> |
| which will return : |
| <pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre> |
| @param jtIdentifier jobTracker identifier, or null |
| @param jobId job number, or null |
| @param isMap whether the tip is a map, or null |
| @param taskId taskId number, or null |
| @param attemptId the task attempt number, or null |
| @return a regex pattern matching TaskAttemptIDs]]> |
| </doc> |
| </method> |
| <method name="getTaskAttemptIDsPattern" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jtIdentifier" type="java.lang.String"/> |
| <param name="jobId" type="java.lang.Integer"/> |
| <param name="type" type="org.apache.hadoop.mapreduce.TaskType"/> |
| <param name="taskId" type="java.lang.Integer"/> |
| <param name="attemptId" type="java.lang.Integer"/> |
| <doc> |
| <![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can |
| be given null, in which case that part of the regex will be generic. |
| For example to obtain a regex matching <i>all task attempt IDs</i> |
| of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first |
| map task</i>, we would use : |
| <pre> |
| TaskAttemptID.getTaskAttemptIDsPattern(null, null, TaskType.MAP, 1, null); |
| </pre> |
| which will return : |
| <pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre> |
| @param jtIdentifier jobTracker identifier, or null |
| @param jobId job number, or null |
| @param type the {@link TaskType} |
| @param taskId taskId number, or null |
| @param attemptId the task attempt number, or null |
| @return a regex pattern matching TaskAttemptIDs]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[TaskAttemptID represents the immutable and unique identifier for |
| a task attempt. Each task attempt is one particular instance of a Map or |
| Reduce Task identified by its TaskID. |
| |
| TaskAttemptID consists of 2 parts. First part is the |
| {@link TaskID}, that this TaskAttemptID belongs to. |
| Second part is the task attempt number. <br> |
| An example TaskAttemptID is : |
| <code>attempt_200707121733_0003_m_000005_0</code> , which represents the |
| zeroth task attempt for the fifth map task in the third job |
| running at the jobtracker started at <code>200707121733</code>. |
| <p> |
| Applications should never construct or parse TaskAttemptID strings |
| , but rather use appropriate constructors or {@link #forName(String)} |
| method. |
| |
| @see JobID |
| @see TaskID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskAttemptID --> |
| <!-- start class org.apache.hadoop.mapred.TaskCompletionEvent --> |
| <class name="TaskCompletionEvent" extends="org.apache.hadoop.mapreduce.TaskCompletionEvent" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskCompletionEvent" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Default constructor for Writable.]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapred.TaskAttemptID, int, boolean, org.apache.hadoop.mapred.TaskCompletionEvent.Status, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructor. eventId should be created externally and incremented |
| per event for each job. |
 | @param eventId event id, event id should be unique and assigned |
 | incrementally, starting from 0. |
| @param taskId task id |
| @param status task's status |
| @param taskTrackerHttp task tracker's host:port for http.]]> |
| </doc> |
| </constructor> |
| <method name="getTaskId" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #getTaskAttemptId()} instead."> |
| <doc> |
| <![CDATA[Returns task id. |
| @return task id |
| @deprecated use {@link #getTaskAttemptId()} instead.]]> |
| </doc> |
| </method> |
| <method name="getTaskAttemptId" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns task id. |
| @return task id]]> |
| </doc> |
| </method> |
| <method name="getTaskStatus" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns {@link Status} |
| @return task completion status]]> |
| </doc> |
| </method> |
| <method name="setTaskId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #setTaskAttemptId(TaskAttemptID)} instead."> |
| <param name="taskId" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Sets task id. |
| @param taskId |
| @deprecated use {@link #setTaskAttemptId(TaskAttemptID)} instead.]]> |
| </doc> |
| </method> |
| <method name="setTaskID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #setTaskAttemptId(TaskAttemptID)} instead."> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <doc> |
| <![CDATA[Sets task id. |
| @param taskId |
| @deprecated use {@link #setTaskAttemptId(TaskAttemptID)} instead.]]> |
| </doc> |
| </method> |
| <method name="setTaskAttemptId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <doc> |
| <![CDATA[Sets task id. |
| @param taskId]]> |
| </doc> |
| </method> |
| <field name="EMPTY_ARRAY" type="org.apache.hadoop.mapred.TaskCompletionEvent[]" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This is used to track task completion events on |
| job tracker.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskCompletionEvent --> |
| <!-- start class org.apache.hadoop.mapred.TaskCompletionEvent.Status --> |
| <class name="TaskCompletionEvent.Status" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskCompletionEvent.Status --> |
| <!-- start class org.apache.hadoop.mapred.TaskID --> |
| <class name="TaskID" extends="org.apache.hadoop.mapreduce.TaskID" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #TaskID(String, int, TaskType, int)}"> |
| <doc> |
| <![CDATA[Constructs a TaskID object from given {@link JobID}. |
| @param jobId JobID that this tip belongs to |
| @param isMap whether the tip is a map |
| @param id the tip number |
| @deprecated Use {@link #TaskID(String, int, TaskType, int)}]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskID" type="java.lang.String, int, boolean, int" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #TaskID(org.apache.hadoop.mapreduce.JobID, TaskType, |
| int)}"> |
| <doc> |
| <![CDATA[Constructs a TaskInProgressId object from given parts. |
| @param jtIdentifier jobTracker identifier |
| @param jobId job number |
| @param isMap whether the tip is a map |
| @param id the tip number |
| @deprecated Use {@link #TaskID(org.apache.hadoop.mapreduce.JobID, TaskType, |
| int)}]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, org.apache.hadoop.mapreduce.TaskType, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskID object from given {@link JobID}. |
| @param jobId JobID that this tip belongs to |
| @param type the {@link TaskType} |
| @param id the tip number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskInProgressId object from given parts. |
| @param jtIdentifier jobTracker identifier |
| @param jobId job number |
| @param type the {@link TaskType} |
| @param id the tip number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskID" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="downgrade" return="org.apache.hadoop.mapred.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="old" type="org.apache.hadoop.mapreduce.TaskID"/> |
| <doc> |
| <![CDATA[Downgrade a new TaskID to an old one |
| @param old a new or old TaskID |
| @return either old or a new TaskID build to match old]]> |
| </doc> |
| </method> |
| <method name="read" return="org.apache.hadoop.mapred.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getJobID" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getTaskIDsPattern" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType, |
| Integer)}"> |
| <param name="jtIdentifier" type="java.lang.String"/> |
| <param name="jobId" type="java.lang.Integer"/> |
| <param name="isMap" type="java.lang.Boolean"/> |
| <param name="taskId" type="java.lang.Integer"/> |
| <doc> |
| <![CDATA[Returns a regex pattern which matches task IDs. Arguments can |
| be given null, in which case that part of the regex will be generic. |
| For example to obtain a regex matching <i>the first map task</i> |
| of <i>any jobtracker</i>, of <i>any job</i>, we would use : |
| <pre> |
| TaskID.getTaskIDsPattern(null, null, true, 1); |
| </pre> |
| which will return : |
| <pre> "task_[^_]*_[0-9]*_m_000001*" </pre> |
| @param jtIdentifier jobTracker identifier, or null |
| @param jobId job number, or null |
| @param isMap whether the tip is a map, or null |
| @param taskId taskId number, or null |
| @return a regex pattern matching TaskIDs |
| @deprecated Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType, |
| Integer)}]]> |
| </doc> |
| </method> |
| <method name="getTaskIDsPattern" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jtIdentifier" type="java.lang.String"/> |
| <param name="jobId" type="java.lang.Integer"/> |
| <param name="type" type="org.apache.hadoop.mapreduce.TaskType"/> |
| <param name="taskId" type="java.lang.Integer"/> |
| <doc> |
| <![CDATA[Returns a regex pattern which matches task IDs. Arguments can |
| be given null, in which case that part of the regex will be generic. |
| For example to obtain a regex matching <i>the first map task</i> |
| of <i>any jobtracker</i>, of <i>any job</i>, we would use : |
| <pre> |
| TaskID.getTaskIDsPattern(null, null, true, 1); |
| </pre> |
| which will return : |
| <pre> "task_[^_]*_[0-9]*_m_000001*" </pre> |
| @param jtIdentifier jobTracker identifier, or null |
| @param jobId job number, or null |
| @param type the {@link TaskType}, or null |
| @param taskId taskId number, or null |
| @return a regex pattern matching TaskIDs]]> |
| </doc> |
| </method> |
| <method name="forName" return="org.apache.hadoop.mapred.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="str" type="java.lang.String"/> |
| <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> |
| </method> |
| <doc> |
| <![CDATA[TaskID represents the immutable and unique identifier for |
| a Map or Reduce Task. Each TaskID encompasses multiple attempts made to |
 | execute the Map or Reduce Task, each of which is uniquely identified by |
| their TaskAttemptID. |
| |
| TaskID consists of 3 parts. First part is the {@link JobID}, that this |
| TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r' |
| representing whether the task is a map task or a reduce task. |
| And the third part is the task number. <br> |
| An example TaskID is : |
| <code>task_200707121733_0003_m_000005</code> , which represents the |
| fifth map task in the third job running at the jobtracker |
| started at <code>200707121733</code>. |
| <p> |
 | Applications should never construct or parse TaskID strings, |
 | but rather use appropriate constructors or {@link #forName(String)} |
| method. |
| |
| @see JobID |
| @see TaskAttemptID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskID --> |
| <!-- start class org.apache.hadoop.mapred.TaskReport --> |
| <class name="TaskReport" extends="org.apache.hadoop.mapreduce.TaskReport" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskReport" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getTaskId" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The string of the task id.]]> |
| </doc> |
| </method> |
| <method name="getTaskID" return="org.apache.hadoop.mapred.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The id of the task.]]> |
| </doc> |
| </method> |
| <method name="getCounters" return="org.apache.hadoop.mapred.Counters" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setSuccessfulAttempt" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="t" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <doc> |
| <![CDATA[set successful attempt ID of the task.]]> |
| </doc> |
| </method> |
| <method name="getSuccessfulTaskAttempt" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the attempt ID that took this task to completion]]> |
| </doc> |
| </method> |
| <method name="setRunningTaskAttempts" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="runningAttempts" type="java.util.Collection"/> |
| <doc> |
| <![CDATA[set running attempt(s) of the task.]]> |
| </doc> |
| </method> |
| <method name="getRunningTaskAttempts" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the running task attempt IDs for this task]]> |
| </doc> |
| </method> |
| <method name="setFinishTime" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="finishTime" type="long"/> |
| <doc> |
| <![CDATA[set finish time of task. |
| @param finishTime finish time of task.]]> |
| </doc> |
| </method> |
| <method name="setStartTime" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="startTime" type="long"/> |
| <doc> |
| <![CDATA[set start time of the task.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A report on the state of a task.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskReport --> |
| <!-- start class org.apache.hadoop.mapred.TextInputFormat --> |
| <class name="TextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="TextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines. |
| Either linefeed or carriage-return are used to signal end of line. Keys are |
 | the position in the file, and values are the line of text.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TextInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.TextOutputFormat --> |
| <class name="TextOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TextOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link OutputFormat} that writes plain text files.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TextOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.Utils --> |
| <class name="Utils" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Utils" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[A utility class. It provides |
| A path filter utility to filter out output/part files in the output dir]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.Utils --> |
| </package> |
| <package name="org.apache.hadoop.mapred.jobcontrol"> |
| <!-- start class org.apache.hadoop.mapred.jobcontrol.Job --> |
| <class name="Job" extends="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Job" type="org.apache.hadoop.mapred.JobConf, java.util.ArrayList" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Construct a job. |
| @param jobConf a mapred job configuration representing a job to be executed. |
| @param dependingJobs an array of jobs the current job depends on]]> |
| </doc> |
| </constructor> |
| <constructor name="Job" type="org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="getAssignedJobID" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the mapred ID of this job as assigned by the mapred framework.]]> |
| </doc> |
| </method> |
| <method name="setAssignedJobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="setAssignedJobID should not be called. |
| JOBID is set by the framework."> |
| <param name="mapredJobID" type="org.apache.hadoop.mapred.JobID"/> |
| <doc> |
| <![CDATA[@deprecated setAssignedJobID should not be called. |
| JOBID is set by the framework.]]> |
| </doc> |
| </method> |
| <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the mapred job conf of this job]]> |
| </doc> |
| </method> |
| <method name="setJobConf" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Set the mapred job conf for this job. |
| @param jobConf the mapred job conf for this job.]]> |
| </doc> |
| </method> |
| <method name="getState" return="int" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the state of this job]]> |
| </doc> |
| </method> |
| <method name="setState" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="state" type="int"/> |
| <doc> |
 | <![CDATA[This is a no-op function. It's a behavior change from 1.x: we can no |
 | longer change the state from the job. |
| |
| @param state |
| the new state for this job.]]> |
| </doc> |
| </method> |
| <method name="addDependingJob" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="dependingJob" type="org.apache.hadoop.mapred.jobcontrol.Job"/> |
| <doc> |
 | <![CDATA[Add a job to this job's dependency list. |
| Dependent jobs can only be added while a Job |
| is waiting to run, not during or afterwards. |
| |
| @param dependingJob Job that this Job depends on. |
| @return <tt>true</tt> if the Job was added.]]> |
| </doc> |
| </method> |
| <method name="getJobClient" return="org.apache.hadoop.mapred.JobClient" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the job client of this job]]> |
| </doc> |
| </method> |
| <method name="getDependingJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the depending jobs of this job]]> |
| </doc> |
| </method> |
| <method name="getMapredJobID" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the mapred ID of this job as assigned by the mapred framework.]]> |
| </doc> |
| </method> |
| <method name="setMapredJobID" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="mapredJobID" type="java.lang.String"/> |
| <doc> |
 | <![CDATA[This is a no-op method for backward compatibility. It's a behavior change |
 | from 1.x: we cannot change job IDs from the job. |
| |
| @param mapredJobID |
| the mapred job ID for this job.]]> |
| </doc> |
| </method> |
| <field name="SUCCESS" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="WAITING" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="RUNNING" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="READY" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FAILED" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DEPENDENT_FAILED" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.jobcontrol.Job --> |
| <!-- start class org.apache.hadoop.mapred.jobcontrol.JobControl --> |
| <class name="JobControl" extends="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobControl" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a job control for a group of jobs. |
| @param groupName a name identifying this group]]> |
| </doc> |
| </constructor> |
| <method name="getWaitingJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the waiting state]]> |
| </doc> |
| </method> |
| <method name="getRunningJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the running state]]> |
| </doc> |
| </method> |
| <method name="getReadyJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the ready state]]> |
| </doc> |
| </method> |
| <method name="getSuccessfulJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the success state]]> |
| </doc> |
| </method> |
| <method name="getFailedJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="addJobs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobs" type="java.util.Collection"/> |
| <doc> |
| <![CDATA[Add a collection of jobs |
| |
| @param jobs]]> |
| </doc> |
| </method> |
| <method name="getState" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the thread state]]> |
| </doc> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.jobcontrol.JobControl --> |
| </package> |
| <package name="org.apache.hadoop.mapred.join"> |
| <!-- start class org.apache.hadoop.mapred.join.ArrayListBackedIterator --> |
| <class name="ArrayListBackedIterator" extends="org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> |
| <constructor name="ArrayListBackedIterator" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="ArrayListBackedIterator" type="java.util.ArrayList" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[This class provides an implementation of ResetableIterator. The |
| implementation uses an {@link java.util.ArrayList} to store elements |
| added to it, replaying them as requested. |
| Prefer {@link StreamBackedIterator}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.ArrayListBackedIterator --> |
| <!-- start interface org.apache.hadoop.mapred.join.ComposableInputFormat --> |
| <interface name="ComposableInputFormat" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputFormat"/> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Refinement of InputFormat requiring implementors to provide |
| ComposableRecordReader instead of RecordReader.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.join.ComposableInputFormat --> |
| <!-- start interface org.apache.hadoop.mapred.join.ComposableRecordReader --> |
| <interface name="ComposableRecordReader" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <implements name="java.lang.Comparable"/> |
| <method name="id" return="int" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the position in the collector this class occupies.]]> |
| </doc> |
| </method> |
| <method name="key" return="K" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the key this RecordReader would supply on a call to next(K,V)]]> |
| </doc> |
| </method> |
| <method name="key" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Clone the key at the head of this RecordReader into the object provided.]]> |
| </doc> |
| </method> |
| <method name="hasNext" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns true if the stream is not empty, but provides no guarantee that |
| a call to next(K,V) will succeed.]]> |
| </doc> |
| </method> |
| <method name="skip" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]> |
| </doc> |
| </method> |
| <method name="accept" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[While key-value pairs from this RecordReader match the given key, register |
| them with the JoinCollector provided.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Additional operations required of a RecordReader to participate in a join.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.join.ComposableRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.CompositeInputFormat --> |
| <class name="CompositeInputFormat" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/> |
| <constructor name="CompositeInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Interpret a given string as a composite expression. |
| {@code |
| func ::= <ident>([<func>,]*<func>) |
| func ::= tbl(<class>,"<path>") |
| class ::= @see java.lang.Class#forName(java.lang.String) |
| path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) |
| } |
| Reads expression from the <tt>mapred.join.expr</tt> property and |
| user-supplied join types from <tt>mapred.join.define.<ident></tt> |
| types. Paths supplied to <tt>tbl</tt> are given as input paths to the |
| InputFormat class listed. |
| @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> |
| </doc> |
| </method> |
| <method name="addDefaults" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Adds the default set of identifiers to the parser.]]> |
| </doc> |
| </method> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the |
| ith split from each child to the ith composite split.]]> |
| </doc> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Construct a CompositeRecordReader for the children of this InputFormat |
| as defined in the init expression. |
| The outermost join need only be composable, not necessarily a composite. |
| Mandating TupleWritable isn't strictly correct.]]> |
| </doc> |
| </method> |
| <method name="compose" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="inf" type="java.lang.Class"/> |
| <param name="path" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Convenience method for constructing composite formats. |
| Given InputFormat class (inf), path (p) return: |
| {@code tbl(<inf>, <p>) }]]> |
| </doc> |
| </method> |
| <method name="compose" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="op" type="java.lang.String"/> |
| <param name="inf" type="java.lang.Class"/> |
| <param name="path" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[Convenience method for constructing composite formats. |
| Given operation (op), InputFormat class (inf), set of paths (p) return: |
| {@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]> |
| </doc> |
| </method> |
| <method name="compose" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="op" type="java.lang.String"/> |
| <param name="inf" type="java.lang.Class"/> |
| <param name="path" type="org.apache.hadoop.fs.Path[]"/> |
| <doc> |
| <![CDATA[Convenience method for constructing composite formats. |
| Given operation (op), InputFormat class (inf), set of paths (p) return: |
| {@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[An InputFormat capable of performing joins over a set of data sources sorted |
| and partitioned the same way. |
| |
| A user may define new join types by setting the property |
| <tt>mapred.join.define.<ident></tt> to a classname. In the expression |
| <tt>mapred.join.expr</tt>, the identifier will be assumed to be a |
| ComposableRecordReader. |
| <tt>mapred.join.keycomparator</tt> can be a classname used to compare keys |
| in the join. |
| @see #setFormat |
| @see JoinRecordReader |
| @see MultiFilterRecordReader]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.CompositeInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.join.CompositeInputSplit --> |
| <class name="CompositeInputSplit" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputSplit"/> |
| <constructor name="CompositeInputSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CompositeInputSplit" type="int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="s" type="org.apache.hadoop.mapred.InputSplit"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add an InputSplit to this collection. |
| @throws IOException If capacity was not specified during construction |
| or if capacity has been reached.]]> |
| </doc> |
| </method> |
| <method name="get" return="org.apache.hadoop.mapred.InputSplit" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Get ith child InputSplit.]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return the aggregate length of all child InputSplits currently added.]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the length of ith child InputSplit.]]> |
| </doc> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Collect a set of hosts from all child InputSplits.]]> |
| </doc> |
| </method> |
| <method name="getLocation" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[getLocations from ith InputSplit.]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Write splits in the following format. |
| {@code |
| <count><class1><class2>...<classn><split1><split2>...<splitn> |
| }]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc} |
| @throws IOException If the child InputSplit cannot be read, typically |
| for failing access checks.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted |
| into this collection must have a public default constructor.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.CompositeInputSplit --> |
| <!-- start class org.apache.hadoop.mapred.join.CompositeRecordReader --> |
| <class name="CompositeRecordReader" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <constructor name="CompositeRecordReader" type="int, int, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create a RecordReader with <tt>capacity</tt> children to position |
| <tt>id</tt> in the parent reader. |
| The id of a root CompositeRecordReader is -1 by convention, but relying |
| on this is not recommended.]]> |
| </doc> |
| </constructor> |
| <method name="combine" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| </method> |
| <method name="id" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the position in the collector this class occupies.]]> |
| </doc> |
| </method> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getRecordReaderQueue" return="java.util.PriorityQueue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return sorted list of RecordReaders for this composite.]]> |
| </doc> |
| </method> |
| <method name="getComparator" return="org.apache.hadoop.io.WritableComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return comparator defining the ordering for RecordReaders in this |
| composite.]]> |
| </doc> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="rr" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add a RecordReader to this collection. |
| The id() of a RecordReader determines where in the Tuple its |
| entry will appear. Adding RecordReaders with the same id has |
| undefined behavior.]]> |
| </doc> |
| </method> |
| <method name="key" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the key for the current join or the value at the top of the |
| RecordReader heap.]]> |
| </doc> |
| </method> |
| <method name="key" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Clone the key at the top of this RR into the given object.]]> |
| </doc> |
| </method> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return true if it is possible that this could emit more values.]]> |
| </doc> |
| </method> |
| <method name="skip" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Pass skip key to child RRs.]]> |
| </doc> |
| </method> |
| <method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Obtain an iterator over the child RRs apropos of the value type |
| ultimately emitted from this join.]]> |
| </doc> |
| </method> |
| <method name="accept" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[If key provided matches that of this Composite, give JoinCollector |
| iterator over values it may emit.]]> |
| </doc> |
| </method> |
| <method name="fillJoinCollector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="iterkey" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For all child RRs offering the key provided, obtain an iterator |
| at that position in the JoinCollector.]]> |
| </doc> |
| </method> |
| <method name="compareTo" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <doc> |
| <![CDATA[Implement Comparable contract (compare key of join or head of heap |
| with that of another).]]> |
| </doc> |
| </method> |
| <method name="createKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a new key value common to all child RRs. |
| @throws ClassCastException if key classes differ.]]> |
| </doc> |
| </method> |
| <method name="createInternalValue" return="org.apache.hadoop.mapred.join.TupleWritable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a value to be used internally for joins.]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Unsupported (returns zero in all cases).]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close all child RRs.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Report progress as the minimum of all child RR progress.]]> |
| </doc> |
| </method> |
| <field name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector" |
| transient="false" volatile="false" |
| static="false" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="kids" type="org.apache.hadoop.mapred.join.ComposableRecordReader[]" |
| transient="false" volatile="false" |
| static="false" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key |
| type and partitioning.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.CompositeRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.InnerJoinRecordReader --> |
| <class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="combine" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Return true iff the tuple is full (all data sources contain this key).]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Full inner join.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.InnerJoinRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.JoinRecordReader --> |
| <class name="JoinRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <constructor name="JoinRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Emit the next set of key, value pairs as defined by the child |
| RecordReaders and operation associated with this composite RR.]]> |
| </doc> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.mapred.join.TupleWritable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return an iterator wrapping the JoinCollector.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.JoinRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader --> |
| <class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <constructor name="MultiFilterRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="emit" return="V" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For each tuple emitted, return a value (typically one of the values |
| in the tuple). |
| Modifying the Writables in the tuple is permitted and unlikely to affect |
| join behavior in most cases, but it is not recommended. It's safer to |
| clone first.]]> |
| </doc> |
| </method> |
| <method name="combine" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Default implementation offers {@link #emit} every Tuple from the |
| collector (the outer join of child RRs).]]> |
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="createValue" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return an iterator returning a single value from the tuple. |
| @see MultiFilterDelegationIterator]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Base class for Composite join returning values derived from multiple |
| sources, but generally not tuples.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.OuterJoinRecordReader --> |
| <class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="combine" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Emit everything from the collector.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Full outer join.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.OuterJoinRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.OverrideRecordReader --> |
| <class name="OverrideRecordReader" extends="org.apache.hadoop.mapred.join.MultiFilterRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="emit" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Emit the value with the highest position in the tuple.]]> |
| </doc> |
| </method> |
| <method name="fillJoinCollector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="iterkey" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Instead of filling the JoinCollector with iterators from all |
| data sources, fill only the rightmost for this key. |
| This not only saves space by discarding the other sources, but |
| it also emits the number of key-value pairs in the preferred |
| RecordReader instead of repeating that stream n times, where |
| n is the cardinality of the cross product of the discarded |
| streams for the given key.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Prefer the "rightmost" data source for this key. |
| For example, <tt>override(S1,S2,S3)</tt> will prefer values |
| from S3 over S2, and values from S2 over S1 for all keys |
| emitted from all sources.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.OverrideRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser --> |
| <class name="Parser" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Parser" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[Very simple shift-reduce parser for join expressions. |
| |
| This should be sufficient for the user extension permitted now, but ought to |
| be replaced with a parser generator if more complex grammars are supported. |
| In particular, this "shift-reduce" parser has no states. Each set |
| of formals requires a different internal node type, which is responsible for |
| interpreting the list of tokens it receives. This is sufficient for the |
| current grammar, but it has several annoying properties that might inhibit |
| extension. In particular, parentheses are always function calls; an |
| algebraic or filter grammar would not only require a node type, but must |
| also work around the internals of this parser. |
| |
| For most other cases, adding classes to the hierarchy- particularly by |
| extending JoinRecordReader and MultiFilterRecordReader- is fairly |
| straightforward. One need only override the relevant method(s) (usually only |
| {@link CompositeRecordReader#combine}) and include a property to map its |
| value to an identifier in the parser.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.Node --> |
| <class name="Parser.Node" extends="java.lang.Object" |
| abstract="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/> |
| <constructor name="Node" type="java.lang.String" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="addIdentifier" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="ident" type="java.lang.String"/> |
| <param name="mcstrSig" type="java.lang.Class[]"/> |
| <param name="nodetype" type="java.lang.Class"/> |
| <param name="cl" type="java.lang.Class"/> |
| <exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/> |
| <doc> |
| <![CDATA[For a given identifier, add a mapping to the nodetype for the parse |
| tree and to the ComposableRecordReader to be created, including the |
| formals required to invoke the constructor. |
| The nodetype and constructor signature should be filled in from the |
| child node.]]> |
| </doc> |
| </method> |
| <method name="setID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="id" type="int"/> |
| </method> |
| <method name="setKeyComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="cmpcl" type="java.lang.Class"/> |
| </method> |
| <field name="rrCstrMap" type="java.util.Map" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="id" type="int" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="ident" type="java.lang.String" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="cmpcl" type="java.lang.Class" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.Node --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.NodeToken --> |
| <class name="Parser.NodeToken" extends="org.apache.hadoop.mapred.join.Parser.Token" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.NodeToken --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.NumToken --> |
| <class name="Parser.NumToken" extends="org.apache.hadoop.mapred.join.Parser.Token" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="NumToken" type="double" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getNum" return="double" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.NumToken --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.StrToken --> |
| <class name="Parser.StrToken" extends="org.apache.hadoop.mapred.join.Parser.Token" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="StrToken" type="org.apache.hadoop.mapred.join.Parser.TType, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getStr" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.StrToken --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.Token --> |
| <class name="Parser.Token" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getType" return="org.apache.hadoop.mapred.join.Parser.TType" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getNum" return="double" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getStr" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Tagged-union type for tokens from the join expression. |
| @see Parser.TType]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.Token --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.TType --> |
| <class name="Parser.TType" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.join.Parser.TType[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.join.Parser.TType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.TType --> |
| <!-- start interface org.apache.hadoop.mapred.join.ResetableIterator --> |
| <interface name="ResetableIterator" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/> |
| <doc> |
| <![CDATA[This defines an interface to a stateful Iterator that can replay elements |
| added to it directly. |
| Note that this does not extend {@link java.util.Iterator}.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.join.ResetableIterator --> |
| <!-- start class org.apache.hadoop.mapred.join.StreamBackedIterator --> |
| <class name="StreamBackedIterator" extends="org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> |
| <constructor name="StreamBackedIterator" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[This class provides an implementation of ResetableIterator. This |
| implementation uses a byte array to store elements added to it.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.StreamBackedIterator --> |
| <!-- start class org.apache.hadoop.mapred.join.TupleWritable --> |
| <class name="TupleWritable" extends="org.apache.hadoop.mapreduce.lib.join.TupleWritable" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TupleWritable" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create an empty tuple with no allocated storage for writables.]]> |
| </doc> |
| </constructor> |
| <constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Initialize tuple with storage; unknown whether any of them contain |
| "written" values.]]> |
| </doc> |
| </constructor> |
| <doc> |
| <![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s. |
| |
| This is *not* a general-purpose tuple type. In almost all cases, users are |
| encouraged to implement their own serializable types, which can perform |
| better validation and provide more efficient encodings than this class is |
| capable of. TupleWritable relies on the join framework for type safety and |
| assumes its instances will rarely be persisted, assumptions not only |
| incompatible with, but contrary to the general case. |
| |
| @see org.apache.hadoop.io.Writable]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.TupleWritable --> |
| <!-- start class org.apache.hadoop.mapred.join.WrappedRecordReader --> |
| <class name="WrappedRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <method name="id" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="key" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the key at the head of this RR.]]> |
| </doc> |
| </method> |
| <method name="key" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="qkey" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Clone the key at the head of this RR into the object supplied.]]> |
| </doc> |
| </method> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return true if the RR- including the k,v pair stored in this object- |
| is exhausted.]]> |
| </doc> |
| </method> |
| <method name="skip" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]> |
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Read the next k,v pair into the head of this object; return true iff |
| the RR and this are exhausted.]]> |
| </doc> |
| </method> |
| <method name="accept" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add an iterator to the collector at the position occupied by this |
| RecordReader over the values in this stream paired with the key |
| provided (i.e. register a stream of values from this source matching K |
| with a collector).]]> |
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="U"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Write key-value pair at the head of this stream to the objects provided; |
| get next key-value pair from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="createKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Request new key from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="createValue" return="U" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Request new value from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Request progress from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Request position from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Forward close request to proxied RR.]]> |
| </doc> |
| </method> |
| <method name="compareTo" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <doc> |
| <![CDATA[Implement Comparable contract (compare key at head of proxied RR |
| with that of another).]]> |
| </doc> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[Return true iff compareTo(other) returns 0.]]> |
| </doc> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[Proxy class for a RecordReader participating in the join framework. |
| This class keeps track of the "head" key-value pair for the |
| provided RecordReader and keeps a store of values matching a key when |
| this source is participating in a join.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.WrappedRecordReader --> |
| </package> |
| <package name="org.apache.hadoop.mapred.lib"> |
| <!-- start class org.apache.hadoop.mapred.lib.BinaryPartitioner --> |
| <class name="BinaryPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Partitioner"/> |
| <constructor name="BinaryPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <doc> |
| <![CDATA[Partition {@link BinaryComparable} keys using a configurable part of |
| the bytes array returned by {@link BinaryComparable#getBytes()}. |
| |
| @see org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.BinaryPartitioner --> |
| <!-- start class org.apache.hadoop.mapred.lib.ChainMapper --> |
| <class name="ChainMapper" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <constructor name="ChainMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructor.]]> |
| </doc> |
| </constructor> |
| <method name="addMapper" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="klass" type="java.lang.Class"/> |
| <param name="inputKeyClass" type="java.lang.Class"/> |
| <param name="inputValueClass" type="java.lang.Class"/> |
| <param name="outputKeyClass" type="java.lang.Class"/> |
| <param name="outputValueClass" type="java.lang.Class"/> |
| <param name="byValue" type="boolean"/> |
| <param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Adds a Mapper class to the chain job's JobConf. |
| <p> |
| It has to be specified how key and values are passed from one element of |
| the chain to the next, by value or by reference. If a Mapper leverages the |
| assumed semantics that the key and values are not modified by the collector |
| 'by value' must be used. If the Mapper does not expect this semantics, as |
| an optimization to avoid serialization and deserialization 'by reference' |
| can be used. |
| <p> |
| For the added Mapper the configuration given for it, |
| <code>mapperConf</code>, has precedence over the job's JobConf. This |
| precedence is in effect when the task is running. |
| <p> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainMapper, this is done by the addMapper for the last mapper in the chain |
| <p> |
| |
| @param job job's JobConf to add the Mapper class. |
| @param klass the Mapper class to add. |
| @param inputKeyClass mapper input key class. |
| @param inputValueClass mapper input value class. |
| @param outputKeyClass mapper output key class. |
| @param outputValueClass mapper output value class. |
| @param byValue indicates if key/values should be passed by value |
| to the next Mapper in the chain, if any. |
| @param mapperConf a JobConf with the configuration for the Mapper |
| class. It is recommended to use a JobConf without default values using the |
| <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Configures the ChainMapper and all the Mappers in the chain. |
| <p> |
| If this method is overridden <code>super.configure(...)</code> should be |
| invoked at the beginning of the overriding method.]]> |
| </doc> |
| </method> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Chains the <code>map(...)</code> methods of the Mappers in the chain.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Closes the ChainMapper and all the Mappers in the chain. |
| <p> |
| If this method is overridden <code>super.close()</code> should be |
| invoked at the end of the overriding method.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The ChainMapper class allows the use of multiple Mapper classes within a single |
| Map task. |
| <p> |
| The Mapper classes are invoked in a chained (or piped) fashion, the output of |
| the first becomes the input of the second, and so on until the last Mapper, |
| the output of the last Mapper will be written to the task's output. |
| <p> |
| The key functionality of this feature is that the Mappers in the chain do not |
| need to be aware that they are executed in a chain. This enables having |
| reusable specialized Mappers that can be combined to perform composite |
| operations within a single task. |
| <p> |
| Special care has to be taken when creating chains that the key/values output |
| by a Mapper are valid for the following Mapper in the chain. It is assumed |
| all Mappers and the Reducer in the chain use matching output and input key and |
| value classes as no conversion is done by the chaining code. |
| <p> |
| Using the ChainMapper and the ChainReducer classes it is possible to compose |
| Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An |
| immediate benefit of this pattern is a dramatic reduction in disk IO. |
| <p> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainMapper, this is done by the addMapper for the last mapper in the chain. |
| <p> |
| ChainMapper usage pattern: |
| <p> |
| <pre> |
| ... |
| conf.setJobName("chain"); |
| conf.setInputFormat(TextInputFormat.class); |
| conf.setOutputFormat(TextOutputFormat.class); |
| |
| JobConf mapAConf = new JobConf(false); |
| ... |
| ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, |
| Text.class, Text.class, true, mapAConf); |
| |
| JobConf mapBConf = new JobConf(false); |
| ... |
| ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, |
| LongWritable.class, Text.class, false, mapBConf); |
| |
| JobConf reduceConf = new JobConf(false); |
| ... |
| ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, |
| Text.class, Text.class, true, reduceConf); |
| |
| ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, |
| LongWritable.class, Text.class, false, null); |
| |
| ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, |
| LongWritable.class, LongWritable.class, true, null); |
| |
| FileInputFormat.setInputPaths(conf, inDir); |
| FileOutputFormat.setOutputPath(conf, outDir); |
| ... |
| |
| JobClient jc = new JobClient(conf); |
| RunningJob job = jc.submitJob(conf); |
| ... |
| </pre>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.ChainMapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.ChainReducer --> |
| <class name="ChainReducer" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Reducer"/> |
| <constructor name="ChainReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructor.]]> |
| </doc> |
| </constructor> |
| <method name="setReducer" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="klass" type="java.lang.Class"/> |
| <param name="inputKeyClass" type="java.lang.Class"/> |
| <param name="inputValueClass" type="java.lang.Class"/> |
| <param name="outputKeyClass" type="java.lang.Class"/> |
| <param name="outputValueClass" type="java.lang.Class"/> |
| <param name="byValue" type="boolean"/> |
| <param name="reducerConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Sets the Reducer class to the chain job's JobConf. |
| <p> |
| It has to be specified how key and values are passed from one element of |
| the chain to the next, by value or by reference. If a Reducer leverages the |
| assumed semantics that the key and values are not modified by the collector |
| 'by value' must be used. If the Reducer does not expect this semantics, as |
| an optimization to avoid serialization and deserialization 'by reference' |
| can be used. |
| <p> |
| For the added Reducer the configuration given for it, |
| <code>reducerConf</code>, has precedence over the job's JobConf. This |
| precedence is in effect when the task is running. |
| <p> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainReducer, this is done by the setReducer or the addMapper for the last |
| element in the chain. |
| |
| @param job job's JobConf to add the Reducer class. |
| @param klass the Reducer class to add. |
| @param inputKeyClass reducer input key class. |
| @param inputValueClass reducer input value class. |
| @param outputKeyClass reducer output key class. |
| @param outputValueClass reducer output value class. |
| @param byValue indicates if key/values should be passed by value |
| to the next Mapper in the chain, if any. |
| @param reducerConf a JobConf with the configuration for the Reducer |
| class. It is recommended to use a JobConf without default values using the |
| <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]> |
| </doc> |
| </method> |
| <method name="addMapper" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="klass" type="java.lang.Class"/> |
| <param name="inputKeyClass" type="java.lang.Class"/> |
| <param name="inputValueClass" type="java.lang.Class"/> |
| <param name="outputKeyClass" type="java.lang.Class"/> |
| <param name="outputValueClass" type="java.lang.Class"/> |
| <param name="byValue" type="boolean"/> |
| <param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Adds a Mapper class to the chain job's JobConf. |
| <p> |
| It has to be specified how key and values are passed from one element of |
| the chain to the next, by value or by reference. If a Mapper leverages the |
| assumed semantics that the key and values are not modified by the collector |
| 'by value' must be used. If the Mapper does not expect this semantics, as |
| an optimization to avoid serialization and deserialization 'by reference' |
| can be used. |
| <p> |
| For the added Mapper the configuration given for it, |
| <code>mapperConf</code>, has precedence over the job's JobConf. This |
| precedence is in effect when the task is running. |
| <p> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainMapper, this is done by the addMapper for the last mapper in the chain |
| . |
| |
| @param job chain job's JobConf to add the Mapper class. |
| @param klass the Mapper class to add. |
| @param inputKeyClass mapper input key class. |
| @param inputValueClass mapper input value class. |
| @param outputKeyClass mapper output key class. |
| @param outputValueClass mapper output value class. |
| @param byValue indicates if key/values should be passed by value |
| to the next Mapper in the chain, if any. |
| @param mapperConf a JobConf with the configuration for the Mapper |
| class. It is recommended to use a JobConf without default values using the |
| <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Configures the ChainReducer, the Reducer and all the Mappers in the chain. |
| <p> |
| If this method is overridden <code>super.configure(...)</code> should be |
| invoked at the beginning of the overriding method.]]> |
| </doc> |
| </method> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Chains the <code>reduce(...)</code> method of the Reducer with the |
| <code>map(...) </code> methods of the Mappers in the chain.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Closes the ChainReducer, the Reducer and all the Mappers in the chain. |
| <p> |
| If this method is overridden <code>super.close()</code> should be |
| invoked at the end of the overriding method.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The ChainReducer class allows chaining multiple Mapper classes after a |
| Reducer within the Reducer task. |
| <p> |
| For each record output by the Reducer, the Mapper classes are invoked in a |
| chained (or piped) fashion, the output of the first becomes the input of the |
| second, and so on until the last Mapper, the output of the last Mapper will |
| be written to the task's output. |
| <p> |
| The key functionality of this feature is that the Mappers in the chain do not |
| need to be aware that they are executed after the Reducer or in a chain. |
| This enables having reusable specialized Mappers that can be combined to |
| perform composite operations within a single task. |
| <p> |
| Special care has to be taken when creating chains that the key/values output |
| by a Mapper are valid for the following Mapper in the chain. It is assumed |
| all Mappers and the Reducer in the chain use matching output and input key and |
| value classes as no conversion is done by the chaining code. |
| <p> |
| Using the ChainMapper and the ChainReducer classes it is possible to compose |
| Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An |
| immediate benefit of this pattern is a dramatic reduction in disk IO. |
| <p> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainReducer, this is done by the setReducer or the addMapper for the last |
| element in the chain. |
| <p> |
| ChainReducer usage pattern: |
| <p> |
| <pre> |
| ... |
| conf.setJobName("chain"); |
| conf.setInputFormat(TextInputFormat.class); |
| conf.setOutputFormat(TextOutputFormat.class); |
| |
| JobConf mapAConf = new JobConf(false); |
| ... |
| ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, |
| Text.class, Text.class, true, mapAConf); |
| |
| JobConf mapBConf = new JobConf(false); |
| ... |
| ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, |
| LongWritable.class, Text.class, false, mapBConf); |
| |
| JobConf reduceConf = new JobConf(false); |
| ... |
| ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, |
| Text.class, Text.class, true, reduceConf); |
| |
| ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, |
| LongWritable.class, Text.class, false, null); |
| |
| ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, |
| LongWritable.class, LongWritable.class, true, null); |
| |
| FileInputFormat.setInputPaths(conf, inDir); |
| FileOutputFormat.setOutputPath(conf, outDir); |
| ... |
| |
| JobClient jc = new JobClient(conf); |
| RunningJob job = jc.submitJob(conf); |
| ... |
| </pre>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.ChainReducer --> |
| <!-- start class org.apache.hadoop.mapred.lib.CombineFileInputFormat --> |
| <class name="CombineFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputFormat"/> |
| <constructor name="CombineFileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[default constructor]]> |
| </doc> |
| </constructor> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createPool" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="Use {@link #createPool(List)}."> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="filters" type="java.util.List"/> |
| <doc> |
| <![CDATA[Create a new pool and add the filters to it. |
| A split cannot have files from different pools. |
| @deprecated Use {@link #createPool(List)}.]]> |
| </doc> |
| </method> |
| <method name="createPool" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="Use {@link #createPool(PathFilter...)}."> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/> |
| <doc> |
| <![CDATA[Create a new pool and add the filters to it. |
| A pathname can satisfy any one of the specified filters. |
| A split cannot have files from different pools. |
| @deprecated Use {@link #createPool(PathFilter...)}.]]> |
| </doc> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This is not implemented yet.]]> |
| </doc> |
| </method> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[List input directories. |
| Subclasses may override to, e.g., select only files matching a regular |
| expression. |
| |
| @param job the job to list input paths for |
| @return array of FileStatus objects |
| @throws IOException if zero items.]]> |
| </doc> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <doc> |
| <![CDATA[An abstract {@link org.apache.hadoop.mapred.InputFormat} that returns {@link CombineFileSplit}'s |
| in {@link org.apache.hadoop.mapred.InputFormat#getSplits(JobConf, int)} method. |
| Splits are constructed from the files under the input paths. |
| A split cannot have files from different pools. |
| Each split returned may contain blocks from different files. |
| If a maxSplitSize is specified, then blocks on the same node are |
| combined to form a single split. Blocks that are left over are |
| then combined with other blocks in the same rack. |
| If maxSplitSize is not specified, then blocks from the same rack |
| are combined in a single split; no attempt is made to create |
| node-local splits. |
| If the maxSplitSize is equal to the block size, then this class |
| is similar to the default splitting behaviour in Hadoop: each |
| block is a locally processed split. |
| Subclasses implement {@link org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit, JobConf, Reporter)} |
| to construct <code>RecordReader</code>'s for <code>CombineFileSplit</code>'s. |
| @see CombineFileSplit]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.CombineFileInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.CombineFileRecordReader --> |
| <class name="CombineFileRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.lib.CombineFileSplit, org.apache.hadoop.mapred.Reporter, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[A generic RecordReader that can hand out different recordReaders |
| for each chunk in the CombineFileSplit.]]> |
| </doc> |
| </constructor> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[return the amount of data processed]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[return progress based on the amount of data processed so far.]]> |
| </doc> |
| </method> |
| <method name="initNextRecordReader" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]> |
| </doc> |
| </method> |
| <field name="split" type="org.apache.hadoop.mapred.lib.CombineFileSplit" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="jc" type="org.apache.hadoop.mapred.JobConf" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="reporter" type="org.apache.hadoop.mapred.Reporter" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="rrConstructor" type="java.lang.reflect.Constructor" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="idx" type="int" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="progress" type="long" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="curReader" type="org.apache.hadoop.mapred.RecordReader" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A generic RecordReader that can hand out different recordReaders |
| for each chunk in a {@link CombineFileSplit}. |
| A CombineFileSplit can combine data chunks from multiple files. |
| This class allows using different RecordReaders for processing |
| these data chunks from different files. |
| @see CombineFileSplit]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.CombineFileRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.lib.CombineFileRecordReaderWrapper --> |
| <class name="CombineFileRecordReaderWrapper" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="CombineFileRecordReaderWrapper" type="org.apache.hadoop.mapred.FileInputFormat, org.apache.hadoop.mapred.lib.CombineFileSplit, org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.Reporter, java.lang.Integer" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A wrapper class for a record reader that handles a single file split. It |
| delegates most of the methods to the wrapped instance. A concrete subclass |
| needs to provide a constructor that calls this parent constructor with the |
| appropriate input format. The subclass constructor must satisfy the specific |
| constructor signature that is required by |
| <code>CombineFileRecordReader</code>. |
| |
| Subclassing is needed to get a concrete record reader wrapper because of the |
| constructor requirement. |
| |
| @see CombineFileRecordReader |
| @see CombineFileInputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.CombineFileRecordReaderWrapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.CombineFileSplit --> |
| <class name="CombineFileSplit" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputSplit"/> |
| <constructor name="CombineFileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.lib.CombineFileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Copy constructor]]> |
| </doc> |
| </constructor> |
| <method name="getJob" return="org.apache.hadoop.mapred.JobConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.CombineFileSplit --> |
| <!-- start class org.apache.hadoop.mapred.lib.CombineSequenceFileInputFormat --> |
| <class name="CombineSequenceFileInputFormat" extends="org.apache.hadoop.mapred.lib.CombineFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="CombineSequenceFileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Input format that is a <code>CombineFileInputFormat</code>-equivalent for |
| <code>SequenceFileInputFormat</code>. |
| |
| @see CombineFileInputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.CombineSequenceFileInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.CombineTextInputFormat --> |
| <class name="CombineTextInputFormat" extends="org.apache.hadoop.mapred.lib.CombineFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="CombineTextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Input format that is a <code>CombineFileInputFormat</code>-equivalent for |
| <code>TextInputFormat</code>. |
| |
| @see CombineFileInputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.CombineTextInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce --> |
| <class name="FieldSelectionMapReduce" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <implements name="org.apache.hadoop.mapred.Reducer"/> |
| <constructor name="FieldSelectionMapReduce" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="val" type="V"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[The identity function. Input key/value pair is written directly to output.]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="LOG" type="org.slf4j.Logger" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class implements a mapper/reducer class that can be used to perform |
| field selections in a manner similar to unix cut. The input data is treated |
| as fields separated by a user specified separator (the default value is |
| "\t"). The user can specify a list of fields that form the map output keys, |
| and a list of fields that form the map output values. If the inputformat is |
| TextInputFormat, the mapper will ignore the key to the map function. and the |
| fields are from the value only. Otherwise, the fields are the union of those |
| from the key and those from the value. |
| |
| The field separator is under attribute "mapreduce.fieldsel.data.field.separator" |
| |
| The map output field list spec is under attribute |
| "mapreduce.fieldsel.map.output.key.value.fields.spec". |
| The value is expected to be like "keyFieldsSpec:valueFieldsSpec" |
| key/valueFieldsSpec are comma (,) separated field spec: fieldSpec,fieldSpec,fieldSpec ... |
| Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range |
| (like 2-5) to specify a range of fields, or an open range (like 3-) specifying all |
| the fields starting from field 3. The open range field spec applies to value fields only. |
| They have no effect on the key fields. |
| |
| Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys, |
| and use fields 6,5,1,2,3,7 and above for values. |
| |
| The reduce output field list spec is under attribute |
| "mapreduce.fieldsel.reduce.output.key.value.fields.spec". |
| |
| The reducer extracts output key/value pairs in a similar manner, except that |
| the key is never ignored.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce --> |
| <!-- start class org.apache.hadoop.mapred.lib.FilterOutputFormat --> |
| <class name="FilterOutputFormat" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.OutputFormat"/> |
| <constructor name="FilterOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="FilterOutputFormat" type="org.apache.hadoop.mapred.OutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a FilterOutputFormat based on the supplied output format. |
| @param out the underlying OutputFormat]]> |
| </doc> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="baseOut" type="org.apache.hadoop.mapred.OutputFormat" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[FilterOutputFormat is a convenience class that wraps OutputFormat.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.FilterOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.HashPartitioner --> |
| <class name="HashPartitioner" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Partitioner"/> |
| <constructor name="HashPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="getPartition" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K2"/> |
| <param name="value" type="V2"/> |
| <param name="numReduceTasks" type="int"/> |
| <doc> |
| <![CDATA[Use {@link Object#hashCode()} to partition.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Partition keys by their {@link Object#hashCode()}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.HashPartitioner --> |
| <!-- start class org.apache.hadoop.mapred.lib.IdentityMapper --> |
| <class name="IdentityMapper" extends="org.apache.hadoop.mapred.MapReduceBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <constructor name="IdentityMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="val" type="V"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[The identity function. Input key/value pair is written directly to |
| output.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Implements the identity function, mapping inputs directly to outputs.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.IdentityMapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.IdentityReducer --> |
| <class name="IdentityReducer" extends="org.apache.hadoop.mapred.MapReduceBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Reducer"/> |
| <constructor name="IdentityReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Writes all keys and values directly to output.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Performs no reduction, writing all input values directly to the output.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.IdentityReducer --> |
| <!-- start class org.apache.hadoop.mapred.lib.InputSampler --> |
| <class name="InputSampler" extends="org.apache.hadoop.mapreduce.lib.partition.InputSampler" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InputSampler" type="org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="writePartitionFile" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="sampler" type="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.InputSampler --> |
| <!-- start class org.apache.hadoop.mapred.lib.InverseMapper --> |
| <class name="InverseMapper" extends="org.apache.hadoop.mapred.MapReduceBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <constructor name="InverseMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[The inverse function. Input keys and values are swapped.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A {@link Mapper} that swaps keys and values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.InverseMapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator --> |
| <class name="KeyFieldBasedComparator" extends="org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="KeyFieldBasedComparator" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <doc> |
| <![CDATA[This comparator implementation provides a subset of the features provided |
| by the Unix/GNU Sort. In particular, the supported features are: |
| -n, (Sort numerically) |
| -r, (Reverse the result of comparison) |
| -k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number |
| of the field to use, and c is the number of the first character from the |
| beginning of the field. Fields and character posns are numbered starting |
| with 1; a character position of zero in pos2 indicates the field's last |
| character. If '.c' is omitted from pos1, it defaults to 1 (the beginning |
| of the field); if omitted from pos2, it defaults to 0 (the end of the |
| field). opts are ordering options (any of 'nr' as described above). |
| We assume that the fields in the key are separated by |
| {@link JobContext#MAP_OUTPUT_KEY_FIELD_SEPARATOR}]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator --> |
| <!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner --> |
| <class name="KeyFieldBasedPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Partitioner"/> |
| <constructor name="KeyFieldBasedPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <doc> |
| <![CDATA[Defines a way to partition keys based on certain key fields (also see |
| {@link KeyFieldBasedComparator}). |
| The key specification supported is of the form -k pos1[,pos2], where, |
| pos is of the form f[.c][opts], where f is the number |
| of the key field to use, and c is the number of the first character from |
| the beginning of the field. Fields and character posns are numbered |
| starting with 1; a character position of zero in pos2 indicates the |
| field's last character. If '.c' is omitted from pos1, it defaults to 1 |
| (the beginning of the field); if omitted from pos2, it defaults to 0 |
| (the end of the field).]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner --> |
| <!-- start class org.apache.hadoop.mapred.lib.LazyOutputFormat --> |
| <class name="LazyOutputFormat" extends="org.apache.hadoop.mapred.lib.FilterOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="LazyOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setOutputFormatClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the underlying output format for LazyOutputFormat. |
| @param job the {@link JobConf} to modify |
| @param theClass the underlying class]]> |
| </doc> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A Convenience class that creates output lazily.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.LazyOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.LongSumReducer --> |
| <class name="LongSumReducer" extends="org.apache.hadoop.mapred.MapReduceBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Reducer"/> |
| <constructor name="LongSumReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A {@link Reducer} that sums long values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.LongSumReducer --> |
| <!-- start class org.apache.hadoop.mapred.lib.MultipleInputs --> |
| <class name="MultipleInputs" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultipleInputs" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="addInputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <param name="inputFormatClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Add a {@link Path} with a custom {@link InputFormat} to the list of |
| inputs for the map-reduce job. |
| |
| @param conf The configuration of the job |
| @param path {@link Path} to be added to the list of inputs for the job |
| @param inputFormatClass {@link InputFormat} class to use for this path]]> |
| </doc> |
| </method> |
| <method name="addInputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <param name="inputFormatClass" type="java.lang.Class"/> |
| <param name="mapperClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Add a {@link Path} with a custom {@link InputFormat} and |
| {@link Mapper} to the list of inputs for the map-reduce job. |
| |
| @param conf The configuration of the job |
| @param path {@link Path} to be added to the list of inputs for the job |
| @param inputFormatClass {@link InputFormat} class to use for this path |
| @param mapperClass {@link Mapper} class to use for this path]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class supports MapReduce jobs that have multiple input paths with |
| a different {@link InputFormat} and {@link Mapper} for each path]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.MultipleInputs --> |
| <!-- start class org.apache.hadoop.mapred.lib.MultipleOutputFormat --> |
| <class name="MultipleOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultipleOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="arg3" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create a composite record writer that can write key/value data to different |
| output files |
| |
| @param fs |
| the file system to use |
| @param job |
| the job conf for the job |
| @param name |
 the leaf file name for the output file (such as part-00000)
| @param arg3 |
| a progressable for reporting progress. |
| @return a composite record writer |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="generateLeafFileName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Generate the leaf name for the output file name. The default behavior does |
| not change the leaf file name (such as part-00000) |
| |
| @param name |
| the leaf file name for the output file |
| @return the given leaf file name]]> |
| </doc> |
| </method> |
| <method name="generateFileNameForKeyValue" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Generate the file output file name based on the given key and the leaf file |
| name. The default behavior is that the file name does not depend on the |
| key. |
| |
| @param key |
| the key of the output data |
| @param name |
| the leaf file name |
| @return generated file name]]> |
| </doc> |
| </method> |
| <method name="generateActualKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <doc> |
| <![CDATA[Generate the actual key from the given key/value. The default behavior is that |
| the actual key is equal to the given key |
| |
| @param key |
| the key of the output data |
| @param value |
| the value of the output data |
| @return the actual key derived from the given key/value]]> |
| </doc> |
| </method> |
| <method name="generateActualValue" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <doc> |
| <![CDATA[Generate the actual value from the given key and value. The default behavior is that |
| the actual value is equal to the given value |
| |
| @param key |
| the key of the output data |
| @param value |
| the value of the output data |
| @return the actual value derived from the given key/value]]> |
| </doc> |
| </method> |
| <method name="getInputFileBasedOutputFileName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
 <![CDATA[Generate the outfile name based on a given name and the input file name. If
 the {@link JobContext#MAP_INPUT_FILE} does not exist (i.e. this is not for a map only job),
| the given name is returned unchanged. If the config value for |
| "num.of.trailing.legs.to.use" is not set, or set 0 or negative, the given |
| name is returned unchanged. Otherwise, return a file name consisting of the |
| N trailing legs of the input file name where N is the config value for |
| "num.of.trailing.legs.to.use". |
| |
| @param job |
| the job config |
| @param name |
| the output file name |
 @return the outfile name based on a given name and the input file name.]]>
| </doc> |
| </method> |
| <method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="arg3" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@param fs |
| the file system to use |
| @param job |
| a job conf object |
| @param name |
| the name of the file over which a record writer object will be |
| constructed |
| @param arg3 |
| a progressable object |
| @return A RecordWriter object over the given file |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This abstract class extends the FileOutputFormat, allowing to write the |
| output data to different output files. There are three basic use cases for |
| this class. |
| |
| Case one: This class is used for a map reduce job with at least one reducer. |
| The reducer wants to write data to different files depending on the actual |
| keys. It is assumed that a key (or value) encodes the actual key (value) |
| and the desired location for the actual key (value). |
| |
| Case two: This class is used for a map only job. The job wants to use an |
| output file name that is either a part of the input file name of the input |
| data, or some derivation of it. |
| |
| Case three: This class is used for a map only job. The job wants to use an |
 output file name that depends on both the keys and the input file name.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.MultipleOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.MultipleOutputs --> |
| <class name="MultipleOutputs" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultipleOutputs" type="org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Creates and initializes multiple named outputs support, it should be |
| instantiated in the Mapper/Reducer configure method. |
| |
| @param job the job configuration object]]> |
| </doc> |
| </constructor> |
| <method name="getNamedOutputsList" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Returns list of channel names. |
| |
| @param conf job conf |
| @return List of channel Names]]> |
| </doc> |
| </method> |
| <method name="isMultiNamedOutput" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="namedOutput" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Returns if a named output is multiple. |
| |
| @param conf job conf |
| @param namedOutput named output |
 @return <code>true</code> if the named output is multi, <code>false</code>
 if it is single. If the named output is not defined it returns
 <code>false</code>]]>
| </doc> |
| </method> |
| <method name="getNamedOutputFormatClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="namedOutput" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Returns the named output OutputFormat. |
| |
| @param conf job conf |
| @param namedOutput named output |
| @return namedOutput OutputFormat]]> |
| </doc> |
| </method> |
| <method name="getNamedOutputKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="namedOutput" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Returns the key class for a named output. |
| |
| @param conf job conf |
| @param namedOutput named output |
| @return class for the named output key]]> |
| </doc> |
| </method> |
| <method name="getNamedOutputValueClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="namedOutput" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Returns the value class for a named output. |
| |
| @param conf job conf |
| @param namedOutput named output |
| @return class of named output value]]> |
| </doc> |
| </method> |
| <method name="addNamedOutput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="namedOutput" type="java.lang.String"/> |
| <param name="outputFormatClass" type="java.lang.Class"/> |
| <param name="keyClass" type="java.lang.Class"/> |
| <param name="valueClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Adds a named output for the job. |
| |
| @param conf job conf to add the named output |
| @param namedOutput named output name, it has to be a word, letters |
| and numbers only, cannot be the word 'part' as |
| that is reserved for the |
| default output. |
| @param outputFormatClass OutputFormat class. |
| @param keyClass key class |
| @param valueClass value class]]> |
| </doc> |
| </method> |
| <method name="addMultiNamedOutput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="namedOutput" type="java.lang.String"/> |
| <param name="outputFormatClass" type="java.lang.Class"/> |
| <param name="keyClass" type="java.lang.Class"/> |
| <param name="valueClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Adds a multi named output for the job. |
| |
| @param conf job conf to add the named output |
| @param namedOutput named output name, it has to be a word, letters |
| and numbers only, cannot be the word 'part' as |
| that is reserved for the |
| default output. |
| @param outputFormatClass OutputFormat class. |
| @param keyClass key class |
| @param valueClass value class]]> |
| </doc> |
| </method> |
| <method name="setCountersEnabled" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="enabled" type="boolean"/> |
| <doc> |
| <![CDATA[Enables or disables counters for the named outputs. |
| <p> |
| By default these counters are disabled. |
| <p> |
 MultipleOutputs supports counters, by default they are disabled.
| The counters group is the {@link MultipleOutputs} class name. |
| </p> |
| The names of the counters are the same as the named outputs. For multi |
| named outputs the name of the counter is the concatenation of the named |
| output, and underscore '_' and the multiname. |
| |
 @param conf job conf in which to enable or disable the counters.
| @param enabled indicates if the counters will be enabled or not.]]> |
| </doc> |
| </method> |
| <method name="getCountersEnabled" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Returns if the counters for the named outputs are enabled or not. |
| <p> |
| By default these counters are disabled. |
| <p> |
 MultipleOutputs supports counters, by default they are disabled.
| The counters group is the {@link MultipleOutputs} class name. |
| </p> |
| The names of the counters are the same as the named outputs. For multi |
| named outputs the name of the counter is the concatenation of the named |
| output, and underscore '_' and the multiname. |
| |
| |
 @param conf job conf to query for the counters setting.
| @return TRUE if the counters are enabled, FALSE if they are disabled.]]> |
| </doc> |
| </method> |
| <method name="getNamedOutputs" return="java.util.Iterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns iterator with the defined name outputs. |
| |
| @return iterator with the defined named outputs]]> |
| </doc> |
| </method> |
| <method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="namedOutput" type="java.lang.String"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the output collector for a named output. |
| |
| @param namedOutput the named output name |
| @param reporter the reporter |
| @return the output collector for the given named output |
| @throws IOException thrown if output collector could not be created]]> |
| </doc> |
| </method> |
| <method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="namedOutput" type="java.lang.String"/> |
| <param name="multiName" type="java.lang.String"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the output collector for a multi named output. |
| |
| @param namedOutput the named output name |
| @param multiName the multi name part |
| @param reporter the reporter |
| @return the output collector for the given named output |
| @throws IOException thrown if output collector could not be created]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Closes all the opened named outputs. |
| <p> |
 If overridden, subclasses must invoke <code>super.close()</code> at the
| end of their <code>close()</code> |
| |
| @throws java.io.IOException thrown if any of the MultipleOutput files |
| could not be closed properly.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The MultipleOutputs class simplifies writing to additional outputs other |
| than the job default output via the <code>OutputCollector</code> passed to |
| the <code>map()</code> and <code>reduce()</code> methods of the |
| <code>Mapper</code> and <code>Reducer</code> implementations. |
| <p> |
| Each additional output, or named output, may be configured with its own |
| <code>OutputFormat</code>, with its own key class and with its own value |
| class. |
| <p> |
 A named output can be a single file or a multi file. The latter is referred to as
| a multi named output. |
| <p> |
| A multi named output is an unbound set of files all sharing the same |
| <code>OutputFormat</code>, key class and value class configuration. |
| <p> |
| When named outputs are used within a <code>Mapper</code> implementation, |
| key/values written to a name output are not part of the reduce phase, only |
| key/values written to the job <code>OutputCollector</code> are part of the |
| reduce phase. |
| <p> |
 MultipleOutputs supports counters, by default they are disabled. The counters
| group is the {@link MultipleOutputs} class name. |
| </p> |
| The names of the counters are the same as the named outputs. For multi |
| named outputs the name of the counter is the concatenation of the named |
| output, and underscore '_' and the multiname. |
| <p> |
| Job configuration usage pattern is: |
| <pre> |
| |
| JobConf conf = new JobConf(); |
| |
| conf.setInputPath(inDir); |
| FileOutputFormat.setOutputPath(conf, outDir); |
| |
| conf.setMapperClass(MOMap.class); |
| conf.setReducerClass(MOReduce.class); |
| ... |
| |
| // Defines additional single text based output 'text' for the job |
| MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, |
| LongWritable.class, Text.class); |
| |
| // Defines additional multi sequencefile based output 'sequence' for the |
| // job |
| MultipleOutputs.addMultiNamedOutput(conf, "seq", |
| SequenceFileOutputFormat.class, |
| LongWritable.class, Text.class); |
| ... |
| |
| JobClient jc = new JobClient(); |
| RunningJob job = jc.submitJob(conf); |
| |
| ... |
| </pre> |
| <p> |
| Job configuration usage pattern is: |
| <pre> |
| |
| public class MOReduce implements |
| Reducer<WritableComparable, Writable> { |
| private MultipleOutputs mos; |
| |
| public void configure(JobConf conf) { |
| ... |
| mos = new MultipleOutputs(conf); |
| } |
| |
| public void reduce(WritableComparable key, Iterator<Writable> values, |
| OutputCollector output, Reporter reporter) |
| throws IOException { |
| ... |
| mos.getCollector("text", reporter).collect(key, new Text("Hello")); |
| mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye")); |
| mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau")); |
| ... |
| } |
| |
| public void close() throws IOException { |
| mos.close(); |
| ... |
| } |
| |
| } |
| </pre>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.MultipleOutputs --> |
| <!-- start class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat --> |
| <class name="MultipleSequenceFileOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultipleSequenceFileOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="arg3" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[This class extends the MultipleOutputFormat, allowing to write the output data |
| to different output files in sequence file output format.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat --> |
| <class name="MultipleTextOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultipleTextOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="arg3" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[This class extends the MultipleOutputFormat, allowing to write the output |
| data to different output files in Text output format.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.MultithreadedMapRunner --> |
| <class name="MultithreadedMapRunner" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.MapRunnable"/> |
| <constructor name="MultithreadedMapRunner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="input" type="org.apache.hadoop.mapred.RecordReader"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Multithreaded implementation for {@link MapRunnable}. |
| <p> |
 It can be used instead of the default implementation,
 {@link org.apache.hadoop.mapred.MapRunner}, when the Map
| operation is not CPU bound in order to improve throughput. |
| <p> |
| Map implementations using this MapRunnable must be thread-safe. |
| <p> |
| The Map-Reduce job has to be configured to use this MapRunnable class (using |
| the JobConf.setMapRunnerClass method) and |
| the number of threads the thread-pool can use with the |
| <code>mapred.map.multithreadedrunner.threads</code> property, its default |
| value is 10 threads. |
| <p>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.MultithreadedMapRunner --> |
| <!-- start class org.apache.hadoop.mapred.lib.NLineInputFormat --> |
| <class name="NLineInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="NLineInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Logically splits the set of input files for the job, splits N lines |
| of the input as one split. |
| |
| @see org.apache.hadoop.mapred.FileInputFormat#getSplits(JobConf, int)]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="createFileSplit" return="org.apache.hadoop.mapred.FileSplit" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fileName" type="org.apache.hadoop.fs.Path"/> |
| <param name="begin" type="long"/> |
| <param name="length" type="long"/> |
| <doc> |
| <![CDATA[NLineInputFormat uses LineRecordReader, which always reads |
| (and consumes) at least one character out of its upper split |
| boundary. So to make sure that each mapper gets N lines, we |
| move back the upper split limits of each split |
| by one character here. |
| @param fileName Path of file |
| @param begin the position of the first byte in the file to process |
| @param length number of bytes in InputSplit |
| @return FileSplit]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[NLineInputFormat which splits N lines of input as one split. |
| |
| In many "pleasantly" parallel applications, each process/mapper |
| processes the same input file (s), but with computations are |
| controlled by different parameters.(Referred to as "parameter sweeps"). |
| One way to achieve this, is to specify a set of parameters |
| (one set per line) as input in a control file |
| (which is the input path to the map-reduce application, |
 whereas the input dataset is specified
| via a config variable in JobConf.). |
| |
| The NLineInputFormat can be used in such applications, that splits |
| the input file such that by default, one line is fed as |
| a value to one map task, and key is the offset. |
| i.e. (k,v) is (LongWritable, Text). |
| The location hints will span the whole mapred cluster.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.NLineInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.NullOutputFormat --> |
| <class name="NullOutputFormat" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.OutputFormat"/> |
| <constructor name="NullOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <doc> |
| <![CDATA[Consume all outputs and put them in /dev/null.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.NullOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.RegexMapper --> |
| <class name="RegexMapper" extends="org.apache.hadoop.mapred.MapReduceBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <constructor name="RegexMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="org.apache.hadoop.io.Text"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.RegexMapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.TokenCountMapper --> |
| <class name="TokenCountMapper" extends="org.apache.hadoop.mapred.MapReduceBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <constructor name="TokenCountMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="org.apache.hadoop.io.Text"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A {@link Mapper} that maps text values into <token,freq> pairs. Uses |
| {@link StringTokenizer} to break text into tokens.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.TokenCountMapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.TotalOrderPartitioner --> |
| <class name="TotalOrderPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Partitioner"/> |
| <constructor name="TotalOrderPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="setPartitionFile" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use |
| {@link #setPartitionFile(Configuration, Path)} |
| instead"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="p" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Set the path to the SequenceFile storing the sorted partition keyset. |
| It must be the case that for <tt>R</tt> reduces, there are <tt>R-1</tt> |
| keys in the SequenceFile. |
| @deprecated Use |
| {@link #setPartitionFile(Configuration, Path)} |
| instead]]> |
| </doc> |
| </method> |
| <method name="getPartitionFile" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use |
| {@link #getPartitionFile(Configuration)} |
| instead"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the path to the SequenceFile storing the sorted partition keyset. |
| @see #setPartitionFile(JobConf,Path) |
| @deprecated Use |
| {@link #getPartitionFile(Configuration)} |
| instead]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Partitioner effecting a total order by reading split points from |
| an externally generated source.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.TotalOrderPartitioner --> |
| </package> |
| <package name="org.apache.hadoop.mapred.lib.aggregate"> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum --> |
| <class name="DoubleValueSum" extends="org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> |
| <constructor name="DoubleValueSum" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[This class implements a value aggregator that sums up a sequence of double |
| values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMax --> |
| <class name="LongValueMax" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> |
| <constructor name="LongValueMax" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
<![CDATA[This class implements a value aggregator that maintains the maximum of
| a sequence of long values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMax --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMin --> |
| <class name="LongValueMin" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> |
| <constructor name="LongValueMin" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
<![CDATA[This class implements a value aggregator that maintains the minimum of
| a sequence of long values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMin --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueSum --> |
| <class name="LongValueSum" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> |
| <constructor name="LongValueSum" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[This class implements a value aggregator that sums up |
| a sequence of long values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueSum --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMax --> |
| <class name="StringValueMax" extends="org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> |
| <constructor name="StringValueMax" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
<![CDATA[This class implements a value aggregator that maintains the biggest of
| a sequence of strings.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMax --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMin --> |
| <class name="StringValueMin" extends="org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> |
| <constructor name="StringValueMin" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
<![CDATA[This class implements a value aggregator that maintains the smallest of
| a sequence of strings.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMin --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount --> |
| <class name="UniqValueCount" extends="org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> |
| <constructor name="UniqValueCount" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[the default constructor]]> |
| </doc> |
| </constructor> |
| <constructor name="UniqValueCount" type="long" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[constructor |
| @param maxNum the limit in the number of unique values to keep.]]> |
| </doc> |
| </constructor> |
| <doc> |
| <![CDATA[This class implements a value aggregator that dedupes a sequence of objects.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor --> |
| <class name="UserDefinedValueAggregatorDescriptor" extends="org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/> |
| <constructor name="UserDefinedValueAggregatorDescriptor" type="java.lang.String, org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@param className the class name of the user defined descriptor class |
@param job a configuration object used for descriptor configuration]]>
| </doc> |
| </constructor> |
| <method name="createInstance" return="java.lang.Object" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="className" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Create an instance of the given class |
| @param className the name of the class |
| @return a dynamically created instance of the given class]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Do nothing.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class implements a wrapper for a user defined value aggregator |
| descriptor. |
| It serves two functions: One is to create an object of |
| ValueAggregatorDescriptor from the name of a user defined class that may be |
| dynamically loaded. The other is to delegate invocations of |
| generateKeyValPairs function to the created object.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor --> |
| <!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator --> |
| <interface name="ValueAggregator" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> |
| <doc> |
| <![CDATA[This interface defines the minimal protocol for value aggregators.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor --> |
| <class name="ValueAggregatorBaseDescriptor" extends="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/> |
| <constructor name="ValueAggregatorBaseDescriptor" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="generateEntry" return="java.util.Map.Entry" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="type" type="java.lang.String"/> |
| <param name="id" type="java.lang.String"/> |
| <param name="val" type="org.apache.hadoop.io.Text"/> |
| <doc> |
| <![CDATA[@param type the aggregation type |
| @param id the aggregation id |
| @param val the val associated with the id to be aggregated |
| @return an Entry whose key is the aggregation id prefixed with |
| the aggregation type.]]> |
| </doc> |
| </method> |
| <method name="generateValueAggregator" return="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="type" type="java.lang.String"/> |
| <doc> |
| <![CDATA[@param type the aggregation type |
| @return a value aggregator of the given type.]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[get the input file name. |
| |
| @param job a job configuration object]]> |
| </doc> |
| </method> |
| <field name="UNIQ_VALUE_COUNT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="LONG_VALUE_SUM" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DOUBLE_VALUE_SUM" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="VALUE_HISTOGRAM" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="LONG_VALUE_MAX" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="LONG_VALUE_MIN" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="STRING_VALUE_MAX" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="STRING_VALUE_MIN" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class implements the common functionalities of |
| the subclasses of ValueAggregatorDescriptor class.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner --> |
| <class name="ValueAggregatorCombiner" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ValueAggregatorCombiner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Combiner does not need to configure.]]> |
| </doc> |
| </method> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Combines values for a given key. |
| @param key the key is expected to be a Text object, whose prefix indicates |
| the type of aggregation to aggregate the values. |
| @param values the values to combine |
| @param output to collect combined values]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Do nothing.]]> |
| </doc> |
| </method> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="arg0" type="K1"/> |
| <param name="arg1" type="V1"/> |
| <param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="arg3" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Do nothing. Should not be called.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class implements the generic combiner of Aggregate.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner --> |
| <!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor --> |
| <interface name="ValueAggregatorDescriptor" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/> |
| <method name="configure" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Configure the object |
| |
| @param job |
| a JobConf object that may contain the information that can be used |
| to configure the object.]]> |
| </doc> |
| </method> |
| <field name="TYPE_SEPARATOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="ONE" type="org.apache.hadoop.io.Text" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This interface defines the contract a value aggregator descriptor must |
| support. Such a descriptor can be configured with a JobConf object. Its main |
| function is to generate a list of aggregation-id/value pairs. An aggregation |
| id encodes an aggregation type which is used to guide the way to aggregate |
the value in the reduce/combiner phase of an Aggregate based job. The mapper in
| an Aggregate based map/reduce job may create one or more of |
| ValueAggregatorDescriptor objects at configuration time. For each input |
| key/value pair, the mapper will use those objects to create aggregation |
| id/value pairs.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob --> |
| <class name="ValueAggregatorJob" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ValueAggregatorJob" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <param name="descriptors" type="java.lang.Class[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <param name="caller" type="java.lang.Class"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create an Aggregate based map/reduce job. |
| |
| @param args the arguments used for job creation. Generic hadoop |
| arguments are accepted. |
@param caller the caller class.
| @return a JobConf object ready for submission. |
| |
| @throws IOException |
| @see GenericOptionsParser]]> |
| </doc> |
| </method> |
| <method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create an Aggregate based map/reduce job. |
| |
| @param args the arguments used for job creation. Generic hadoop |
| arguments are accepted. |
| @return a JobConf object ready for submission. |
| |
| @throws IOException |
| @see GenericOptionsParser]]> |
| </doc> |
| </method> |
| <method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <param name="descriptors" type="java.lang.Class[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setAggregatorDescriptors" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="descriptors" type="java.lang.Class[]"/> |
| </method> |
| <method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <param name="descriptors" type="java.lang.Class[]"/> |
| <param name="caller" type="java.lang.Class"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[create and run an Aggregate based map/reduce job. |
| |
| @param args the arguments used for job creation |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This is the main class for creating a map/reduce job using Aggregate |
| framework. The Aggregate is a specialization of map/reduce framework, |
specializing in performing various simple aggregations.
| |
| Generally speaking, in order to implement an application using Map/Reduce |
| model, the developer is to implement Map and Reduce functions (and possibly |
| combine function). However, a lot of applications related to counting and |
| statistics computing have very similar characteristics. Aggregate abstracts |
| out the general patterns of these functions and implementing those patterns. |
In particular, the package provides generic mapper/reducer/combiner classes,
| and a set of built-in value aggregators, and a generic utility class that |
| helps user create map/reduce jobs using the generic class. The built-in |
| aggregators include: |
| |
| sum over numeric values count the number of distinct values compute the |
histogram of values compute the minimum, maximum, median, average, standard
| deviation of numeric values |
| |
| The developer using Aggregate will need only to provide a plugin class |
| conforming to the following interface: |
| |
| public interface ValueAggregatorDescriptor { public ArrayList<Entry> |
| generateKeyValPairs(Object key, Object value); public void |
| configure(JobConfjob); } |
| |
| The package also provides a base class, ValueAggregatorBaseDescriptor, |
| implementing the above interface. The user can extend the base class and |
| implement generateKeyValPairs accordingly. |
| |
| The primary work of generateKeyValPairs is to emit one or more key/value |
| pairs based on the input key/value pair. The key in an output key/value pair |
| encode two pieces of information: aggregation type and aggregation id. The |
| value will be aggregated onto the aggregation id according the aggregation |
| type. |
| |
| This class offers a function to generate a map/reduce job using Aggregate |
| framework. The function takes the following parameters: input directory spec |
| input format (text or sequence file) output directory a file specifying the |
| user plugin class]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase --> |
| <class name="ValueAggregatorJobBase" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <implements name="org.apache.hadoop.mapred.Reducer"/> |
| <constructor name="ValueAggregatorJobBase" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="logSpec" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="aggregatorDescriptorList" type="java.util.ArrayList" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
<![CDATA[This abstract class implements some common functionalities of
the generic mapper, reducer and combiner classes of Aggregate.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper --> |
| <class name="ValueAggregatorMapper" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ValueAggregatorMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K1"/> |
| <param name="value" type="V1"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[the map function. It iterates through the value aggregator descriptor |
| list to generate aggregation id/value pairs and emit them.]]> |
| </doc> |
| </method> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="arg0" type="org.apache.hadoop.io.Text"/> |
| <param name="arg1" type="java.util.Iterator"/> |
| <param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="arg3" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Do nothing. Should not be called.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class implements the generic mapper of Aggregate.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer --> |
| <class name="ValueAggregatorReducer" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ValueAggregatorReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@param key |
| the key is expected to be a Text object, whose prefix indicates |
| the type of aggregation to aggregate the values. In effect, data |
| driven computing is achieved. It is assumed that each aggregator's |
| getReport method emits appropriate output for the aggregator. This |
| may be further customized. |
| @param values |
| the values to be aggregated]]> |
| </doc> |
| </method> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="arg0" type="K1"/> |
| <param name="arg1" type="V1"/> |
| <param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="arg3" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Do nothing. Should not be called]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class implements the generic reducer of Aggregate.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer --> |
| <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram --> |
| <class name="ValueHistogram" extends="org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> |
| <constructor name="ValueHistogram" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[This class implements a value aggregator that computes the |
| histogram of a sequence of strings.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram --> |
| </package> |
| <package name="org.apache.hadoop.mapred.lib.db"> |
| <!-- start class org.apache.hadoop.mapred.lib.db.DBConfiguration --> |
| <class name="DBConfiguration" extends="org.apache.hadoop.mapreduce.lib.db.DBConfiguration" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="configureDB" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="driverClass" type="java.lang.String"/> |
| <param name="dbUrl" type="java.lang.String"/> |
| <param name="userName" type="java.lang.String"/> |
| <param name="passwd" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Sets the DB access related fields in the JobConf. |
| @param job the job |
| @param driverClass JDBC Driver class name |
| @param dbUrl JDBC DB access URL. |
| @param userName DB access username |
| @param passwd DB access passwd]]> |
| </doc> |
| </method> |
| <method name="configureDB" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="driverClass" type="java.lang.String"/> |
| <param name="dbUrl" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Sets the DB access related fields in the JobConf. |
| @param job the job |
| @param driverClass JDBC Driver class name |
| @param dbUrl JDBC DB access URL.]]> |
| </doc> |
| </method> |
| <field name="DRIVER_CLASS_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The JDBC Driver class name]]> |
| </doc> |
| </field> |
| <field name="URL_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[JDBC Database access URL]]> |
| </doc> |
| </field> |
| <field name="USERNAME_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[User name to access the database]]> |
| </doc> |
| </field> |
| <field name="PASSWORD_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Password to access the database]]> |
| </doc> |
| </field> |
| <field name="INPUT_TABLE_NAME_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Input table name]]> |
| </doc> |
| </field> |
| <field name="INPUT_FIELD_NAMES_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Field names in the Input table]]> |
| </doc> |
| </field> |
| <field name="INPUT_CONDITIONS_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[WHERE clause in the input SELECT statement]]> |
| </doc> |
| </field> |
| <field name="INPUT_ORDER_BY_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[ORDER BY clause in the input SELECT statement]]> |
| </doc> |
| </field> |
| <field name="INPUT_QUERY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Whole input query, excluding LIMIT...OFFSET]]> |
| </doc> |
| </field> |
| <field name="INPUT_COUNT_QUERY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Input query to get the count of records]]> |
| </doc> |
| </field> |
| <field name="INPUT_CLASS_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Class name implementing DBWritable which will hold input tuples]]> |
| </doc> |
| </field> |
| <field name="OUTPUT_TABLE_NAME_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Output table name]]> |
| </doc> |
| </field> |
| <field name="OUTPUT_FIELD_NAMES_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Field names in the Output table]]> |
| </doc> |
| </field> |
| <field name="OUTPUT_FIELD_COUNT_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Number of fields in the Output table]]> |
| </doc> |
| </field> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.db.DBConfiguration --> |
| <!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat --> |
| <class name="DBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputFormat"/> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="DBInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="chunks" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="setInput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="inputClass" type="java.lang.Class"/> |
| <param name="tableName" type="java.lang.String"/> |
| <param name="conditions" type="java.lang.String"/> |
| <param name="orderBy" type="java.lang.String"/> |
| <param name="fieldNames" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[Initializes the map-part of the job with the appropriate input settings. |
| |
| @param job The job |
| @param inputClass the class object implementing DBWritable, which is the |
| Java object holding tuple fields. |
| @param tableName The table to read data from |
| @param conditions The condition which to select data with, eg. '(updated > |
| 20070101 AND length > 0)' |
| @param orderBy the fieldNames in the orderBy clause. |
| @param fieldNames The field names in the table |
| @see #setInput(JobConf, Class, String, String)]]> |
| </doc> |
| </method> |
| <method name="setInput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="inputClass" type="java.lang.Class"/> |
| <param name="inputQuery" type="java.lang.String"/> |
| <param name="inputCountQuery" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Initializes the map-part of the job with the appropriate input settings. |
| |
| @param job The job |
| @param inputClass the class object implementing DBWritable, which is the |
| Java object holding tuple fields. |
| @param inputQuery the input query to select fields. Example : |
| "SELECT f1, f2, f3 FROM Mytable ORDER BY f1" |
| @param inputCountQuery the input query that returns the number of records in |
| the table. |
| Example : "SELECT COUNT(f1) FROM Mytable" |
| @see #setInput(JobConf, Class, String, String, String, String...)]]> |
| </doc> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.db.DBOutputFormat --> |
| <class name="DBOutputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.OutputFormat"/> |
| <constructor name="DBOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="setOutput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="tableName" type="java.lang.String"/> |
| <param name="fieldNames" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[Initializes the reduce-part of the job with the appropriate output settings |
| |
| @param job The job |
| @param tableName The table to insert data into |
| @param fieldNames The field names in the table.]]> |
| </doc> |
| </method> |
| <method name="setOutput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="tableName" type="java.lang.String"/> |
| <param name="fieldCount" type="int"/> |
| <doc> |
| <![CDATA[Initializes the reduce-part of the job with the appropriate output settings |
| |
| @param job The job |
| @param tableName The table to insert data into |
| @param fieldCount the number of fields in the table.]]> |
| </doc> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.db.DBOutputFormat --> |
| <!-- start interface org.apache.hadoop.mapred.lib.db.DBWritable --> |
| <interface name="DBWritable" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.db.DBWritable"/> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.lib.db.DBWritable --> |
| </package> |
| <package name="org.apache.hadoop.mapred.pipes"> |
| <!-- start class org.apache.hadoop.mapred.pipes.Submitter --> |
| <class name="Submitter" extends="org.apache.hadoop.conf.Configured" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.util.Tool"/> |
| <constructor name="Submitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="Submitter" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getExecutable" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the URI of the application's executable. |
| @param conf |
| @return the URI where the application's executable is located]]> |
| </doc> |
| </method> |
| <method name="setExecutable" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="executable" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the URI for the application's executable. Normally this is a hdfs: |
| location. |
| @param conf |
| @param executable The URI of the application's executable.]]> |
| </doc> |
| </method> |
| <method name="setIsJavaRecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="value" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the job is using a Java RecordReader. |
| @param conf the configuration to modify |
| @param value the new value]]> |
| </doc> |
| </method> |
| <method name="getIsJavaRecordReader" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Check whether the job is using a Java RecordReader |
| @param conf the configuration to check |
| @return is it a Java RecordReader?]]> |
| </doc> |
| </method> |
| <method name="setIsJavaMapper" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="value" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the Mapper is written in Java. |
| @param conf the configuration to modify |
| @param value the new value]]> |
| </doc> |
| </method> |
| <method name="getIsJavaMapper" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Check whether the job is using a Java Mapper. |
| @param conf the configuration to check |
| @return is it a Java Mapper?]]> |
| </doc> |
| </method> |
| <method name="setIsJavaReducer" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="value" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the Reducer is written in Java. |
| @param conf the configuration to modify |
| @param value the new value]]> |
| </doc> |
| </method> |
| <method name="getIsJavaReducer" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Check whether the job is using a Java Reducer. |
| @param conf the configuration to check |
| @return is it a Java Reducer?]]> |
| </doc> |
| </method> |
| <method name="setIsJavaRecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="value" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the job will use a Java RecordWriter. |
| @param conf the configuration to modify |
| @param value the new value to set]]> |
| </doc> |
| </method> |
| <method name="getIsJavaRecordWriter" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Will the reduce use a Java RecordWriter? |
| @param conf the configuration to check |
| @return true, if the output of the job will be written by Java]]> |
| </doc> |
| </method> |
| <method name="getKeepCommandFile" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Does the user want to keep the command file for debugging? If this is |
| true, pipes will write a copy of the command data to a file in the |
| task directory named "downlink.data", which may be used to run the C++ |
| program under the debugger. You probably also want to set |
| JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from |
| being deleted. |
| To run using the data file, set the environment variable |
| "mapreduce.pipes.commandfile" to point to the file. |
| @param conf the configuration to check |
| @return will the framework save the command file?]]> |
| </doc> |
| </method> |
| <method name="setKeepCommandFile" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="keep" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether to keep the command file for debugging |
| @param conf the configuration to modify |
| @param keep the new value]]> |
| </doc> |
| </method> |
| <method name="submitJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use {@link Submitter#runJob(JobConf)}"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications |
| to the job to run under pipes are made to the configuration. |
| @param conf the job to submit to the cluster (MODIFIED) |
| @throws IOException |
| @deprecated Use {@link Submitter#runJob(JobConf)}]]> |
| </doc> |
| </method> |
| <method name="runJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications |
| to the job to run under pipes are made to the configuration. |
| @param conf the job to submit to the cluster (MODIFIED) |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="jobSubmit" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Submit a job to the Map-Reduce framework. |
| This returns a handle to the {@link RunningJob} which can be used to track |
| the running-job. |
| |
| @param conf the job configuration. |
| @return a handle to the {@link RunningJob} which can be used to track the |
| running-job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="run" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| <doc> |
| <![CDATA[Submit a pipes job based on the command line arguments. |
| @param args]]> |
| </doc> |
| </method> |
| <field name="LOG" type="org.slf4j.Logger" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="PRESERVE_COMMANDFILE" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="EXECUTABLE" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="INTERPRETOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="IS_JAVA_MAP" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="IS_JAVA_RR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="IS_JAVA_RW" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="IS_JAVA_REDUCE" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="PARTITIONER" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="INPUT_FORMAT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="PORT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[The main entry point and job submitter. It may either be used as a command |
| line-based or API-based method to launch Pipes jobs.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.pipes.Submitter --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce"> |
| <!-- start class org.apache.hadoop.mapreduce.Cluster --> |
| <class name="Cluster" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Cluster" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <constructor name="Cluster" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close the <code>Cluster</code>. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getFileSystem" return="org.apache.hadoop.fs.FileSystem" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the file system where job-specific files are stored |
| |
| @return object of FileSystem |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getJob" return="org.apache.hadoop.mapreduce.Job" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapreduce.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get job corresponding to jobid. |
| |
| @param jobId |
| @return object of {@link Job} |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get all the queues in cluster. |
| |
| @return array of {@link QueueInfo} |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getQueue" return="org.apache.hadoop.mapreduce.QueueInfo" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get queue information for the specified name. |
| |
| @param name queuename |
| @return object of {@link QueueInfo} |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getLogParams" return="org.apache.hadoop.mapreduce.v2.LogParams" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobID" type="org.apache.hadoop.mapreduce.JobID"/> |
| <param name="taskAttemptID" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get log parameters for the specified jobID or taskAttemptID |
| @param jobID the job id. |
| @param taskAttemptID the task attempt id. Optional. |
| @return the LogParams |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getClusterStatus" return="org.apache.hadoop.mapreduce.ClusterMetrics" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get current cluster status. |
| |
| @return object of {@link ClusterMetrics} |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getActiveTaskTrackers" return="org.apache.hadoop.mapreduce.TaskTrackerInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get all active trackers in the cluster. |
| |
| @return array of {@link TaskTrackerInfo} |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getBlackListedTaskTrackers" return="org.apache.hadoop.mapreduce.TaskTrackerInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get blacklisted trackers. |
| |
| @return array of {@link TaskTrackerInfo} |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getAllJobs" return="org.apache.hadoop.mapreduce.Job[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #getAllJobStatuses()} instead."> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get all the jobs in cluster. |
| |
| @return array of {@link Job} |
| @throws IOException |
| @throws InterruptedException |
| @deprecated Use {@link #getAllJobStatuses()} instead.]]> |
| </doc> |
| </method> |
| <method name="getAllJobStatuses" return="org.apache.hadoop.mapreduce.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get job status for all jobs in the cluster. |
| @return job status for all jobs in cluster |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getSystemDir" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Grab the jobtracker system directory path where |
| job-specific files will be placed. |
| |
| @return the system directory where job-specific files are to be placed.]]> |
| </doc> |
| </method> |
| <method name="getStagingAreaDir" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Grab the jobtracker's view of the staging directory path where |
| job-specific files will be placed. |
| |
| @return the staging directory where job-specific files are to be placed.]]> |
| </doc> |
| </method> |
| <method name="getJobHistoryUrl" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapreduce.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the job history file path for a given job id. The job history file at |
| this path may or may not be existing depending on the job completion state. |
| The file is present only for the completed jobs. |
| @param jobId the JobID of the job submitted by the current user. |
| @return the file path of the job history file |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getQueueAclsForCurrentUser" return="org.apache.hadoop.mapreduce.QueueAclsInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
 <![CDATA[Gets the Queue ACLs for the current user.
 @return array of QueueAclsInfo objects for the current user.
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getRootQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Gets the root level queues. |
| @return array of JobQueueInfo object. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getChildQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Returns immediate children of queueName. |
| @param queueName |
| @return array of JobQueueInfo which are children of queueName |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getJobTrackerStatus" return="org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the JobTracker's status. |
| |
| @return {@link JobTrackerStatus} of the JobTracker |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getTaskTrackerExpiryInterval" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the tasktracker expiry interval for the cluster |
| @return the expiry interval in msec]]> |
| </doc> |
| </method> |
| <method name="getDelegationToken" return="org.apache.hadoop.security.token.Token" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="renewer" type="org.apache.hadoop.io.Text"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get a delegation token for the user from the JobTracker. |
| @param renewer the user who can renew the token |
| @return the new token |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="renewDelegationToken" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link Token#renew} instead"> |
| <param name="token" type="org.apache.hadoop.security.token.Token"/> |
| <exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Renew a delegation token |
| @param token the token to renew |
| @return the new expiration time |
| @throws InvalidToken |
| @throws IOException |
| @deprecated Use {@link Token#renew} instead]]> |
| </doc> |
| </method> |
| <method name="cancelDelegationToken" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link Token#cancel} instead"> |
| <param name="token" type="org.apache.hadoop.security.token.Token"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Cancel a delegation token from the JobTracker |
| @param token the token to cancel |
| @throws IOException |
| @deprecated Use {@link Token#cancel} instead]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Provides a way to access information about the map/reduce cluster.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.Cluster --> |
| <!-- start class org.apache.hadoop.mapreduce.ClusterMetrics --> |
| <class name="ClusterMetrics" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="ClusterMetrics" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="ClusterMetrics" type="int, int, int, int, int, int, int, int, int, int, int, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="ClusterMetrics" type="int, int, int, int, int, int, int, int, int, int, int, int, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRunningMaps" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of running map tasks in the cluster. |
| |
| @return running maps]]> |
| </doc> |
| </method> |
| <method name="getRunningReduces" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of running reduce tasks in the cluster. |
| |
| @return running reduces]]> |
| </doc> |
| </method> |
| <method name="getOccupiedMapSlots" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get number of occupied map slots in the cluster. |
| |
| @return occupied map slot count]]> |
| </doc> |
| </method> |
| <method name="getOccupiedReduceSlots" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of occupied reduce slots in the cluster. |
| |
| @return occupied reduce slot count]]> |
| </doc> |
| </method> |
| <method name="getReservedMapSlots" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get number of reserved map slots in the cluster. |
| |
| @return reserved map slot count]]> |
| </doc> |
| </method> |
| <method name="getReservedReduceSlots" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of reserved reduce slots in the cluster. |
| |
| @return reserved reduce slot count]]> |
| </doc> |
| </method> |
| <method name="getMapSlotCapacity" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the total number of map slots in the cluster. |
| |
| @return map slot capacity]]> |
| </doc> |
| </method> |
| <method name="getReduceSlotCapacity" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the total number of reduce slots in the cluster. |
| |
| @return reduce slot capacity]]> |
| </doc> |
| </method> |
| <method name="getTotalJobSubmissions" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the total number of job submissions in the cluster. |
| |
| @return total number of job submissions]]> |
| </doc> |
| </method> |
| <method name="getTaskTrackerCount" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of active trackers in the cluster. |
| |
| @return active tracker count.]]> |
| </doc> |
| </method> |
| <method name="getBlackListedTaskTrackerCount" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of blacklisted trackers in the cluster. |
| |
| @return blacklisted tracker count]]> |
| </doc> |
| </method> |
| <method name="getGrayListedTaskTrackerCount" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of graylisted trackers in the cluster. |
| |
| @return graylisted tracker count]]> |
| </doc> |
| </method> |
| <method name="getDecommissionedTaskTrackerCount" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of decommissioned trackers in the cluster. |
| |
| @return decommissioned tracker count]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Status information on the current state of the Map-Reduce cluster. |
| |
| <p><code>ClusterMetrics</code> provides clients with information such as: |
| <ol> |
| <li> |
| Size of the cluster. |
| </li> |
| <li> |
| Number of blacklisted and decommissioned trackers. |
| </li> |
| <li> |
| Slot capacity of the cluster. |
| </li> |
| <li> |
| The number of currently occupied/reserved map and reduce slots. |
| </li> |
| <li> |
| The number of currently running map and reduce tasks. |
| </li> |
| <li> |
| The number of job submissions. |
| </li> |
| </ol> |
| |
| <p>Clients can query for the latest <code>ClusterMetrics</code>, via |
| {@link Cluster#getClusterStatus()}.</p> |
| |
| @see Cluster]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.ClusterMetrics --> |
| <!-- start interface org.apache.hadoop.mapreduce.Counter --> |
| <interface name="Counter" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <method name="setDisplayName" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="(and no-op by default)"> |
| <param name="displayName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the display name of the counter |
| @param displayName of the counter |
| @deprecated (and no-op by default)]]> |
| </doc> |
| </method> |
| <method name="getName" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the name of the counter]]> |
| </doc> |
| </method> |
| <method name="getDisplayName" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the display name of the counter. |
| @return the user facing name of the counter]]> |
| </doc> |
| </method> |
| <method name="getValue" return="long" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[What is the current value of this counter? |
| @return the current value]]> |
| </doc> |
| </method> |
| <method name="setValue" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="value" type="long"/> |
| <doc> |
| <![CDATA[Set this counter by the given value |
| @param value the value to set]]> |
| </doc> |
| </method> |
| <method name="increment" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="incr" type="long"/> |
| <doc> |
| <![CDATA[Increment this counter by the given value |
| @param incr the value to increase this counter by]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A named counter that tracks the progress of a map/reduce job. |
| |
| <p><code>Counters</code> represent global counters, defined either by the |
| Map-Reduce framework or applications. Each <code>Counter</code> is named by |
| an {@link Enum} and has a long for the value.</p> |
| |
| <p><code>Counters</code> are bunched into Groups, each comprising of |
| counters from a particular <code>Enum</code> class.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.Counter --> |
| <!-- start interface org.apache.hadoop.mapreduce.CounterGroup --> |
| <interface name="CounterGroup" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.counters.CounterGroupBase"/> |
| <doc> |
| <![CDATA[A group of {@link Counter}s that logically belong together. Typically, |
| it is an {@link Enum} subclass and the counters are the values.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.CounterGroup --> |
| <!-- start class org.apache.hadoop.mapreduce.Counters --> |
| <class name="Counters" extends="org.apache.hadoop.mapreduce.counters.AbstractCounters" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Counters" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Default constructor]]> |
| </doc> |
| </constructor> |
| <constructor name="Counters" type="org.apache.hadoop.mapreduce.counters.AbstractCounters" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct the Counters object from the another counters object |
| @param <C> the type of counter |
| @param <G> the type of counter group |
| @param counters the old counters object]]> |
| </doc> |
| </constructor> |
| <doc> |
| <![CDATA[<p><code>Counters</code> holds per job/task counters, defined either by the |
| Map-Reduce framework or applications. Each <code>Counter</code> can be of |
| any {@link Enum} type.</p> |
| |
| <p><code>Counters</code> are bunched into {@link CounterGroup}s, each |
| comprising of counters from a particular <code>Enum</code> class.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.Counters --> |
| <!-- start class org.apache.hadoop.mapreduce.ID --> |
| <class name="ID" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.WritableComparable"/> |
| <constructor name="ID" type="int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[constructs an ID object from the given int]]> |
| </doc> |
| </constructor> |
| <constructor name="ID" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getId" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[returns the int which represents the identifier]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="o" type="java.lang.Object"/> |
| </method> |
| <method name="compareTo" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="that" type="org.apache.hadoop.mapreduce.ID"/> |
| <doc> |
| <![CDATA[Compare IDs by associated numbers]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="SEPARATOR" type="char" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="id" type="int" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A general identifier, which internally stores the id |
| as an integer. This is the super class of {@link JobID}, |
| {@link TaskID} and {@link TaskAttemptID}. |
| |
| @see JobID |
| @see TaskID |
| @see TaskAttemptID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.ID --> |
| <!-- start class org.apache.hadoop.mapreduce.InputFormat --> |
| <class name="InputFormat" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getSplits" return="java.util.List" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Logically split the set of input files for the job. |
| |
| <p>Each {@link InputSplit} is then assigned to an individual {@link Mapper} |
| for processing.</p> |
| |
| <p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the |
 input files are not physically split into chunks. For example, a split could
 be an <i><input-file-path, start, offset></i> tuple. The InputFormat
| also creates the {@link RecordReader} to read the {@link InputSplit}. |
| |
| @param context job configuration. |
| @return an array of {@link InputSplit}s for the job.]]> |
| </doc> |
| </method> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Create a record reader for a given split. The framework will call |
| {@link RecordReader#initialize(InputSplit, TaskAttemptContext)} before |
| the split is used. |
| @param split the split to be read |
| @param context the information about the task |
| @return a new record reader |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>InputFormat</code> describes the input-specification for a |
| Map-Reduce job. |
| |
| <p>The Map-Reduce framework relies on the <code>InputFormat</code> of the |
| job to:<p> |
| <ol> |
| <li> |
| Validate the input-specification of the job. |
| <li> |
| Split-up the input file(s) into logical {@link InputSplit}s, each of |
| which is then assigned to an individual {@link Mapper}. |
| </li> |
| <li> |
| Provide the {@link RecordReader} implementation to be used to glean |
| input records from the logical <code>InputSplit</code> for processing by |
| the {@link Mapper}. |
| </li> |
| </ol> |
| |
| <p>The default behavior of file-based {@link InputFormat}s, typically |
| sub-classes of {@link FileInputFormat}, is to split the |
| input into <i>logical</i> {@link InputSplit}s based on the total size, in |
| bytes, of the input files. However, the {@link FileSystem} blocksize of |
| the input files is treated as an upper bound for input splits. A lower bound |
| on the split size can be set via |
| <a href="{@docRoot}/../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml#mapreduce.input.fileinputformat.split.minsize"> |
| mapreduce.input.fileinputformat.split.minsize</a>.</p> |
| |
| <p>Clearly, logical splits based on input-size is insufficient for many |
 applications since record boundaries are to be respected. In such cases, the
| application has to also implement a {@link RecordReader} on whom lies the |
| responsibility to respect record-boundaries and present a record-oriented |
| view of the logical <code>InputSplit</code> to the individual task. |
| |
| @see InputSplit |
| @see RecordReader |
| @see FileInputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.InputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.InputSplit --> |
| <class name="InputSplit" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InputSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getLength" return="long" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the size of the split, so that the input splits can be sorted by size. |
| @return the number of bytes in the split |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the list of nodes by name where the data for the split would be local. |
| The locations do not need to be serialized. |
| |
 @return a new array of the nodes.
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getLocationInfo" return="org.apache.hadoop.mapred.SplitLocationInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets info about which nodes the input split is stored on and how it is |
| stored at each location. |
| |
| @return list of <code>SplitLocationInfo</code>s describing how the split |
| data is stored at each location. A null value indicates that all the |
| locations have the data stored on disk. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>InputSplit</code> represents the data to be processed by an |
| individual {@link Mapper}. |
| |
| <p>Typically, it presents a byte-oriented view on the input and is the |
| responsibility of {@link RecordReader} of the job to process this and present |
| a record-oriented view. |
| |
| @see InputFormat |
| @see RecordReader]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.InputSplit --> |
| <!-- start class org.apache.hadoop.mapreduce.Job --> |
| <class name="Job" extends="org.apache.hadoop.mapreduce.task.JobContextImpl" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.JobContext"/> |
| <implements name="java.lang.AutoCloseable"/> |
| <constructor name="Job" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #getInstance()}"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Use {@link #getInstance()}]]> |
| </doc> |
| </constructor> |
| <constructor name="Job" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #getInstance(Configuration)}"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Use {@link #getInstance(Configuration)}]]> |
| </doc> |
| </constructor> |
| <constructor name="Job" type="org.apache.hadoop.conf.Configuration, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #getInstance(Configuration, String)}"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Use {@link #getInstance(Configuration, String)}]]> |
| </doc> |
| </constructor> |
| <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Creates a new {@link Job} with no particular {@link Cluster} . |
| A Cluster will be created with a generic {@link Configuration}. |
| |
| @return the {@link Job} , with no connection to a cluster yet. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Creates a new {@link Job} with no particular {@link Cluster} and a |
| given {@link Configuration}. |
| |
| The <code>Job</code> makes a copy of the <code>Configuration</code> so |
| that any necessary internal modifications do not reflect on the incoming |
| parameter. |
| |
| A Cluster will be created from the conf parameter only when it's needed. |
| |
| @param conf the configuration |
| @return the {@link Job} , with no connection to a cluster yet. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="jobName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Creates a new {@link Job} with no particular {@link Cluster} and a given jobName. |
| A Cluster will be created from the conf parameter only when it's needed. |
| |
| The <code>Job</code> makes a copy of the <code>Configuration</code> so |
| that any necessary internal modifications do not reflect on the incoming |
| parameter. |
| |
| @param conf the configuration |
| @return the {@link Job} , with no connection to a cluster yet. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="status" type="org.apache.hadoop.mapreduce.JobStatus"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Creates a new {@link Job} with no particular {@link Cluster} and given |
| {@link Configuration} and {@link JobStatus}. |
| A Cluster will be created from the conf parameter only when it's needed. |
| |
| The <code>Job</code> makes a copy of the <code>Configuration</code> so |
| that any necessary internal modifications do not reflect on the incoming |
| parameter. |
| |
| @param status job status |
| @param conf job configuration |
| @return the {@link Job} , with no connection to a cluster yet. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use {@link #getInstance()}"> |
| <param name="ignored" type="org.apache.hadoop.mapreduce.Cluster"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Creates a new {@link Job} with no particular {@link Cluster}. |
| A Cluster will be created from the conf parameter only when it's needed. |
| |
| The <code>Job</code> makes a copy of the <code>Configuration</code> so |
| that any necessary internal modifications do not reflect on the incoming |
| parameter. |
| |
| @param ignored |
| @return the {@link Job} , with no connection to a cluster yet. |
| @throws IOException |
| @deprecated Use {@link #getInstance()}]]> |
| </doc> |
| </method> |
| <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use {@link #getInstance(Configuration)}"> |
| <param name="ignored" type="org.apache.hadoop.mapreduce.Cluster"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Creates a new {@link Job} with no particular {@link Cluster} and given |
| {@link Configuration}. |
| A Cluster will be created from the conf parameter only when it's needed. |
| |
| The <code>Job</code> makes a copy of the <code>Configuration</code> so |
| that any necessary internal modifications do not reflect on the incoming |
| parameter. |
| |
| @param ignored |
| @param conf job configuration |
| @return the {@link Job} , with no connection to a cluster yet. |
| @throws IOException |
| @deprecated Use {@link #getInstance(Configuration)}]]> |
| </doc> |
| </method> |
| <method name="getStatus" return="org.apache.hadoop.mapreduce.JobStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getJobState" return="org.apache.hadoop.mapreduce.JobStatus.State" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Returns the current state of the Job. |
| |
| @return JobStatus#State |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getTrackingURL" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the URL where some job progress information will be displayed. |
| |
| @return the URL where some job progress information will be displayed.]]> |
| </doc> |
| </method> |
| <method name="getJobFile" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the path of the submitted job configuration. |
| |
| @return the path of the submitted job configuration.]]> |
| </doc> |
| </method> |
| <method name="getStartTime" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get start time of the job. |
| |
| @return the start time of the job]]> |
| </doc> |
| </method> |
| <method name="getFinishTime" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get finish time of the job. |
| |
| @return the finish time of the job]]> |
| </doc> |
| </method> |
| <method name="getSchedulingInfo" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get scheduling info of the job. |
| |
| @return the scheduling info of the job]]> |
| </doc> |
| </method> |
| <method name="getPriority" return="org.apache.hadoop.mapreduce.JobPriority" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
 <![CDATA[Get the priority info of the job.
| |
| @return the priority info of the job]]> |
| </doc> |
| </method> |
| <method name="getJobName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The user-specified job name.]]> |
| </doc> |
| </method> |
| <method name="getHistoryUrl" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="isRetired" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Dump stats to screen.]]> |
| </doc> |
| </method> |
| <method name="getTaskReports" return="org.apache.hadoop.mapreduce.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="type" type="org.apache.hadoop.mapreduce.TaskType"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the information of the current state of the tasks of a job. |
| |
| @param type Type of the task |
 @return the list of task reports for the given task type.
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="mapProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0 |
| and 1.0. When all map tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's map-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="reduceProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0 |
| and 1.0. When all reduce tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's reduce-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="cleanupProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0 |
| and 1.0. When all cleanup tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's cleanup-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setupProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0 |
| and 1.0. When all setup tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's setup-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isComplete" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check if the job is finished or not. |
| This is a non-blocking call. |
| |
| @return <code>true</code> if the job is complete, else <code>false</code>. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isSuccessful" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check if the job completed successfully. |
| |
| @return <code>true</code> if the job succeeded, else <code>false</code>. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="killJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Kill the running job. Blocks until all job tasks have been |
| killed as well. If the job is no longer running, it simply returns. |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setPriority" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobPriority" type="org.apache.hadoop.mapreduce.JobPriority"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Set the priority of a running job. |
| @param jobPriority the new priority for the job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setPriorityAsInteger" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobPriority" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Set the priority of a running job. |
| |
| @param jobPriority |
| the new priority for the job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getTaskCompletionEvents" return="org.apache.hadoop.mapreduce.TaskCompletionEvent[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="startFrom" type="int"/> |
| <param name="numEvents" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get events indicating completion (success/failure) of component tasks. |
| |
| @param startFrom index to start fetching events from |
| @param numEvents number of events to fetch |
| @return an array of {@link TaskCompletionEvent}s |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="startFrom" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get events indicating completion (success/failure) of component tasks. |
| |
| @param startFrom index to start fetching events from |
| @return an array of {@link org.apache.hadoop.mapred.TaskCompletionEvent}s |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="killTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Kill indicated task attempt. |
| |
| @param taskId the id of the task to be terminated. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="failTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Fail indicated task attempt. |
| |
| @param taskId the id of the task to be terminated. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getCounters" return="org.apache.hadoop.mapreduce.Counters" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the counters for this job. May return null if the job has been |
| retired and the job is no longer in the completed job store. |
| |
| @return the counters for this job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getTaskDiagnostics" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Gets the diagnostic messages for a given task attempt. |
| @param taskid |
| @return the list of diagnostic messages for the task |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setNumReduceTasks" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="tasks" type="int"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the number of reduce tasks for the job. |
| @param tasks the number of reduce tasks |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setWorkingDirectory" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="dir" type="org.apache.hadoop.fs.Path"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Set the current working directory for the default file system. |
| |
| @param dir the new current working directory. |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setInputFormatClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the {@link InputFormat} for the job. |
| @param cls the <code>InputFormat</code> to use |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setOutputFormatClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the {@link OutputFormat} for the job. |
| @param cls the <code>OutputFormat</code> to use |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setMapperClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the {@link Mapper} for the job. |
| @param cls the <code>Mapper</code> to use |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setJarByClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the Jar by finding where a given class came from. |
| @param cls the example class]]> |
| </doc> |
| </method> |
| <method name="setJar" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jar" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the job jar]]> |
| </doc> |
| </method> |
| <method name="setUser" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="user" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the reported username for this job. |
| |
| @param user the username for this job.]]> |
| </doc> |
| </method> |
| <method name="setCombinerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the combiner class for the job. |
| @param cls the combiner to use |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setReducerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the {@link Reducer} for the job. |
| @param cls the <code>Reducer</code> to use |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setPartitionerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the {@link Partitioner} for the job. |
| @param cls the <code>Partitioner</code> to use |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setMapOutputKeyClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the key class for the map output data. This allows the user to |
| specify the map output key class to be different than the final output |
| value class. |
| |
| @param theClass the map output key class. |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setMapOutputValueClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the value class for the map output data. This allows the user to |
| specify the map output value class to be different than the final output |
| value class. |
| |
| @param theClass the map output value class. |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setOutputKeyClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the key class for the job output data. |
| |
| @param theClass the key class for the job output data. |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setOutputValueClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the value class for job outputs. |
| |
| @param theClass the value class for job outputs. |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setCombinerKeyGroupingComparatorClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Define the comparator that controls which keys are grouped together |
| for a single call to combiner, |
| {@link Reducer#reduce(Object, Iterable, |
| org.apache.hadoop.mapreduce.Reducer.Context)} |
| |
| @param cls the raw comparator to use |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setSortComparatorClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Define the comparator that controls how the keys are sorted before they |
| are passed to the {@link Reducer}. |
| @param cls the raw comparator |
| @throws IllegalStateException if the job is submitted |
| @see #setCombinerKeyGroupingComparatorClass(Class)]]> |
| </doc> |
| </method> |
| <method name="setGroupingComparatorClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Define the comparator that controls which keys are grouped together |
| for a single call to |
| {@link Reducer#reduce(Object, Iterable, |
| org.apache.hadoop.mapreduce.Reducer.Context)} |
| @param cls the raw comparator to use |
| @throws IllegalStateException if the job is submitted |
| @see #setCombinerKeyGroupingComparatorClass(Class)]]> |
| </doc> |
| </method> |
| <method name="setJobName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> |
| <doc> |
| <![CDATA[Set the user-specified job name. |
| |
| @param name the job's new name. |
| @throws IllegalStateException if the job is submitted]]> |
| </doc> |
| </method> |
| <method name="setSpeculativeExecution" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="speculativeExecution" type="boolean"/> |
| <doc> |
| <![CDATA[Turn speculative execution on or off for this job. |
| |
| @param speculativeExecution <code>true</code> if speculative execution |
| should be turned on, else <code>false</code>.]]> |
| </doc> |
| </method> |
| <method name="setMapSpeculativeExecution" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="speculativeExecution" type="boolean"/> |
| <doc> |
| <![CDATA[Turn speculative execution on or off for this job for map tasks. |
| |
| @param speculativeExecution <code>true</code> if speculative execution |
| should be turned on for map tasks, |
| else <code>false</code>.]]> |
| </doc> |
| </method> |
| <method name="setReduceSpeculativeExecution" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="speculativeExecution" type="boolean"/> |
| <doc> |
| <![CDATA[Turn speculative execution on or off for this job for reduce tasks. |
| |
| @param speculativeExecution <code>true</code> if speculative execution |
| should be turned on for reduce tasks, |
| else <code>false</code>.]]> |
| </doc> |
| </method> |
| <method name="setJobSetupCleanupNeeded" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="needed" type="boolean"/> |
| <doc> |
 <![CDATA[Specify whether job-setup and job-cleanup are needed for the job.
| |
| @param needed If <code>true</code>, job-setup and job-cleanup will be |
| considered from {@link OutputCommitter} |
| else ignored.]]> |
| </doc> |
| </method> |
| <method name="setCacheArchives" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="archives" type="java.net.URI[]"/> |
| <doc> |
| <![CDATA[Set the given set of archives |
| @param archives The list of archives that need to be localized]]> |
| </doc> |
| </method> |
| <method name="setCacheFiles" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="files" type="java.net.URI[]"/> |
| <doc> |
| <![CDATA[Set the given set of files |
| @param files The list of files that need to be localized]]> |
| </doc> |
| </method> |
| <method name="addCacheArchive" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="uri" type="java.net.URI"/> |
| <doc> |
 <![CDATA[Add an archive to be localized
| @param uri The uri of the cache to be localized]]> |
| </doc> |
| </method> |
| <method name="addCacheFile" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="uri" type="java.net.URI"/> |
| <doc> |
| <![CDATA[Add a file to be localized |
| @param uri The uri of the cache to be localized]]> |
| </doc> |
| </method> |
| <method name="addFileToClassPath" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add a file path to the current set of classpath entries. It adds the file |
| to cache as well. |
| |
| Files added with this method will not be unpacked while being added to the |
| classpath. |
| To add archives to classpath, use the {@link #addArchiveToClassPath(Path)} |
| method instead. |
| |
| @param file Path of the file to be added]]> |
| </doc> |
| </method> |
| <method name="addArchiveToClassPath" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="archive" type="org.apache.hadoop.fs.Path"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add an archive path to the current set of classpath entries. It adds the |
| archive to cache as well. |
| |
| Archive files will be unpacked and added to the classpath |
| when being distributed. |
| |
| @param archive Path of the archive to be added]]> |
| </doc> |
| </method> |
| <method name="createSymlink" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Originally intended to enable symlinks, but currently symlinks cannot be |
| disabled.]]> |
| </doc> |
| </method> |
| <method name="setMaxMapAttempts" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[Expert: Set the number of maximum attempts that will be made to run a |
| map task. |
| |
| @param n the number of attempts per map task.]]> |
| </doc> |
| </method> |
| <method name="setMaxReduceAttempts" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[Expert: Set the number of maximum attempts that will be made to run a |
| reduce task. |
| |
| @param n the number of attempts per reduce task.]]> |
| </doc> |
| </method> |
| <method name="setProfileEnabled" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="newValue" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the system should collect profiler information for some of |
| the tasks in this job. The information is stored in the user log |
| directory. |
| @param newValue true means it should be gathered]]> |
| </doc> |
| </method> |
| <method name="setProfileParams" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="value" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it |
| will be replaced with the name of the profiling output file when the task |
| runs. |
| |
| This value is passed to the task child JVM on the command line. |
| |
| @param value the configuration string]]> |
| </doc> |
| </method> |
| <method name="setProfileTaskRange" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="isMap" type="boolean"/> |
| <param name="newValue" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true) |
| must also be called. |
| @param newValue a set of integer ranges of the map ids]]> |
| </doc> |
| </method> |
| <method name="setCancelDelegationTokenUponJobCompletion" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="value" type="boolean"/> |
| <doc> |
| <![CDATA[Sets the flag that will allow the JobTracker to cancel the HDFS delegation |
| tokens upon job completion. Defaults to true.]]> |
| </doc> |
| </method> |
| <method name="addFileToSharedCache" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="resource" type="java.net.URI"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Add a file to job config for shared cache processing. If shared cache is |
| enabled, it will return true, otherwise, return false. We don't check with |
| SCM here given application might not be able to provide the job id; |
| ClientSCMProtocol.use requires the application id. Job Submitter will read |
| the files from job config and take care of things. |
| |
| @param resource The resource that Job Submitter will process later using |
| shared cache. |
| @param conf Configuration to add the resource to |
| @return whether the resource has been added to the configuration]]> |
| </doc> |
| </method> |
| <method name="addFileToSharedCacheAndClasspath" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="resource" type="java.net.URI"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Add a file to job config for shared cache processing. If shared cache is |
| enabled, it will return true, otherwise, return false. We don't check with |
| SCM here given application might not be able to provide the job id; |
| ClientSCMProtocol.use requires the application id. Job Submitter will read |
| the files from job config and take care of things. Job Submitter will also |
| add the file to classpath. Intended to be used by user code. |
| |
| @param resource The resource that Job Submitter will process later using |
| shared cache. |
| @param conf Configuration to add the resource to |
| @return whether the resource has been added to the configuration]]> |
| </doc> |
| </method> |
| <method name="addArchiveToSharedCache" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="resource" type="java.net.URI"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Add an archive to job config for shared cache processing. If shared cache |
| is enabled, it will return true, otherwise, return false. We don't check |
| with SCM here given application might not be able to provide the job id; |
| ClientSCMProtocol.use requires the application id. Job Submitter will read |
| the files from job config and take care of things. Intended to be used by |
| user code. |
| |
| @param resource The resource that Job Submitter will process later using |
| shared cache. |
| @param conf Configuration to add the resource to |
| @return whether the resource has been added to the configuration]]> |
| </doc> |
| </method> |
| <method name="setFileSharedCacheUploadPolicies" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="policies" type="java.util.Map"/> |
| <doc> |
| <![CDATA[This is to set the shared cache upload policies for files. If the parameter |
| was previously set, this method will replace the old value with the new |
| provided map. |
| |
| @param conf Configuration which stores the shared cache upload policies |
| @param policies A map containing the shared cache upload policies for a set |
| of resources. The key is the url of the resource and the value is |
| the upload policy. True if it should be uploaded, false otherwise.]]> |
| </doc> |
| </method> |
| <method name="setArchiveSharedCacheUploadPolicies" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="policies" type="java.util.Map"/> |
| <doc> |
| <![CDATA[This is to set the shared cache upload policies for archives. If the |
| parameter was previously set, this method will replace the old value with |
| the new provided map. |
| |
| @param conf Configuration which stores the shared cache upload policies |
| @param policies A map containing the shared cache upload policies for a set |
| of resources. The key is the url of the resource and the value is |
| the upload policy. True if it should be uploaded, false otherwise.]]> |
| </doc> |
| </method> |
| <method name="getFileSharedCacheUploadPolicies" return="java.util.Map" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[This is to get the shared cache upload policies for files. |
| |
| @param conf Configuration which stores the shared cache upload policies |
| @return A map containing the shared cache upload policies for a set of |
| resources. The key is the url of the resource and the value is the |
| upload policy. True if it should be uploaded, false otherwise.]]> |
| </doc> |
| </method> |
| <method name="getArchiveSharedCacheUploadPolicies" return="java.util.Map" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[This is to get the shared cache upload policies for archives. |
| |
| @param conf Configuration which stores the shared cache upload policies |
| @return A map containing the shared cache upload policies for a set of |
| resources. The key is the url of the resource and the value is the |
| upload policy. True if it should be uploaded, false otherwise.]]> |
| </doc> |
| </method> |
| <method name="submit" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <doc> |
| <![CDATA[Submit the job to the cluster and return immediately. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="waitForCompletion" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="verbose" type="boolean"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <doc> |
| <![CDATA[Submit the job to the cluster and wait for it to finish. |
| @param verbose print the progress to the user |
| @return true if the job succeeded |
| @throws IOException thrown if the communication with the |
| <code>JobTracker</code> is lost]]> |
| </doc> |
| </method> |
| <method name="monitorAndPrintJob" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Monitor a job and print status in real-time as progress is made and tasks |
| fail. |
| @return true if the job succeeded |
| @throws IOException if communication to the JobTracker fails]]> |
| </doc> |
| </method> |
| <method name="getProgressPollInterval" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[The interval at which monitorAndPrintJob() prints status]]> |
| </doc> |
| </method> |
| <method name="getCompletionPollInterval" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[The interval at which waitForCompletion() should check.]]> |
| </doc> |
| </method> |
| <method name="getTaskOutputFilter" return="org.apache.hadoop.mapreduce.Job.TaskStatusFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the task output filter. |
| |
| @param conf the configuration. |
| @return the filter level.]]> |
| </doc> |
| </method> |
| <method name="setTaskOutputFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="newValue" type="org.apache.hadoop.mapreduce.Job.TaskStatusFilter"/> |
| <doc> |
| <![CDATA[Modify the Configuration to set the task output filter. |
| |
| @param conf the Configuration to modify. |
| @param newValue the value to set.]]> |
| </doc> |
| </method> |
| <method name="isUber" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getReservationId" return="org.apache.hadoop.yarn.api.records.ReservationId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the reservation to which the job is submitted, if any |
| |
| @return the reservationId the identifier of the job's reservation, null if |
| the job does not have any reservation associated with it]]> |
| </doc> |
| </method> |
| <method name="setReservationId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="reservationId" type="org.apache.hadoop.yarn.api.records.ReservationId"/> |
| <doc> |
| <![CDATA[Set the reservation to which the job is submitted |
| |
| @param reservationId the reservationId to set]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close the <code>Job</code>. |
| @throws IOException if fail to close.]]> |
| </doc> |
| </method> |
| <field name="OUTPUT_FILTER" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="COMPLETION_POLL_INTERVAL_KEY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Key in mapred-*.xml that sets completionPollIntervalMillis]]> |
| </doc> |
| </field> |
| <field name="PROGRESS_MONITOR_POLL_INTERVAL_KEY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Key in mapred-*.xml that sets progMonitorPollIntervalMillis]]> |
| </doc> |
| </field> |
| <field name="USED_GENERIC_PARSER" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SUBMIT_REPLICATION" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DEFAULT_SUBMIT_REPLICATION" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="USE_WILDCARD_FOR_LIBJARS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DEFAULT_USE_WILDCARD_FOR_LIBJARS" type="boolean" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[The job submitter's view of the Job. |
| |
| <p>It allows the user to configure the |
| job, submit it, control its execution, and query the state. The set methods |
| only work until the job is submitted, afterwards they will throw an |
| IllegalStateException. </p> |
| |
| <p> |
| Normally the user creates the application, describes various facets of the |
| job via {@link Job} and then submits the job and monitor its progress.</p> |
| |
| <p>Here is an example on how to submit a job:</p> |
| <p><blockquote><pre> |
| // Create a new Job |
| Job job = Job.getInstance(); |
| job.setJarByClass(MyJob.class); |
| |
| // Specify various job-specific parameters |
| job.setJobName("myjob"); |
| |
| job.setInputPath(new Path("in")); |
| job.setOutputPath(new Path("out")); |
| |
| job.setMapperClass(MyJob.MyMapper.class); |
| job.setReducerClass(MyJob.MyReducer.class); |
| |
| // Submit the job, then poll for progress until the job is complete |
| job.waitForCompletion(true); |
| </pre></blockquote>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.Job --> |
| <!-- start interface org.apache.hadoop.mapreduce.JobContext --> |
| <interface name="JobContext" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.MRJobConfig"/> |
| <method name="getConfiguration" return="org.apache.hadoop.conf.Configuration" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the configuration for the job. |
| @return the shared configuration object]]> |
| </doc> |
| </method> |
| <method name="getCredentials" return="org.apache.hadoop.security.Credentials" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get credentials for the job. |
| @return credentials for the job]]> |
| </doc> |
| </method> |
| <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the unique ID for the job. |
| @return the object with the job id]]> |
| </doc> |
| </method> |
| <method name="getNumReduceTasks" return="int" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configured number of reduce tasks for this job. Defaults to |
| <code>1</code>. |
| @return the number of reduce tasks for this job.]]> |
| </doc> |
| </method> |
| <method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the current working directory for the default file system. |
| |
| @return the directory name.]]> |
| </doc> |
| </method> |
| <method name="getOutputKeyClass" return="java.lang.Class" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the key class for the job output data. |
| @return the key class for the job output data.]]> |
| </doc> |
| </method> |
| <method name="getOutputValueClass" return="java.lang.Class" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the value class for job outputs. |
| @return the value class for job outputs.]]> |
| </doc> |
| </method> |
| <method name="getMapOutputKeyClass" return="java.lang.Class" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the key class for the map output data. If it is not set, use the |
| (final) output key class. This allows the map output key class to be |
| different than the final output key class. |
| @return the map output key class.]]> |
| </doc> |
| </method> |
| <method name="getMapOutputValueClass" return="java.lang.Class" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the value class for the map output data. If it is not set, use the |
| (final) output value class. This allows the map output value class to be |
| different than the final output value class. |
| |
| @return the map output value class.]]> |
| </doc> |
| </method> |
| <method name="getJobName" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user-specified job name. This is only used to identify the |
| job to the user. |
| |
| @return the job's name, defaulting to "".]]> |
| </doc> |
| </method> |
| <method name="getInputFormatClass" return="java.lang.Class" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <doc> |
| <![CDATA[Get the {@link InputFormat} class for the job. |
| |
| @return the {@link InputFormat} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getMapperClass" return="java.lang.Class" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <doc> |
| <![CDATA[Get the {@link Mapper} class for the job. |
| |
| @return the {@link Mapper} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getCombinerClass" return="java.lang.Class" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <doc> |
| <![CDATA[Get the combiner class for the job. |
| |
| @return the combiner class for the job.]]> |
| </doc> |
| </method> |
| <method name="getReducerClass" return="java.lang.Class" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <doc> |
| <![CDATA[Get the {@link Reducer} class for the job. |
| |
| @return the {@link Reducer} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getOutputFormatClass" return="java.lang.Class" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <doc> |
| <![CDATA[Get the {@link OutputFormat} class for the job. |
| |
| @return the {@link OutputFormat} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getPartitionerClass" return="java.lang.Class" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <doc> |
| <![CDATA[Get the {@link Partitioner} class for the job. |
| |
| @return the {@link Partitioner} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getSortComparator" return="org.apache.hadoop.io.RawComparator" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link RawComparator} comparator used to compare keys. |
| |
| @return the {@link RawComparator} comparator used to compare keys.]]> |
| </doc> |
| </method> |
| <method name="getJar" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the pathname of the job's jar. |
| @return the pathname]]> |
| </doc> |
| </method> |
| <method name="getCombinerKeyGroupingComparator" return="org.apache.hadoop.io.RawComparator" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user defined {@link RawComparator} comparator for |
| grouping keys of inputs to the combiner. |
| |
| @return comparator set by the user for grouping values. |
| @see Job#setCombinerKeyGroupingComparatorClass(Class)]]> |
| </doc> |
| </method> |
| <method name="getGroupingComparator" return="org.apache.hadoop.io.RawComparator" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user defined {@link RawComparator} comparator for |
| grouping keys of inputs to the reduce. |
| |
| @return comparator set by the user for grouping values. |
| @see Job#setGroupingComparatorClass(Class) |
| @see #getCombinerKeyGroupingComparator()]]> |
| </doc> |
| </method> |
| <method name="getJobSetupCleanupNeeded" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get whether job-setup and job-cleanup is needed for the job |
| |
| @return boolean]]> |
| </doc> |
| </method> |
| <method name="getTaskCleanupNeeded" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get whether task-cleanup is needed for the job |
| |
| @return boolean]]> |
| </doc> |
| </method> |
| <method name="getProfileEnabled" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get whether the task profiling is enabled. |
| @return true if some tasks will be profiled]]> |
| </doc> |
| </method> |
| <method name="getProfileParams" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the profiler configuration arguments. |
| |
| The default value for this property is |
| "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s" |
| |
| @return the parameters to pass to the task child to configure profiling]]> |
| </doc> |
| </method> |
| <method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="isMap" type="boolean"/> |
| <doc> |
| <![CDATA[Get the range of maps or reduces to profile. |
| @param isMap is the task a map? |
| @return the task ranges]]> |
| </doc> |
| </method> |
| <method name="getUser" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the reported username for this job. |
| |
| @return the username]]> |
| </doc> |
| </method> |
| <method name="getSymlink" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Originally intended to check if symlinks should be used, but currently |
| symlinks cannot be disabled. |
| @return true]]> |
| </doc> |
| </method> |
| <method name="getArchiveClassPaths" return="org.apache.hadoop.fs.Path[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the archive entries in classpath as an array of Path]]> |
| </doc> |
| </method> |
| <method name="getCacheArchives" return="java.net.URI[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get cache archives set in the Configuration |
| @return A URI array of the caches set in the Configuration |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getCacheFiles" return="java.net.URI[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get cache files set in the Configuration |
| @return A URI array of the files set in the Configuration |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getLocalCacheArchives" return="org.apache.hadoop.fs.Path[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="the array returned only includes the items that were |
| downloaded. There is no way to map this to what is returned by |
| {@link #getCacheArchives()}."> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return the path array of the localized caches |
| @return A path array of localized caches |
| @throws IOException |
| @deprecated the array returned only includes the items that were |
| downloaded. There is no way to map this to what is returned by |
| {@link #getCacheArchives()}.]]> |
| </doc> |
| </method> |
| <method name="getLocalCacheFiles" return="org.apache.hadoop.fs.Path[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="the array returned only includes the items that were |
| downloaded. There is no way to map this to what is returned by |
| {@link #getCacheFiles()}."> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return the path array of the localized files |
| @return A path array of localized files |
| @throws IOException |
| @deprecated the array returned only includes the items that were |
| downloaded. There is no way to map this to what is returned by |
| {@link #getCacheFiles()}.]]> |
| </doc> |
| </method> |
| <method name="getFileClassPaths" return="org.apache.hadoop.fs.Path[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the file entries in classpath as an array of Path]]> |
| </doc> |
| </method> |
| <method name="getArchiveTimestamps" return="java.lang.String[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the timestamps of the archives. Used by internal |
| DistributedCache and MapReduce code. |
| @return a string array of timestamps]]> |
| </doc> |
| </method> |
| <method name="getFileTimestamps" return="java.lang.String[]" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the timestamps of the files. Used by internal |
| DistributedCache and MapReduce code. |
| @return a string array of timestamps]]> |
| </doc> |
| </method> |
| <method name="getMaxMapAttempts" return="int" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configured number of maximum attempts that will be made to run a |
| map task, as specified by the <code>mapred.map.max.attempts</code> |
| property. If this property is not already set, the default is 4 attempts. |
| |
| @return the max number of attempts per map task.]]> |
| </doc> |
| </method> |
| <method name="getMaxReduceAttempts" return="int" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configured number of maximum attempts that will be made to run a |
| reduce task, as specified by the <code>mapred.reduce.max.attempts</code> |
| property. If this property is not already set, the default is 4 attempts. |
| |
| @return the max number of attempts per reduce task.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A read-only view of the job that is provided to the tasks while they |
| are running.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.JobContext --> |
| <!-- start class org.apache.hadoop.mapreduce.JobCounter --> |
| <class name="JobCounter" extends="java.lang.Enum" |
| abstract="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapreduce.JobCounter[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapreduce.JobCounter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.JobCounter --> |
| <!-- start class org.apache.hadoop.mapreduce.JobID --> |
| <class name="JobID" extends="org.apache.hadoop.mapred.ID" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="java.lang.Comparable"/> |
| <constructor name="JobID" type="java.lang.String, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a JobID object |
| @param jtIdentifier jobTracker identifier |
| @param id job number]]> |
| </doc> |
| </constructor> |
| <constructor name="JobID" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getJtIdentifier" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="o" type="java.lang.Object"/> |
| </method> |
| <method name="compareTo" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="o" type="org.apache.hadoop.mapreduce.ID"/> |
| <doc> |
| <![CDATA[Compare JobIds by first jtIdentifiers, then by job numbers]]> |
| </doc> |
| </method> |
| <method name="appendTo" return="java.lang.StringBuilder" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="builder" type="java.lang.StringBuilder"/> |
| <doc> |
| <![CDATA[Add the stuff after the "job" prefix to the given builder. This is useful, |
| because the sub-ids use this substring at the start of their string. |
| @param builder the builder to append to |
| @return the builder that was passed in]]> |
| </doc> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="forName" return="org.apache.hadoop.mapreduce.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="str" type="java.lang.String"/> |
| <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> |
| <doc> |
| <![CDATA[Construct a JobId object from given string |
| @return constructed JobId object or null if the given String is null |
| @throws IllegalArgumentException if the given string is malformed]]> |
| </doc> |
| </method> |
| <field name="JOB" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="JOBID_REGEX" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="idFormat" type="java.text.NumberFormat" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[JobID represents the immutable and unique identifier for |
| the job. JobID consists of two parts. First part |
| represents the jobtracker identifier, so that jobID to jobtracker map |
| is defined. For cluster setup this string is the jobtracker |
| start time, for local setting, it is "local" and a random number. |
| Second part of the JobID is the job number. <br> |
| An example JobID is : |
| <code>job_200707121733_0003</code> , which represents the third job |
| running at the jobtracker started at <code>200707121733</code>. |
| <p> |
| Applications should never construct or parse JobID strings, but rather |
| use appropriate constructors or {@link #forName(String)} method. |
| |
| @see TaskID |
| @see TaskAttemptID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.JobID --> |
| <!-- start class org.apache.hadoop.mapreduce.JobPriority --> |
| <class name="JobPriority" extends="java.lang.Enum" |
| abstract="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapreduce.JobPriority[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapreduce.JobPriority" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <doc> |
| <![CDATA[Used to describe the priority of the running job. |
| DEFAULT : While submitting a job, if the user is not specifying priority, |
| YARN has the capability to pick the default priority as per its config. |
| Hence MapReduce can indicate such cases with this new enum. |
| UNDEFINED_PRIORITY : YARN supports priority as an integer. Hence other than |
| the five defined enums, YARN can consider other integers also. To generalize |
| such cases, this specific enum is used.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.JobPriority --> |
| <!-- start class org.apache.hadoop.mapreduce.JobStatus --> |
| <class name="JobStatus" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <implements name="java.lang.Cloneable"/> |
| <constructor name="JobStatus" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapreduce.JobID, float, float, float, float, org.apache.hadoop.mapreduce.JobStatus.State, org.apache.hadoop.mapreduce.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapreduce.JobID, float, float, float, float, org.apache.hadoop.mapreduce.JobStatus.State, org.apache.hadoop.mapreduce.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param queue queue name |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapreduce.JobID, float, float, float, float, org.apache.hadoop.mapreduce.JobStatus.State, org.apache.hadoop.mapreduce.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param queue queue name |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job. |
| @param isUber Whether job running in uber mode]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapreduce.JobID, float, float, float, float, org.apache.hadoop.mapreduce.JobStatus.State, org.apache.hadoop.mapreduce.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String, boolean, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job. |
| @param user userid of the person who submitted the job. |
| @param jobName user-specified job name. |
| @param queue queue name |
| @param jobFile job configuration file. |
| @param trackingUrl link to the web-ui for details of the job. |
| @param isUber Whether job running in uber mode |
| @param historyFile history file]]> |
| </doc> |
| </constructor> |
| <method name="setMapProgress" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="p" type="float"/> |
| <doc> |
| <![CDATA[Sets the map progress of this job |
| @param p The value of map progress to set to]]> |
| </doc> |
| </method> |
| <method name="setCleanupProgress" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="p" type="float"/> |
| <doc> |
| <![CDATA[Sets the cleanup progress of this job |
| @param p The value of cleanup progress to set to]]> |
| </doc> |
| </method> |
| <method name="setSetupProgress" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="p" type="float"/> |
| <doc> |
| <![CDATA[Sets the setup progress of this job |
| @param p The value of setup progress to set to]]> |
| </doc> |
| </method> |
| <method name="setReduceProgress" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="p" type="float"/> |
| <doc> |
| <![CDATA[Sets the reduce progress of this Job |
| @param p The value of reduce progress to set to]]> |
| </doc> |
| </method> |
| <method name="setPriority" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="jp" type="org.apache.hadoop.mapreduce.JobPriority"/> |
| <doc> |
| <![CDATA[Set the priority of the job, defaulting to NORMAL. |
| @param jp new job priority]]> |
| </doc> |
| </method> |
| <method name="setFinishTime" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="finishTime" type="long"/> |
| <doc> |
| <![CDATA[Set the finish time of the job |
| @param finishTime The finishTime of the job]]> |
| </doc> |
| </method> |
| <method name="setHistoryFile" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="historyFile" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the job history file url for a completed job]]> |
| </doc> |
| </method> |
| <method name="setTrackingUrl" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="trackingUrl" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the link to the web-ui for details of the job.]]> |
| </doc> |
| </method> |
| <method name="setRetired" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Set the job retire flag to true.]]> |
| </doc> |
| </method> |
| <method name="setState" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/> |
| <doc> |
| <![CDATA[Change the current run state of the job.]]> |
| </doc> |
| </method> |
| <method name="setStartTime" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="startTime" type="long"/> |
| <doc> |
| <![CDATA[Set the start time of the job |
| @param startTime The startTime of the job]]> |
| </doc> |
| </method> |
| <method name="setUsername" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="userName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[@param userName The username of the job]]> |
| </doc> |
| </method> |
| <method name="setSchedulingInfo" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="schedulingInfo" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Used to set the scheduling information associated to a particular Job. |
| |
| @param schedulingInfo Scheduling information of the job]]> |
| </doc> |
| </method> |
| <method name="setJobACLs" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="acls" type="java.util.Map"/> |
| <doc> |
| <![CDATA[Set the job acls. |
| |
| @param acls {@link Map} from {@link JobACL} to {@link AccessControlList}]]> |
| </doc> |
| </method> |
| <method name="setQueue" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="queue" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set queue name |
| @param queue queue name]]> |
| </doc> |
| </method> |
| <method name="setFailureInfo" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="failureInfo" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set diagnostic information. |
| @param failureInfo diagnostic information]]> |
| </doc> |
| </method> |
| <method name="getQueue" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get queue name |
| @return queue name]]> |
| </doc> |
| </method> |
| <method name="getMapProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in maps]]> |
| </doc> |
| </method> |
| <method name="getCleanupProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in cleanup]]> |
| </doc> |
| </method> |
| <method name="getSetupProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in setup]]> |
| </doc> |
| </method> |
| <method name="getReduceProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in reduce]]> |
| </doc> |
| </method> |
| <method name="getState" return="org.apache.hadoop.mapreduce.JobStatus.State" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return running state of the job]]> |
| </doc> |
| </method> |
| <method name="getStartTime" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return start time of the job]]> |
| </doc> |
| </method> |
| <method name="clone" return="java.lang.Object" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return The jobid of the Job]]> |
| </doc> |
| </method> |
| <method name="getUsername" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the username of the job]]> |
| </doc> |
| </method> |
| <method name="getSchedulingInfo" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Gets the Scheduling information associated to a particular Job. |
| @return the scheduling information of the job]]> |
| </doc> |
| </method> |
| <method name="getJobACLs" return="java.util.Map" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the job acls. |
| |
| @return a {@link Map} from {@link JobACL} to {@link AccessControlList}]]> |
| </doc> |
| </method> |
| <method name="getPriority" return="org.apache.hadoop.mapreduce.JobPriority" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the priority of the job |
| @return job priority]]> |
| </doc> |
| </method> |
| <method name="getFailureInfo" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Gets any available info on the reason of failure of the job. |
| @return diagnostic information on why a job might have failed.]]> |
| </doc> |
| </method> |
| <method name="isJobComplete" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns true if the status is for a completed job.]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getJobName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user-specified job name.]]> |
| </doc> |
| </method> |
| <method name="getJobFile" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configuration file for the job.]]> |
| </doc> |
| </method> |
| <method name="getTrackingUrl" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the link to the web-ui for details of the job.]]> |
| </doc> |
| </method> |
| <method name="getFinishTime" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the finish time of the job.]]> |
| </doc> |
| </method> |
| <method name="isRetired" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Check whether the job has retired.]]> |
| </doc> |
| </method> |
| <method name="getHistoryFile" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the job history file name for a completed job. If job is not |
| completed or history file not available then return null.]]> |
| </doc> |
| </method> |
| <method name="getNumUsedSlots" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return number of used mapred slots]]> |
| </doc> |
| </method> |
| <method name="setNumUsedSlots" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[@param n number of used mapred slots]]> |
| </doc> |
| </method> |
| <method name="getNumReservedSlots" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the number of reserved slots]]> |
| </doc> |
| </method> |
| <method name="setNumReservedSlots" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[@param n the number of reserved slots]]> |
| </doc> |
| </method> |
| <method name="getUsedMem" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the used memory]]> |
| </doc> |
| </method> |
| <method name="setUsedMem" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="m" type="int"/> |
| <doc> |
| <![CDATA[@param m the used memory]]> |
| </doc> |
| </method> |
| <method name="getReservedMem" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the reserved memory]]> |
| </doc> |
| </method> |
| <method name="setReservedMem" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="r" type="int"/> |
| <doc> |
| <![CDATA[@param r the reserved memory]]> |
| </doc> |
| </method> |
| <method name="getNeededMem" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the needed memory]]> |
| </doc> |
| </method> |
| <method name="setNeededMem" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[@param n the needed memory]]> |
| </doc> |
| </method> |
| <method name="isUber" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Whether job running in uber mode |
| @return job in uber-mode]]> |
| </doc> |
| </method> |
| <method name="setUber" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="isUber" type="boolean"/> |
| <doc> |
| <![CDATA[Set uber-mode flag |
| @param isUber Whether job running in uber-mode]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[Describes the current status of a job.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.JobStatus --> |
| <!-- start interface org.apache.hadoop.mapreduce.MapContext --> |
| <interface name="MapContext" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.TaskInputOutputContext"/> |
| <method name="getInputSplit" return="org.apache.hadoop.mapreduce.InputSplit" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the input split for this map.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The context that is given to the {@link Mapper}. |
| @param <KEYIN> the key input type to the Mapper |
| @param <VALUEIN> the value input type to the Mapper |
| @param <KEYOUT> the key output type from the Mapper |
| @param <VALUEOUT> the value output type from the Mapper]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.MapContext --> |
| <!-- start class org.apache.hadoop.mapreduce.Mapper --> |
| <class name="Mapper" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Mapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Called once at the beginning of the task.]]> |
| </doc> |
| </method> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="key" type="KEYIN"/> |
| <param name="value" type="VALUEIN"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Called once for each key/value pair in the input split. Most applications |
| should override this, but the default is the identity function.]]> |
| </doc> |
| </method> |
| <method name="cleanup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Called once at the end of the task.]]> |
| </doc> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Expert users can override this method for more complete control over the |
| execution of the Mapper. |
| @param context |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs. |
| |
<p>Maps are the individual tasks which transform input records into
intermediate records. The transformed intermediate records need not be of
| the same type as the input records. A given input pair may map to zero or |
| many output pairs.</p> |
| |
| <p>The Hadoop Map-Reduce framework spawns one map task for each |
| {@link InputSplit} generated by the {@link InputFormat} for the job. |
| <code>Mapper</code> implementations can access the {@link Configuration} for |
| the job via the {@link JobContext#getConfiguration()}. |
| |
| <p>The framework first calls |
| {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by |
| {@link #map(Object, Object, org.apache.hadoop.mapreduce.Mapper.Context)} |
| for each key/value pair in the <code>InputSplit</code>. Finally |
| {@link #cleanup(org.apache.hadoop.mapreduce.Mapper.Context)} is called.</p> |
| |
| <p>All intermediate values associated with a given output key are |
| subsequently grouped by the framework, and passed to a {@link Reducer} to |
| determine the final output. Users can control the sorting and grouping by |
| specifying two key {@link RawComparator} classes.</p> |
| |
| <p>The <code>Mapper</code> outputs are partitioned per |
| <code>Reducer</code>. Users can control which keys (and hence records) go to |
| which <code>Reducer</code> by implementing a custom {@link Partitioner}. |
| |
| <p>Users can optionally specify a <code>combiner</code>, via |
| {@link Job#setCombinerClass(Class)}, to perform local aggregation of the |
| intermediate outputs, which helps to cut down the amount of data transferred |
| from the <code>Mapper</code> to the <code>Reducer</code>. |
| |
| <p>Applications can specify if and how the intermediate |
| outputs are to be compressed and which {@link CompressionCodec}s are to be |
| used via the <code>Configuration</code>.</p> |
| |
| <p>If the job has zero |
| reduces then the output of the <code>Mapper</code> is directly written |
| to the {@link OutputFormat} without sorting by keys.</p> |
| |
| <p>Example:</p> |
| <p><blockquote><pre> |
| public class TokenCounterMapper |
| extends Mapper<Object, Text, Text, IntWritable>{ |
| |
| private final static IntWritable one = new IntWritable(1); |
| private Text word = new Text(); |
| |
| public void map(Object key, Text value, Context context) throws IOException, InterruptedException { |
| StringTokenizer itr = new StringTokenizer(value.toString()); |
| while (itr.hasMoreTokens()) { |
| word.set(itr.nextToken()); |
| context.write(word, one); |
| } |
| } |
| } |
| </pre></blockquote> |
| |
| <p>Applications may override the |
| {@link #run(org.apache.hadoop.mapreduce.Mapper.Context)} method to exert |
| greater control on map processing e.g. multi-threaded <code>Mapper</code>s |
| etc.</p> |
| |
| @see InputFormat |
| @see JobContext |
| @see Partitioner |
| @see Reducer]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.Mapper --> |
| <!-- start class org.apache.hadoop.mapreduce.MarkableIterator --> |
| <class name="MarkableIterator" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.MarkableIteratorInterface"/> |
| <constructor name="MarkableIterator" type="java.util.Iterator" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a new iterator layered on the input iterator |
| @param itr underlying iterator that implements MarkableIteratorInterface]]> |
| </doc> |
| </constructor> |
| <method name="mark" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="clearMark" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="VALUE" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="remove" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[<code>MarkableIterator</code> is a wrapper iterator class that |
| implements the {@link MarkableIteratorInterface}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.MarkableIterator --> |
| <!-- start class org.apache.hadoop.mapreduce.OutputCommitter --> |
| <class name="OutputCommitter" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="OutputCommitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setupJob" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For the framework to setup the job output during initialization. This is |
| called from the application master process for the entire job. This will be |
| called multiple times, once per job attempt. |
| |
| @param jobContext Context of the job whose output is being written. |
| @throws IOException if temporary output could not be created]]> |
| </doc> |
| </method> |
| <method name="cleanupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #commitJob(JobContext)} and |
| {@link #abortJob(JobContext, JobStatus.State)} instead."> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For cleaning up the job's output after job completion. This is called |
| from the application master process for the entire job. This may be called |
| multiple times. |
| |
| @param jobContext Context of the job whose output is being written. |
| @throws IOException |
| @deprecated Use {@link #commitJob(JobContext)} and |
| {@link #abortJob(JobContext, JobStatus.State)} instead.]]> |
| </doc> |
| </method> |
| <method name="commitJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For committing job's output after successful job completion. Note that this |
| is invoked for jobs with final runstate as SUCCESSFUL. This is called |
| from the application master process for the entire job. This is guaranteed |
| to only be called once. If it throws an exception the entire job will |
| fail. |
| |
| @param jobContext Context of the job whose output is being written. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="abortJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For aborting an unsuccessful job's output. Note that this is invoked for |
| jobs with final runstate as {@link JobStatus.State#FAILED} or |
| {@link JobStatus.State#KILLED}. This is called from the application |
| master process for the entire job. This may be called multiple times. |
| |
| @param jobContext Context of the job whose output is being written. |
| @param state final runstate of the job |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setupTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Sets up output for the task. This is called from each individual task's |
| process that will output to HDFS, and it is called just for that task. This |
| may be called multiple times for the same task, but for different task |
| attempts. |
| |
| @param taskContext Context of the task whose output is being written. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="needsTaskCommit" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check whether task needs a commit. This is called from each individual |
| task's process that will output to HDFS, and it is called just for that |
| task. |
| |
| @param taskContext |
| @return true/false |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="commitTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[To promote the task's temporary output to final output location. |
| If {@link #needsTaskCommit(TaskAttemptContext)} returns true and this |
| task is the task that the AM determines finished first, this method |
| is called to commit an individual task's output. This is to mark |
that task's output as complete, as {@link #commitJob(JobContext)} will
| also be called later on if the entire job finished successfully. This |
| is called from a task's process. This may be called multiple times for the |
| same task, but different task attempts. It should be very rare for this to |
| be called multiple times and requires odd networking failures to make this |
| happen. In the future the Hadoop framework may eliminate this race. |
| |
| @param taskContext Context of the task whose output is being written. |
| @throws IOException if commit is not successful.]]> |
| </doc> |
| </method> |
| <method name="abortTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Discard the task output. This is called from a task's process to clean |
up a single task's output that has not yet been committed. This may be
| called multiple times for the same task, but for different task attempts. |
| |
| @param taskContext |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isRecoverySupported" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #isRecoverySupported(JobContext)} instead."> |
| <doc> |
| <![CDATA[Is task output recovery supported for restarting jobs? |
| |
| If task output recovery is supported, job restart can be done more |
| efficiently. |
| |
| @return <code>true</code> if task output recovery is supported, |
| <code>false</code> otherwise |
| @see #recoverTask(TaskAttemptContext) |
| @deprecated Use {@link #isRecoverySupported(JobContext)} instead.]]> |
| </doc> |
| </method> |
| <method name="isCommitJobRepeatable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns true if an in-progress job commit can be retried. If the MR AM is |
| re-run then it will check this value to determine if it can retry an |
| in-progress commit that was started by a previous version. |
| Note that in rare scenarios, the previous AM version might still be running |
| at that time, due to system anomalies. Hence if this method returns true |
| then the retry commit operation should be able to run concurrently with |
| the previous operation. |
| |
| If repeatable job commit is supported, job restart can tolerate previous |
| AM failures during job commit. |
| |
| By default, it is not supported. Extended classes (like: |
| FileOutputCommitter) should explicitly override it if provide support. |
| |
| @param jobContext |
| Context of the job whose output is being written. |
| @return <code>true</code> repeatable job commit is supported, |
| <code>false</code> otherwise |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isRecoverySupported" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Is task output recovery supported for restarting jobs? |
| |
| If task output recovery is supported, job restart can be done more |
| efficiently. |
| |
| @param jobContext |
| Context of the job whose output is being written. |
| @return <code>true</code> if task output recovery is supported, |
| <code>false</code> otherwise |
| @throws IOException |
| @see #recoverTask(TaskAttemptContext)]]> |
| </doc> |
| </method> |
| <method name="recoverTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Recover the task output. |
| |
| The retry-count for the job will be passed via the |
| {@link MRJobConfig#APPLICATION_ATTEMPT_ID} key in |
| {@link TaskAttemptContext#getConfiguration()} for the |
| <code>OutputCommitter</code>. This is called from the application master |
| process, but it is called individually for each task. |
| |
| If an exception is thrown the task will be attempted again. |
| |
| This may be called multiple times for the same task. But from different |
| application attempts. |
| |
| @param taskContext Context of the task whose output is being recovered |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>OutputCommitter</code> describes the commit of task output for a |
| Map-Reduce job. |
| |
| <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of |
the job to:</p>
| <ol> |
| <li> |
| Setup the job during initialization. For example, create the temporary |
| output directory for the job during the initialization of the job. |
| </li> |
| <li> |
| Cleanup the job after the job completion. For example, remove the |
| temporary output directory after the job completion. |
| </li> |
| <li> |
| Setup the task temporary output. |
| </li> |
| <li> |
| Check whether a task needs a commit. This is to avoid the commit |
| procedure if a task does not need commit. |
| </li> |
| <li> |
| Commit of the task output. |
| </li> |
| <li> |
| Discard the task commit. |
| </li> |
| </ol> |
| The methods in this class can be called from several different processes and |
| from several different contexts. It is important to know which process and |
| which context each is called from. Each method should be marked accordingly |
| in its documentation. It is also important to note that not all methods are |
| guaranteed to be called once and only once. If a method is not guaranteed to |
| have this property the output committer needs to handle this appropriately. |
| Also note it will only be in rare situations where they may be called |
| multiple times for the same task. |
| |
| @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter |
| @see JobContext |
| @see TaskAttemptContext]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.OutputCommitter --> |
| <!-- start class org.apache.hadoop.mapreduce.OutputFormat --> |
| <class name="OutputFormat" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="OutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the {@link RecordWriter} for the given task. |
| |
| @param context the information about the current task. |
| @return a {@link RecordWriter} to write the output for the job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Check for validity of the output-specification for the job. |
| |
<p>This is to validate the output specification for the job when it
is submitted. Typically checks that it does not already exist,
| throwing an exception when it already exists, so that output is not |
| overwritten.</p> |
| |
| @param context information about the job |
| @throws IOException when output should not be attempted]]> |
| </doc> |
| </method> |
| <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the output committer for this output format. This is responsible |
| for ensuring the output is committed correctly. |
| @param context the task context |
| @return an output committer |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>OutputFormat</code> describes the output-specification for a |
| Map-Reduce job. |
| |
| <p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the |
job to:</p>
| <ol> |
| <li> |
| Validate the output-specification of the job. For e.g. check that the |
| output directory doesn't already exist. |
| <li> |
| Provide the {@link RecordWriter} implementation to be used to write out |
| the output files of the job. Output files are stored in a |
| {@link FileSystem}. |
| </li> |
| </ol> |
| |
| @see RecordWriter]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.OutputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.Partitioner --> |
| <class name="Partitioner" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Partitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getPartition" return="int" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="KEY"/> |
| <param name="value" type="VALUE"/> |
| <param name="numPartitions" type="int"/> |
| <doc> |
| <![CDATA[Get the partition number for a given key (hence record) given the total |
| number of partitions i.e. number of reduce-tasks for the job. |
| |
<p>Typically a hash function on all or a subset of the key.</p>
| |
@param key the key to be partitioned.
| @param value the entry value. |
| @param numPartitions the total number of partitions. |
| @return the partition number for the <code>key</code>.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Partitions the key space. |
| |
| <p><code>Partitioner</code> controls the partitioning of the keys of the |
| intermediate map-outputs. The key (or a subset of the key) is used to derive |
| the partition, typically by a hash function. The total number of partitions |
| is the same as the number of reduce tasks for the job. Hence this controls |
| which of the <code>m</code> reduce tasks the intermediate key (and hence the |
| record) is sent for reduction.</p> |
| |
| <p>Note: A <code>Partitioner</code> is created only when there are multiple |
| reducers.</p> |
| |
| <p>Note: If you require your Partitioner class to obtain the Job's |
| configuration object, implement the {@link Configurable} interface.</p> |
| |
| @see Reducer]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.Partitioner --> |
| <!-- start class org.apache.hadoop.mapreduce.QueueAclsInfo --> |
| <class name="QueueAclsInfo" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="QueueAclsInfo" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Default constructor for QueueAclsInfo.]]> |
| </doc> |
| </constructor> |
| <constructor name="QueueAclsInfo" type="java.lang.String, java.lang.String[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a new QueueAclsInfo object using the queue name and the |
| queue operations array |
| |
| @param queueName Name of the job queue |
| @param operations]]> |
| </doc> |
| </constructor> |
| <method name="getQueueName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get queue name. |
| |
| @return name]]> |
| </doc> |
| </method> |
| <method name="setQueueName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| </method> |
| <method name="getOperations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[Get operations allowed on the queue.
| |
| @return array of String]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Class to encapsulate Queue ACLs for a particular |
| user.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.QueueAclsInfo --> |
| <!-- start class org.apache.hadoop.mapreduce.QueueInfo --> |
| <class name="QueueInfo" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="QueueInfo" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Default constructor for QueueInfo.]]> |
| </doc> |
| </constructor> |
| <constructor name="QueueInfo" type="java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a new QueueInfo object using the queue name and the |
| scheduling information passed. |
| |
| @param queueName Name of the job queue |
| @param schedulingInfo Scheduling Information associated with the job |
| queue]]> |
| </doc> |
| </constructor> |
| <constructor name="QueueInfo" type="java.lang.String, java.lang.String, org.apache.hadoop.mapreduce.QueueState, org.apache.hadoop.mapreduce.JobStatus[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@param queueName |
| @param schedulingInfo |
| @param state |
| @param stats]]> |
| </doc> |
| </constructor> |
| <method name="setQueueName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the queue name of the JobQueueInfo |
| |
| @param queueName Name of the job queue.]]> |
| </doc> |
| </method> |
| <method name="getQueueName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the queue name from JobQueueInfo |
| |
| @return queue name]]> |
| </doc> |
| </method> |
| <method name="setSchedulingInfo" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="schedulingInfo" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the scheduling information associated to particular job queue |
| |
| @param schedulingInfo]]> |
| </doc> |
| </method> |
| <method name="getSchedulingInfo" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Gets the scheduling information associated to particular job queue. |
| If nothing is set would return <b>"N/A"</b> |
| |
| @return Scheduling information associated to particular Job Queue]]> |
| </doc> |
| </method> |
| <method name="setState" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="state" type="org.apache.hadoop.mapreduce.QueueState"/> |
| <doc> |
| <![CDATA[Set the state of the queue |
| @param state state of the queue.]]> |
| </doc> |
| </method> |
| <method name="getState" return="org.apache.hadoop.mapreduce.QueueState" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the queue state |
| @return the queue state.]]> |
| </doc> |
| </method> |
| <method name="setJobStatuses" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="stats" type="org.apache.hadoop.mapreduce.JobStatus[]"/> |
| </method> |
| <method name="getQueueChildren" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get immediate children. |
| |
| @return list of QueueInfo]]> |
| </doc> |
| </method> |
| <method name="setQueueChildren" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="children" type="java.util.List"/> |
| </method> |
| <method name="getProperties" return="java.util.Properties" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get properties. |
| |
| @return Properties]]> |
| </doc> |
| </method> |
| <method name="setProperties" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="props" type="java.util.Properties"/> |
| </method> |
| <method name="getJobStatuses" return="org.apache.hadoop.mapreduce.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the jobs submitted to queue |
| @return list of JobStatus for the submitted jobs]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Class that contains the information regarding the Job Queues which are |
| maintained by the Hadoop Map/Reduce framework.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.QueueInfo --> |
| <!-- start class org.apache.hadoop.mapreduce.QueueState --> |
| <class name="QueueState" extends="java.lang.Enum" |
| abstract="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapreduce.QueueState[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapreduce.QueueState" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <method name="getStateName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the stateName]]> |
| </doc> |
| </method> |
| <method name="getState" return="org.apache.hadoop.mapreduce.QueueState" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="state" type="java.lang.String"/> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[Enum representing queue state]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.QueueState --> |
| <!-- start class org.apache.hadoop.mapreduce.RecordReader --> |
| <class name="RecordReader" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="java.io.Closeable"/> |
| <constructor name="RecordReader" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="initialize" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Called once at initialization. |
| @param split the split that defines the range of records to read |
| @param context the information about the task |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="nextKeyValue" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Read the next key, value pair. |
| @return true if a key/value pair was read |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getCurrentKey" return="KEYIN" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the current key |
| @return the current key or null if there is no current key |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getCurrentValue" return="VALUEIN" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the current value. |
| @return the object that was read |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[The current progress of the record reader through its data. |
| @return a number between 0.0 and 1.0 that is the fraction of the data read |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close the record reader.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The record reader breaks the data into key/value pairs for input to the |
| {@link Mapper}. |
| @param <KEYIN> |
| @param <VALUEIN>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.RecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.RecordWriter --> |
| <class name="RecordWriter" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="RecordWriter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="write" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Writes a key/value pair. |
| |
| @param key the key to write. |
| @param value the value to write. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Close this <code>RecordWriter</code> to future operations. |
| |
| @param context the context of the task |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>RecordWriter</code> writes the output <key, value> pairs |
| to an output file. |
| |
| <p><code>RecordWriter</code> implementations write the job outputs to the |
| {@link FileSystem}. |
| |
| @see OutputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.RecordWriter --> |
| <!-- start interface org.apache.hadoop.mapreduce.ReduceContext --> |
| <interface name="ReduceContext" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.TaskInputOutputContext"/> |
| <method name="nextKey" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Start processing next unique key.]]> |
| </doc> |
| </method> |
| <method name="getValues" return="java.lang.Iterable" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Iterate through the values for the current key, reusing the same value |
| object, which is stored in the context. |
| @return the series of values associated with the current key. All of the |
| objects returned directly and indirectly from this method are reused.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The context passed to the {@link Reducer}. |
| @param <KEYIN> the class of the input keys |
| @param <VALUEIN> the class of the input values |
| @param <KEYOUT> the class of the output keys |
| @param <VALUEOUT> the class of the output values]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.ReduceContext --> |
| <!-- start class org.apache.hadoop.mapreduce.Reducer --> |
| <class name="Reducer" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Reducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Called once at the start of the task.]]> |
| </doc> |
| </method> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="key" type="KEYIN"/> |
| <param name="values" type="java.lang.Iterable"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[This method is called once for each key. Most applications will define |
| their reduce class by overriding this method. The default implementation |
| is an identity function.]]> |
| </doc> |
| </method> |
| <method name="cleanup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Called once at the end of the task.]]> |
| </doc> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Advanced application writers can use the |
| {@link #run(org.apache.hadoop.mapreduce.Reducer.Context)} method to |
| control how the reduce task works.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Reduces a set of intermediate values which share a key to a smaller set of |
| values. |
| |
| <p><code>Reducer</code> implementations |
| can access the {@link Configuration} for the job via the |
| {@link JobContext#getConfiguration()} method.</p> |
| |
| <p><code>Reducer</code> has 3 primary phases:</p> |
| <ol> |
| <li> |
| |
| <b id="Shuffle">Shuffle</b> |
| |
| <p>The <code>Reducer</code> copies the sorted output from each |
| {@link Mapper} using HTTP across the network.</p> |
| </li> |
| |
| <li> |
| <b id="Sort">Sort</b> |
| |
| <p>The framework merge sorts <code>Reducer</code> inputs by |
| <code>key</code>s |
| (since different <code>Mapper</code>s may have output the same key).</p> |
| |
| <p>The shuffle and sort phases occur simultaneously i.e. while outputs are |
| being fetched they are merged.</p> |
| |
| <b id="SecondarySort">SecondarySort</b> |
| |
| <p>To achieve a secondary sort on the values returned by the value |
| iterator, the application should extend the key with the secondary |
| key and define a grouping comparator. The keys will be sorted using the |
| entire key, but will be grouped using the grouping comparator to decide |
which keys and values are sent in the same call to reduce. The grouping
| comparator is specified via |
| {@link Job#setGroupingComparatorClass(Class)}. The sort order is |
| controlled by |
| {@link Job#setSortComparatorClass(Class)}.</p> |
| |
| |
| For example, say that you want to find duplicate web pages and tag them |
| all with the url of the "best" known example. You would set up the job |
| like: |
| <ul> |
| <li>Map Input Key: url</li> |
| <li>Map Input Value: document</li> |
| <li>Map Output Key: document checksum, url pagerank</li> |
| <li>Map Output Value: url</li> |
| <li>Partitioner: by checksum</li> |
| <li>OutputKeyComparator: by checksum and then decreasing pagerank</li> |
| <li>OutputValueGroupingComparator: by checksum</li> |
| </ul> |
| </li> |
| |
| <li> |
| <b id="Reduce">Reduce</b> |
| |
| <p>In this phase the |
| {@link #reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)} |
| method is called for each <code><key, (collection of values)></code> in |
| the sorted inputs.</p> |
| <p>The output of the reduce task is typically written to a |
| {@link RecordWriter} via |
| {@link Context#write(Object, Object)}.</p> |
| </li> |
| </ol> |
| |
| <p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p> |
| |
| <p>Example:</p> |
| <p><blockquote><pre> |
| public class IntSumReducer<Key> extends Reducer<Key,IntWritable, |
| Key,IntWritable> { |
| private IntWritable result = new IntWritable(); |
| |
| public void reduce(Key key, Iterable<IntWritable> values, |
| Context context) throws IOException, InterruptedException { |
| int sum = 0; |
| for (IntWritable val : values) { |
| sum += val.get(); |
| } |
| result.set(sum); |
| context.write(key, result); |
| } |
| } |
| </pre></blockquote> |
| |
| @see Mapper |
| @see Partitioner]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.Reducer --> |
| <!-- start interface org.apache.hadoop.mapreduce.TaskAttemptContext --> |
| <interface name="TaskAttemptContext" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.JobContext"/> |
| <implements name="org.apache.hadoop.util.Progressable"/> |
| <method name="getTaskAttemptID" return="org.apache.hadoop.mapreduce.TaskAttemptID" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the unique name for this task attempt.]]> |
| </doc> |
| </method> |
| <method name="setStatus" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="msg" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the current status of the task to the given string.]]> |
| </doc> |
| </method> |
| <method name="getStatus" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the last set status message. |
| @return the current status message]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The current progress of the task attempt. |
| @return a number between 0.0 and 1.0 (inclusive) indicating the attempt's |
| progress.]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="org.apache.hadoop.mapreduce.Counter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counterName" type="java.lang.Enum"/> |
| <doc> |
| <![CDATA[Get the {@link Counter} for the given <code>counterName</code>. |
| @param counterName counter name |
| @return the <code>Counter</code> for the given <code>counterName</code>]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="org.apache.hadoop.mapreduce.Counter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="groupName" type="java.lang.String"/> |
| <param name="counterName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Get the {@link Counter} for the given <code>groupName</code> and |
| <code>counterName</code>. |
| @param counterName counter name |
| @return the <code>Counter</code> for the given <code>groupName</code> and |
| <code>counterName</code>]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The context for task attempts.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.TaskAttemptContext --> |
| <!-- start class org.apache.hadoop.mapreduce.TaskAttemptID --> |
| <class name="TaskAttemptID" extends="org.apache.hadoop.mapred.ID" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskAttemptID" type="org.apache.hadoop.mapreduce.TaskID, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}. |
| @param taskId TaskID that this task belongs to |
| @param id the task attempt number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskAttemptID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskId object from given parts. |
| @param jtIdentifier jobTracker identifier |
| @param jobId job number |
| @param type the TaskType |
| @param taskId taskId number |
| @param id the task attempt number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskId object from given parts. |
| @param jtIdentifier jobTracker identifier |
| @param jobId job number |
| @param isMap whether the tip is a map |
| @param taskId taskId number |
| @param id the task attempt number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskAttemptID" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the {@link JobID} object that this task attempt belongs to]]> |
| </doc> |
| </method> |
| <method name="getTaskID" return="org.apache.hadoop.mapreduce.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the {@link TaskID} object that this task attempt belongs to]]> |
| </doc> |
| </method> |
| <method name="isMap" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns whether this TaskID is a map ID]]> |
| </doc> |
| </method> |
| <method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the TaskType of the TaskAttemptID]]> |
| </doc> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="o" type="java.lang.Object"/> |
| </method> |
| <method name="appendTo" return="java.lang.StringBuilder" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="builder" type="java.lang.StringBuilder"/> |
| <doc> |
| <![CDATA[Add the unique string to the StringBuilder |
@param builder the builder to append to
| @return the builder that was passed in.]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="compareTo" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="o" type="org.apache.hadoop.mapreduce.ID"/> |
| <doc> |
| <![CDATA[Compare TaskIds by first tipIds, then by task numbers.]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="forName" return="org.apache.hadoop.mapreduce.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="str" type="java.lang.String"/> |
| <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> |
| <doc> |
| <![CDATA[Construct a TaskAttemptID object from given string |
| @return constructed TaskAttemptID object or null if the given String is null |
| @throws IllegalArgumentException if the given string is malformed]]> |
| </doc> |
| </method> |
| <field name="ATTEMPT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[TaskAttemptID represents the immutable and unique identifier for |
| a task attempt. Each task attempt is one particular instance of a Map or |
| Reduce Task identified by its TaskID. |
| |
| TaskAttemptID consists of 2 parts. First part is the |
| {@link TaskID}, that this TaskAttemptID belongs to. |
| Second part is the task attempt number. <br> |
| An example TaskAttemptID is : |
| <code>attempt_200707121733_0003_m_000005_0</code> , which represents the |
| zeroth task attempt for the fifth map task in the third job |
| running at the jobtracker started at <code>200707121733</code>. |
| <p> |
| Applications should never construct or parse TaskAttemptID strings |
| , but rather use appropriate constructors or {@link #forName(String)} |
| method. |
| |
| @see JobID |
| @see TaskID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.TaskAttemptID --> |
| <!-- start class org.apache.hadoop.mapreduce.TaskCompletionEvent --> |
| <class name="TaskCompletionEvent" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="TaskCompletionEvent" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Default constructor for Writable.]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapreduce.TaskAttemptID, int, boolean, org.apache.hadoop.mapreduce.TaskCompletionEvent.Status, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructor. eventId should be created externally and incremented |
| per event for each job. |
@param eventId event id, event id should be unique and assigned
| incrementally, starting from 0. |
| @param taskId task id |
| @param status task's status |
| @param taskTrackerHttp task tracker's host:port for http.]]> |
| </doc> |
| </constructor> |
| <method name="getEventId" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns event Id. |
| @return event id]]> |
| </doc> |
| </method> |
| <method name="getTaskAttemptId" return="org.apache.hadoop.mapreduce.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns task id. |
| @return task id]]> |
| </doc> |
| </method> |
| <method name="getStatus" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns {@link Status} |
| @return task completion status]]> |
| </doc> |
| </method> |
| <method name="getTaskTrackerHttp" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[http location of the tasktracker where this task ran. |
| @return http location of tasktracker user logs]]> |
| </doc> |
| </method> |
| <method name="getTaskRunTime" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns time (in millisec) the task took to complete.]]> |
| </doc> |
| </method> |
| <method name="setTaskRunTime" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="taskCompletionTime" type="int"/> |
| <doc> |
| <![CDATA[Set the task completion time |
| @param taskCompletionTime time (in millisec) the task took to complete]]> |
| </doc> |
| </method> |
| <method name="setEventId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="eventId" type="int"/> |
| <doc> |
| <![CDATA[set event Id. should be assigned incrementally starting from 0. |
| @param eventId]]> |
| </doc> |
| </method> |
| <method name="setTaskAttemptId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> |
| <doc> |
| <![CDATA[Sets task id. |
| @param taskId]]> |
| </doc> |
| </method> |
| <method name="setTaskStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="status" type="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status"/> |
| <doc> |
| <![CDATA[Set task status. |
| @param status]]> |
| </doc> |
| </method> |
| <method name="setTaskTrackerHttp" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="taskTrackerHttp" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set task tracker http location. |
| @param taskTrackerHttp]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="o" type="java.lang.Object"/> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="isMapTask" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="idWithinJob" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="EMPTY_ARRAY" type="org.apache.hadoop.mapreduce.TaskCompletionEvent[]" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This is used to track task completion events on |
| job tracker.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.TaskCompletionEvent --> |
| <!-- start class org.apache.hadoop.mapreduce.TaskCompletionEvent.Status --> |
| <class name="TaskCompletionEvent.Status" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.TaskCompletionEvent.Status --> |
| <!-- start class org.apache.hadoop.mapreduce.TaskCounter --> |
| <class name="TaskCounter" extends="java.lang.Enum" |
| abstract="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapreduce.TaskCounter[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapreduce.TaskCounter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.TaskCounter --> |
| <!-- start class org.apache.hadoop.mapreduce.TaskID --> |
| <class name="TaskID" extends="org.apache.hadoop.mapred.ID" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, org.apache.hadoop.mapreduce.TaskType, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskID object from given {@link JobID}. |
| |
| @param jobId JobID that this tip belongs to |
| @param type the {@link TaskType} of the task |
| @param id the tip number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskInProgressId object from given parts. |
| |
| @param jtIdentifier jobTracker identifier |
| @param jobId job number |
| @param type the TaskType |
| @param id the tip number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskID object from given {@link JobID}. |
| |
| @param jobId JobID that this tip belongs to |
| @param isMap whether the tip is a map |
| @param id the tip number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskID" type="java.lang.String, int, boolean, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskInProgressId object from given parts. |
| |
| @param jtIdentifier jobTracker identifier |
| @param jobId job number |
| @param isMap whether the tip is a map |
| @param id the tip number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskID" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Default constructor for Writable. Sets the task type to |
| {@link TaskType#REDUCE}, the ID to 0, and the job ID to an empty job ID.]]> |
| </doc> |
| </constructor> |
| <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the {@link JobID} object that this tip belongs to. |
| |
| @return the JobID object]]> |
| </doc> |
| </method> |
| <method name="isMap" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns whether this TaskID is a map ID. |
| |
| @return whether this TaskID is a map ID]]> |
| </doc> |
| </method> |
| <method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the type of the task. |
| |
| @return the type of the task]]> |
| </doc> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="o" type="java.lang.Object"/> |
| </method> |
| <method name="compareTo" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="o" type="org.apache.hadoop.mapreduce.ID"/> |
| <doc> |
| <![CDATA[Compare TaskInProgressIds by first jobIds, then by tip numbers. |
| Reducers are defined as greater than mappers. |
| |
| @param o the TaskID against which to compare |
| @return 0 if equal, positive if this TaskID is greater, and negative if |
| this TaskID is less]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="appendTo" return="java.lang.StringBuilder" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="builder" type="java.lang.StringBuilder"/> |
| <doc> |
| <![CDATA[Add the unique string to the given builder. |
| |
| @param builder the builder to append to |
| @return the builder that was passed in]]> |
| </doc> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="forName" return="org.apache.hadoop.mapreduce.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="str" type="java.lang.String"/> |
| <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> |
| <doc> |
| <![CDATA[Construct a TaskID object from given string. |
| |
| @param str the target string |
| @return constructed TaskID object or null if the given String is null |
| @throws IllegalArgumentException if the given string is malformed]]> |
| </doc> |
| </method> |
| <method name="getRepresentingCharacter" return="char" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="type" type="org.apache.hadoop.mapreduce.TaskType"/> |
| <doc> |
| <![CDATA[Gets the character representing the {@link TaskType}. |
| |
| @param type the TaskType |
| @return the character]]> |
| </doc> |
| </method> |
| <method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="c" type="char"/> |
| <doc> |
| <![CDATA[Gets the {@link TaskType} corresponding to the character. |
| |
| @param c the character |
| @return the TaskType]]> |
| </doc> |
| </method> |
| <method name="getAllTaskTypes" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns a string of characters describing all possible {@link TaskType} |
| values |
| |
| @return a string of all task type characters]]> |
| </doc> |
| </method> |
| <field name="TASK" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="idFormat" type="java.text.NumberFormat" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TASK_ID_REGEX" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="taskIdPattern" type="java.util.regex.Pattern" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[TaskID represents the immutable and unique identifier for |
| a Map or Reduce Task. Each TaskID encompasses multiple attempts made to |
 execute the Map or Reduce Task, each of which is uniquely identified by
| their TaskAttemptID. |
| |
| TaskID consists of 3 parts. First part is the {@link JobID}, that this |
| TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r' |
| representing whether the task is a map task or a reduce task. |
| And the third part is the task number. <br> |
| An example TaskID is : |
| <code>task_200707121733_0003_m_000005</code> , which represents the |
| fifth map task in the third job running at the jobtracker |
| started at <code>200707121733</code>. |
| <p> |
| Applications should never construct or parse TaskID strings |
| , but rather use appropriate constructors or {@link #forName(String)} |
| method. |
| |
| @see JobID |
| @see TaskAttemptID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.TaskID --> |
| <!-- start interface org.apache.hadoop.mapreduce.TaskInputOutputContext --> |
| <interface name="TaskInputOutputContext" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <method name="nextKeyValue" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Advance to the next key, value pair, returning null if at end. |
| @return the key object that was read into, or null if no more]]> |
| </doc> |
| </method> |
| <method name="getCurrentKey" return="KEYIN" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the current key. |
| @return the current key object or null if there isn't one |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="getCurrentValue" return="VALUEIN" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the current value. |
| @return the value object that was read into |
| @throws IOException |
| @throws InterruptedException]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="KEYOUT"/> |
| <param name="value" type="VALUEOUT"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Generate an output key/value pair.]]> |
| </doc> |
| </method> |
| <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link OutputCommitter} for the task-attempt. |
| @return the <code>OutputCommitter</code> for the task-attempt]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A context object that allows input and output from the task. It is only |
| supplied to the {@link Mapper} or {@link Reducer}. |
| @param <KEYIN> the input key type for the task |
| @param <VALUEIN> the input value type for the task |
| @param <KEYOUT> the output key type for the task |
| @param <VALUEOUT> the output value type for the task]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.TaskInputOutputContext --> |
| <!-- start class org.apache.hadoop.mapreduce.TaskTrackerInfo --> |
| <class name="TaskTrackerInfo" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="TaskTrackerInfo" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="TaskTrackerInfo" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="TaskTrackerInfo" type="java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getTaskTrackerName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Gets the tasktracker's name. |
| |
| @return tracker's name.]]> |
| </doc> |
| </method> |
| <method name="isBlacklisted" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Whether tracker is blacklisted |
| @return true if tracker is blacklisted |
| false otherwise]]> |
| </doc> |
| </method> |
| <method name="getReasonForBlacklist" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Gets the reason for which the tasktracker was blacklisted. |
| |
| @return reason which tracker was blacklisted]]> |
| </doc> |
| </method> |
| <method name="getBlacklistReport" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Gets a descriptive report about why the tasktracker was blacklisted. |
| |
| @return report describing why the tasktracker was blacklisted.]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Information about TaskTracker.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.TaskTrackerInfo --> |
| <!-- start class org.apache.hadoop.mapreduce.TaskType --> |
| <class name="TaskType" extends="java.lang.Enum" |
| abstract="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapreduce.TaskType[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapreduce.TaskType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <doc> |
| <![CDATA[Enum for map, reduce, job-setup, job-cleanup, task-cleanup task types.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.TaskType --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.checkpoint"> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.counters"> |
| <!-- start class org.apache.hadoop.mapreduce.counters.AbstractCounters --> |
| <class name="AbstractCounters" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <implements name="java.lang.Iterable"/> |
| <constructor name="AbstractCounters" type="org.apache.hadoop.mapreduce.counters.CounterGroupFactory" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="AbstractCounters" type="org.apache.hadoop.mapreduce.counters.AbstractCounters, org.apache.hadoop.mapreduce.counters.CounterGroupFactory" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct from another counters object. |
| @param <C1> type of the other counter |
| @param <G1> type of the other counter group |
| @param counters the counters object to copy |
| @param groupFactory the factory for new groups]]> |
| </doc> |
| </constructor> |
| <method name="findCounter" return="C" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="groupName" type="java.lang.String"/> |
| <param name="counterName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Find a counter, create one if necessary |
| @param groupName of the counter |
| @param counterName name of the counter |
| @return the matching counter]]> |
| </doc> |
| </method> |
| <method name="findCounter" return="C" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Enum"/> |
| <doc> |
| <![CDATA[Find the counter for the given enum. The same enum will always return the |
| same counter. |
| @param key the counter key |
| @return the matching counter object]]> |
| </doc> |
| </method> |
| <method name="getGroupNames" return="java.lang.Iterable" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the names of all counter classes. |
| @return Set of counter names.]]> |
| </doc> |
| </method> |
| <method name="iterator" return="java.util.Iterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getGroup" return="G" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="groupName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Returns the named counter group, or an empty group if there is none |
| with the specified name. |
| @param groupName name of the group |
| @return the group]]> |
| </doc> |
| </method> |
| <method name="countCounters" return="int" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the total number of counters, by summing the number of counters |
| in each group. |
| @return the total number of counters]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Write the set of groups. |
| Counters ::= version #fgroups (groupId, group)* #groups (group)*]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return textual representation of the counter values. |
| @return the string]]> |
| </doc> |
| </method> |
| <method name="incrAllCounters" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="org.apache.hadoop.mapreduce.counters.AbstractCounters"/> |
| <doc> |
| <![CDATA[Increments multiple counters by their amounts in another Counters |
| instance. |
| @param other the other Counters instance]]> |
| </doc> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericRight" type="java.lang.Object"/> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <field name="LOG" type="org.slf4j.Logger" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[An abstract class to provide common implementation for the Counters |
| container in both mapred and mapreduce packages. |
| |
| @param <C> type of counter inside the counters |
| @param <G> type of group inside the counters]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.counters.AbstractCounters --> |
| <!-- start interface org.apache.hadoop.mapreduce.counters.CounterGroupBase --> |
| <interface name="CounterGroupBase" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <implements name="java.lang.Iterable"/> |
| <method name="getName" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the internal name of the group |
| @return the internal name]]> |
| </doc> |
| </method> |
| <method name="getDisplayName" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the display name of the group. |
| @return the human readable name]]> |
| </doc> |
| </method> |
| <method name="setDisplayName" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="displayName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the display name of the group |
| @param displayName of the group]]> |
| </doc> |
| </method> |
| <method name="addCounter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counter" type="T"/> |
| <doc> |
| <![CDATA[Add a counter to this group. |
| @param counter to add]]> |
| </doc> |
| </method> |
| <method name="addCounter" return="T" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| <param name="displayName" type="java.lang.String"/> |
| <param name="value" type="long"/> |
| <doc> |
| <![CDATA[Add a counter to this group |
| @param name of the counter |
| @param displayName of the counter |
| @param value of the counter |
| @return the counter]]> |
| </doc> |
| </method> |
| <method name="findCounter" return="T" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counterName" type="java.lang.String"/> |
| <param name="displayName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Find a counter in the group. |
| @param counterName the name of the counter |
| @param displayName the display name of the counter |
| @return the counter that was found or added]]> |
| </doc> |
| </method> |
| <method name="findCounter" return="T" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counterName" type="java.lang.String"/> |
| <param name="create" type="boolean"/> |
| <doc> |
| <![CDATA[Find a counter in the group |
| @param counterName the name of the counter |
| @param create create the counter if not found if true |
| @return the counter that was found or added or null if create is false]]> |
| </doc> |
| </method> |
| <method name="findCounter" return="T" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counterName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Find a counter in the group. |
| @param counterName the name of the counter |
| @return the counter that was found or added]]> |
| </doc> |
| </method> |
| <method name="size" return="int" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the number of counters in this group.]]> |
| </doc> |
| </method> |
| <method name="incrAllCounters" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="rightGroup" type="org.apache.hadoop.mapreduce.counters.CounterGroupBase"/> |
| <doc> |
| <![CDATA[Increment all counters by a group of counters |
| @param rightGroup the group to be added to this group]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The common counter group interface. |
| |
| @param <T> type of the counter for the group]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.counters.CounterGroupBase --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.aggregate"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum --> |
| <class name="DoubleValueSum" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> |
| <constructor name="DoubleValueSum" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The default constructor]]> |
| </doc> |
| </constructor> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param val |
| an object whose string representation represents a double value.]]> |
| </doc> |
| </method> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="double"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param val |
| a double value.]]> |
| </doc> |
| </method> |
| <method name="getReport" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the string representation of the aggregated value]]> |
| </doc> |
| </method> |
| <method name="getSum" return="double" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the aggregated value]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[reset the aggregator]]> |
| </doc> |
| </method> |
| <method name="getCombinerOutput" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class implements a value aggregator that sums up a sequence of double |
| values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax --> |
| <class name="LongValueMax" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> |
| <constructor name="LongValueMax" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[the default constructor]]> |
| </doc> |
| </constructor> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param val |
| an object whose string representation represents a long value.]]> |
| </doc> |
| </method> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="newVal" type="long"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param newVal |
| a long value.]]> |
| </doc> |
| </method> |
| <method name="getVal" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the aggregated value]]> |
| </doc> |
| </method> |
| <method name="getReport" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the string representation of the aggregated value]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[reset the aggregator]]> |
| </doc> |
| </method> |
| <method name="getCombinerOutput" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
| </doc> |
| </method> |
| <doc> |
<![CDATA[This class implements a value aggregator that maintains the maximum of
a sequence of long values.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin --> |
| <class name="LongValueMin" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> |
| <constructor name="LongValueMin" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[the default constructor]]> |
| </doc> |
| </constructor> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param val |
| an object whose string representation represents a long value.]]> |
| </doc> |
| </method> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="newVal" type="long"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param newVal |
| a long value.]]> |
| </doc> |
| </method> |
| <method name="getVal" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the aggregated value]]> |
| </doc> |
| </method> |
| <method name="getReport" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the string representation of the aggregated value]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[reset the aggregator]]> |
| </doc> |
| </method> |
| <method name="getCombinerOutput" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
| </doc> |
| </method> |
| <doc> |
<![CDATA[This class implements a value aggregator that maintains the minimum of
a sequence of long values.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum --> |
| <class name="LongValueSum" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> |
| <constructor name="LongValueSum" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[the default constructor]]> |
| </doc> |
| </constructor> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param val |
| an object whose string representation represents a long value.]]> |
| </doc> |
| </method> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="long"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param val |
| a long value.]]> |
| </doc> |
| </method> |
| <method name="getSum" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the aggregated value]]> |
| </doc> |
| </method> |
| <method name="getReport" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the string representation of the aggregated value]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[reset the aggregator]]> |
| </doc> |
| </method> |
| <method name="getCombinerOutput" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class implements a value aggregator that sums up |
| a sequence of long values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax --> |
| <class name="StringValueMax" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> |
| <constructor name="StringValueMax" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[the default constructor]]> |
| </doc> |
| </constructor> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param val |
| a string.]]> |
| </doc> |
| </method> |
| <method name="getVal" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the aggregated value]]> |
| </doc> |
| </method> |
| <method name="getReport" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the string representation of the aggregated value]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[reset the aggregator]]> |
| </doc> |
| </method> |
| <method name="getCombinerOutput" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
| </doc> |
| </method> |
| <doc> |
<![CDATA[This class implements a value aggregator that maintains the biggest of
a sequence of strings.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin --> |
| <class name="StringValueMin" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> |
| <constructor name="StringValueMin" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[the default constructor]]> |
| </doc> |
| </constructor> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param val |
| a string.]]> |
| </doc> |
| </method> |
| <method name="getVal" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the aggregated value]]> |
| </doc> |
| </method> |
| <method name="getReport" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the string representation of the aggregated value]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[reset the aggregator]]> |
| </doc> |
| </method> |
| <method name="getCombinerOutput" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
| </doc> |
| </method> |
| <doc> |
<![CDATA[This class implements a value aggregator that maintains the smallest of
a sequence of strings.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount --> |
| <class name="UniqValueCount" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> |
| <constructor name="UniqValueCount" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[the default constructor]]> |
| </doc> |
| </constructor> |
| <constructor name="UniqValueCount" type="long" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[constructor |
| @param maxNum the limit in the number of unique values to keep.]]> |
| </doc> |
| </constructor> |
| <method name="setMaxItems" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="long"/> |
| <doc> |
| <![CDATA[Set the limit on the number of unique values |
| @param n the desired limit on the number of unique values |
| @return the new limit on the number of unique values]]> |
| </doc> |
| </method> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param val |
| an object.]]> |
| </doc> |
| </method> |
| <method name="getReport" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return the number of unique objects aggregated]]>
| </doc> |
| </method> |
| <method name="getUniqueItems" return="java.util.Set" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the set of the unique objects]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[reset the aggregator]]> |
| </doc> |
| </method> |
| <method name="getCombinerOutput" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return an array of the unique objects. The return value is
expected to be used by a combiner.]]>
| </doc> |
| </method> |
| <field name="MAX_NUM_UNIQUE_VALUES" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class implements a value aggregator that dedupes a sequence of objects.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor --> |
| <class name="UserDefinedValueAggregatorDescriptor" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/> |
| <constructor name="UserDefinedValueAggregatorDescriptor" type="java.lang.String, org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@param className the class name of the user defined descriptor class |
@param conf a configuration object used for descriptor configuration]]>
| </doc> |
| </constructor> |
| <method name="createInstance" return="java.lang.Object" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="className" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Create an instance of the given class |
| @param className the name of the class |
| @return a dynamically created instance of the given class]]> |
| </doc> |
| </method> |
| <method name="generateKeyValPairs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[Generate a list of aggregation-id/value pairs for the given |
| key/value pairs by delegating the invocation to the real object. |
| |
| @param key |
| input key |
| @param val |
| input value |
| @return a list of aggregation id/value pairs. An aggregation id encodes an |
| aggregation type which is used to guide the way to aggregate the |
value in the reduce/combiner phase of an Aggregate based job.]]>
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the string representation of this object.]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Do nothing.]]> |
| </doc> |
| </method> |
| <field name="theAggregatorDescriptor" type="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class implements a wrapper for a user defined value |
| aggregator descriptor. |
| It serves two functions: One is to create an object of |
| ValueAggregatorDescriptor from the name of a user defined class |
| that may be dynamically loaded. The other is to |
| delegate invocations of generateKeyValPairs function to the created object.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor --> |
| <!-- start interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator --> |
| <interface name="ValueAggregator" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="addNextValue" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[add a value to the aggregator |
| |
| @param val the value to be added]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[reset the aggregator]]> |
| </doc> |
| </method> |
| <method name="getReport" return="java.lang.String" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return the string representation of the aggregator]]>
| </doc> |
| </method> |
| <method name="getCombinerOutput" return="java.util.ArrayList" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return an array of values as the outputs of the combiner.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This interface defines the minimal protocol for value aggregators.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor --> |
| <class name="ValueAggregatorBaseDescriptor" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/> |
| <constructor name="ValueAggregatorBaseDescriptor" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="generateEntry" return="java.util.Map.Entry" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="type" type="java.lang.String"/> |
| <param name="id" type="java.lang.String"/> |
| <param name="val" type="org.apache.hadoop.io.Text"/> |
| <doc> |
| <![CDATA[@param type the aggregation type |
| @param id the aggregation id |
| @param val the val associated with the id to be aggregated |
| @return an Entry whose key is the aggregation id prefixed with |
| the aggregation type.]]> |
| </doc> |
| </method> |
| <method name="generateValueAggregator" return="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="type" type="java.lang.String"/> |
| <param name="uniqCount" type="long"/> |
| <doc> |
| <![CDATA[@param type the aggregation type |
| @param uniqCount the limit in the number of unique values to keep, |
| if type is UNIQ_VALUE_COUNT |
| @return a value aggregator of the given type.]]> |
| </doc> |
| </method> |
| <method name="generateKeyValPairs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[Generate 1 or 2 aggregation-id/value pairs for the given key/value pair. |
| The first id will be of type LONG_VALUE_SUM, with "record_count" as |
| its aggregation id. If the input is a file split, |
| the second id of the same type will be generated too, with the file name |
| as its aggregation id. This achieves the behavior of counting the total |
| number of records in the input data, and the number of records |
| in each input file. |
| |
| @param key |
| input key |
| @param val |
| input value |
| @return a list of aggregation id/value pairs. An aggregation id encodes an |
| aggregation type which is used to guide the way to aggregate the |
value in the reduce/combiner phase of an Aggregate based job.]]>
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[get the input file name. |
| |
| @param conf a configuration object]]> |
| </doc> |
| </method> |
| <field name="UNIQ_VALUE_COUNT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="LONG_VALUE_SUM" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DOUBLE_VALUE_SUM" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="VALUE_HISTOGRAM" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="LONG_VALUE_MAX" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="LONG_VALUE_MIN" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="STRING_VALUE_MAX" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="STRING_VALUE_MIN" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="inputFile" type="java.lang.String" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class implements the common functionalities of |
| the subclasses of ValueAggregatorDescriptor class.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner --> |
| <class name="ValueAggregatorCombiner" extends="org.apache.hadoop.mapreduce.Reducer" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ValueAggregatorCombiner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="values" type="java.lang.Iterable"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Combines values for a given key. |
| @param key the key is expected to be a Text object, whose prefix indicates |
| the type of aggregation to aggregate the values. |
| @param values the values to combine |
| @param context to collect combined values]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class implements the generic combiner of Aggregate.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner --> |
| <!-- start interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor --> |
| <interface name="ValueAggregatorDescriptor" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="generateKeyValPairs" return="java.util.ArrayList" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[Generate a list of aggregation-id/value pairs for |
| the given key/value pair. |
| This function is usually called by the mapper of an Aggregate based job. |
| |
| @param key |
| input key |
| @param val |
| input value |
| @return a list of aggregation id/value pairs. An aggregation id encodes an |
| aggregation type which is used to guide the way to aggregate the |
value in the reduce/combiner phase of an Aggregate based job.]]>
| </doc> |
| </method> |
| <method name="configure" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Configure the object |
| |
| @param conf |
| a Configuration object that may contain the information |
| that can be used to configure the object.]]> |
| </doc> |
| </method> |
| <field name="TYPE_SEPARATOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="ONE" type="org.apache.hadoop.io.Text" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This interface defines the contract a value aggregator descriptor must |
| support. Such a descriptor can be configured with a {@link Configuration} |
| object. Its main function is to generate a list of aggregation-id/value |
| pairs. An aggregation id encodes an aggregation type which is used to |
guide the way to aggregate the value in the reduce/combiner phase of an
| Aggregate based job. |
| The mapper in an Aggregate based map/reduce job may create one or more of |
| ValueAggregatorDescriptor objects at configuration time. For each input |
| key/value pair, the mapper will use those objects to create aggregation |
| id/value pairs.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob --> |
| <class name="ValueAggregatorJob" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ValueAggregatorJob" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createValueAggregatorJobs" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <param name="descriptors" type="java.lang.Class[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createValueAggregatorJobs" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createValueAggregatorJob" return="org.apache.hadoop.mapreduce.Job" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create an Aggregate based map/reduce job. |
| |
| @param conf The configuration for job |
| @param args the arguments used for job creation. Generic hadoop |
| arguments are accepted. |
| @return a Job object ready for submission. |
| |
| @throws IOException |
| @see GenericOptionsParser]]> |
| </doc> |
| </method> |
| <method name="createValueAggregatorJob" return="org.apache.hadoop.mapreduce.Job" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <param name="descriptors" type="java.lang.Class[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setAggregatorDescriptors" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="descriptors" type="java.lang.Class[]"/> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <doc> |
| <![CDATA[create and run an Aggregate based map/reduce job. |
| |
| @param args the arguments used for job creation |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This is the main class for creating a map/reduce job using Aggregate |
| framework. The Aggregate is a specialization of map/reduce framework, |
| specializing for performing various simple aggregations. |
| |
| Generally speaking, in order to implement an application using Map/Reduce |
| model, the developer is to implement Map and Reduce functions (and possibly |
| combine function). However, a lot of applications related to counting and |
| statistics computing have very similar characteristics. Aggregate abstracts |
out the general patterns of these functions and implements those patterns.
In particular, the package provides generic mapper/reducer/combiner
| classes, and a set of built-in value aggregators, and a generic utility |
| class that helps user create map/reduce jobs using the generic class. |
| The built-in aggregators include: |
| |
| sum over numeric values count the number of distinct values compute the |
histogram of values compute the minimum, maximum, median, average, standard
| deviation of numeric values |
| |
| The developer using Aggregate will need only to provide a plugin class |
| conforming to the following interface: |
| |
| public interface ValueAggregatorDescriptor { public ArrayList<Entry> |
| generateKeyValPairs(Object key, Object value); public void |
| configure(Configuration conf); } |
| |
| The package also provides a base class, ValueAggregatorBaseDescriptor, |
| implementing the above interface. The user can extend the base class and |
| implement generateKeyValPairs accordingly. |
| |
| The primary work of generateKeyValPairs is to emit one or more key/value |
| pairs based on the input key/value pair. The key in an output key/value pair |
encodes two pieces of information: aggregation type and aggregation id. The
value will be aggregated onto the aggregation id according to the aggregation
| type. |
| |
| This class offers a function to generate a map/reduce job using Aggregate |
| framework. The function takes the following parameters: input directory spec |
| input format (text or sequence file) output directory a file specifying the |
| user plugin class]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase --> |
| <class name="ValueAggregatorJobBase" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ValueAggregatorJobBase" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setup" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="getValueAggregatorDescriptor" return="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="spec" type="java.lang.String"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="getAggregatorDescriptors" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="logSpec" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <field name="DESCRIPTOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DESCRIPTOR_NUM" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="USER_JAR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="aggregatorDescriptorList" type="java.util.ArrayList" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
<![CDATA[This abstract class implements some common functionalities of
the generic mapper, reducer and combiner classes of Aggregate.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper --> |
| <class name="ValueAggregatorMapper" extends="org.apache.hadoop.mapreduce.Mapper" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ValueAggregatorMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K1"/> |
| <param name="value" type="V1"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[the map function. It iterates through the value aggregator descriptor |
| list to generate aggregation id/value pairs and emit them.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class implements the generic mapper of Aggregate.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer --> |
| <class name="ValueAggregatorReducer" extends="org.apache.hadoop.mapreduce.Reducer" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ValueAggregatorReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="values" type="java.lang.Iterable"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[@param key |
| the key is expected to be a Text object, whose prefix indicates |
| the type of aggregation to aggregate the values. In effect, data |
| driven computing is achieved. It is assumed that each aggregator's |
| getReport method emits appropriate output for the aggregator. This |
| may be further customized. |
| @param values the values to be aggregated |
| @param context]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class implements the generic reducer of Aggregate.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram --> |
| <class name="ValueHistogram" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> |
| <constructor name="ValueHistogram" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="addNextValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[add the given val to the aggregator. |
| |
| @param val the value to be added. It is expected to be a string |
| in the form of xxxx\tnum, meaning xxxx has num occurrences.]]> |
| </doc> |
| </method> |
| <method name="getReport" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the string representation of this aggregator. |
| It includes the following basic statistics of the histogram: |
| the number of unique values |
| the minimum value |
the median value
| the maximum value |
| the average value |
| the standard deviation]]> |
| </doc> |
| </method> |
| <method name="getReportDetails" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return a string representation of the list of value/frequency pairs of
| the histogram]]> |
| </doc> |
| </method> |
| <method name="getCombinerOutput" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[@return a list of value/frequency pairs.
| The return value is expected to be used by the reducer.]]> |
| </doc> |
| </method> |
| <method name="getReportItems" return="java.util.TreeMap" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return a TreeMap representation of the histogram]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[reset the aggregator]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class implements a value aggregator that computes the |
| histogram of a sequence of strings.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.chain"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.chain.ChainMapper --> |
| <class name="ChainMapper" extends="org.apache.hadoop.mapreduce.Mapper" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ChainMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="addMapper" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="klass" type="java.lang.Class"/> |
| <param name="inputKeyClass" type="java.lang.Class"/> |
| <param name="inputValueClass" type="java.lang.Class"/> |
| <param name="outputKeyClass" type="java.lang.Class"/> |
| <param name="outputValueClass" type="java.lang.Class"/> |
| <param name="mapperConf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Adds a {@link Mapper} class to the chain mapper. |
| |
| <p> |
| The key and values are passed from one element of the chain to the next, by |
| value. For the added Mapper the configuration given for it, |
<code>mapperConf</code>, takes precedence over the job's Configuration. This
| precedence is in effect when the task is running. |
| </p> |
| <p> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainMapper, this is done by the addMapper for the last mapper in the chain |
| </p> |
| |
| @param job |
| The job. |
| @param klass |
| the Mapper class to add. |
| @param inputKeyClass |
| mapper input key class. |
| @param inputValueClass |
| mapper input value class. |
| @param outputKeyClass |
| mapper output key class. |
| @param outputValueClass |
| mapper output value class. |
| @param mapperConf |
| a configuration for the Mapper class. It is recommended to use a |
| Configuration without default values using the |
| <code>Configuration(boolean loadDefaults)</code> constructor with |
| FALSE.]]> |
| </doc> |
| </method> |
| <method name="setup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <doc> |
<![CDATA[The ChainMapper class allows the use of multiple Mapper classes within a single
| Map task. |
| |
| <p> |
| The Mapper classes are invoked in a chained (or piped) fashion, the output of |
| the first becomes the input of the second, and so on until the last Mapper, |
| the output of the last Mapper will be written to the task's output. |
| </p> |
| <p> |
| The key functionality of this feature is that the Mappers in the chain do not |
| need to be aware that they are executed in a chain. This enables having |
| reusable specialized Mappers that can be combined to perform composite |
| operations within a single task. |
| </p> |
| <p> |
| Special care has to be taken when creating chains that the key/values output |
| by a Mapper are valid for the following Mapper in the chain. It is assumed |
| all Mappers and the Reduce in the chain use matching output and input key and |
| value classes as no conversion is done by the chaining code. |
| </p> |
| <p> |
Using the ChainMapper and the ChainReducer classes it is possible to compose
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
immediate benefit of this pattern is a dramatic reduction in disk IO.
| </p> |
| <p> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainMapper, this is done by the addMapper for the last mapper in the chain. |
| </p> |
| ChainMapper usage pattern: |
| <p> |
| |
| <pre> |
| ... |
Job job = new Job(conf);
| |
| Configuration mapAConf = new Configuration(false); |
| ... |
| ChainMapper.addMapper(job, AMap.class, LongWritable.class, Text.class, |
| Text.class, Text.class, true, mapAConf); |
| |
| Configuration mapBConf = new Configuration(false); |
| ... |
| ChainMapper.addMapper(job, BMap.class, Text.class, Text.class, |
| LongWritable.class, Text.class, false, mapBConf); |
| |
| ... |
| |
job.waitForCompletion(true);
| ... |
| </pre>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.chain.ChainMapper --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.chain.ChainReducer --> |
| <class name="ChainReducer" extends="org.apache.hadoop.mapreduce.Reducer" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ChainReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setReducer" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="klass" type="java.lang.Class"/> |
| <param name="inputKeyClass" type="java.lang.Class"/> |
| <param name="inputValueClass" type="java.lang.Class"/> |
| <param name="outputKeyClass" type="java.lang.Class"/> |
| <param name="outputValueClass" type="java.lang.Class"/> |
| <param name="reducerConf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Sets the {@link Reducer} class to the chain job. |
| |
| <p> |
| The key and values are passed from one element of the chain to the next, by |
| value. For the added Reducer the configuration given for it, |
<code>reducerConf</code>, takes precedence over the job's Configuration.
| This precedence is in effect when the task is running. |
| </p> |
| <p> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainReducer, this is done by the setReducer or the addMapper for the last |
| element in the chain. |
| </p> |
| |
| @param job |
| the job |
| @param klass |
| the Reducer class to add. |
| @param inputKeyClass |
| reducer input key class. |
| @param inputValueClass |
| reducer input value class. |
| @param outputKeyClass |
| reducer output key class. |
| @param outputValueClass |
| reducer output value class. |
| @param reducerConf |
| a configuration for the Reducer class. It is recommended to use a |
| Configuration without default values using the |
| <code>Configuration(boolean loadDefaults)</code> constructor with |
| FALSE.]]> |
| </doc> |
| </method> |
| <method name="addMapper" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="klass" type="java.lang.Class"/> |
| <param name="inputKeyClass" type="java.lang.Class"/> |
| <param name="inputValueClass" type="java.lang.Class"/> |
| <param name="outputKeyClass" type="java.lang.Class"/> |
| <param name="outputValueClass" type="java.lang.Class"/> |
| <param name="mapperConf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Adds a {@link Mapper} class to the chain reducer. |
| |
| <p> |
| The key and values are passed from one element of the chain to the next, by |
value. For the added Mapper the configuration given for it,
<code>mapperConf</code>, takes precedence over the job's Configuration. This
| precedence is in effect when the task is running. |
| </p> |
| <p> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainMapper, this is done by the addMapper for the last mapper in the |
| chain. |
| </p> |
| |
| @param job |
| The job. |
| @param klass |
| the Mapper class to add. |
| @param inputKeyClass |
| mapper input key class. |
| @param inputValueClass |
| mapper input value class. |
| @param outputKeyClass |
| mapper output key class. |
| @param outputValueClass |
| mapper output value class. |
| @param mapperConf |
| a configuration for the Mapper class. It is recommended to use a |
| Configuration without default values using the |
| <code>Configuration(boolean loadDefaults)</code> constructor with |
| FALSE.]]> |
| </doc> |
| </method> |
| <method name="setup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <doc> |
<![CDATA[The ChainReducer class allows chaining multiple Mapper classes after a
| Reducer within the Reducer task. |
| |
| <p> |
| For each record output by the Reducer, the Mapper classes are invoked in a |
| chained (or piped) fashion. The output of the reducer becomes the input of |
| the first mapper and output of first becomes the input of the second, and so |
| on until the last Mapper, the output of the last Mapper will be written to |
| the task's output. |
| </p> |
| <p> |
| The key functionality of this feature is that the Mappers in the chain do not |
| need to be aware that they are executed after the Reducer or in a chain. This |
| enables having reusable specialized Mappers that can be combined to perform |
| composite operations within a single task. |
| </p> |
| <p> |
| Special care has to be taken when creating chains that the key/values output |
| by a Mapper are valid for the following Mapper in the chain. It is assumed |
| all Mappers and the Reduce in the chain use matching output and input key and |
| value classes as no conversion is done by the chaining code. |
| </p> |
<p> Using the ChainMapper and the ChainReducer classes it is possible to
compose Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
immediate benefit of this pattern is a dramatic reduction in disk IO. </p>
| <p> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainReducer, this is done by the setReducer or the addMapper for the last |
| element in the chain. |
| </p> |
| ChainReducer usage pattern: |
| <p> |
| |
| <pre> |
| ... |
Job job = new Job(conf);
| .... |
| |
| Configuration reduceConf = new Configuration(false); |
| ... |
| ChainReducer.setReducer(job, XReduce.class, LongWritable.class, Text.class, |
| Text.class, Text.class, true, reduceConf); |
| |
| ChainReducer.addMapper(job, CMap.class, Text.class, Text.class, |
| LongWritable.class, Text.class, false, null); |
| |
| ChainReducer.addMapper(job, DMap.class, LongWritable.class, Text.class, |
| LongWritable.class, LongWritable.class, true, null); |
| |
| ... |
| |
| job.waitForCompletion(true); |
| ... |
| </pre>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.chain.ChainReducer --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.db"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter --> |
| <class name="BigDecimalSplitter" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/> |
| <constructor name="BigDecimalSplitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="split" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="results" type="java.sql.ResultSet"/> |
| <param name="colName" type="java.lang.String"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </method> |
| <method name="tryDivide" return="java.math.BigDecimal" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="numerator" type="java.math.BigDecimal"/> |
| <param name="denominator" type="java.math.BigDecimal"/> |
| <doc> |
| <![CDATA[Divide numerator by denominator. If impossible in exact mode, use rounding.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Implement DBSplitter over BigDecimal values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.BooleanSplitter --> |
| <class name="BooleanSplitter" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/> |
| <constructor name="BooleanSplitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="split" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="results" type="java.sql.ResultSet"/> |
| <param name="colName" type="java.lang.String"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </method> |
| <doc> |
| <![CDATA[Implement DBSplitter over boolean values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.BooleanSplitter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat --> |
| <class name="DataDrivenDBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <constructor name="DataDrivenDBInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getSplitter" return="org.apache.hadoop.mapreduce.lib.db.DBSplitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="sqlDataType" type="int"/> |
| <doc> |
| <![CDATA[@return the DBSplitter implementation to use to divide the table/query into InputSplits.]]> |
| </doc> |
| </method> |
| <method name="getSplits" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getBoundingValsQuery" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return a query which returns the minimum and maximum values for |
| the order-by column. |
| |
| The min value should be in the first column, and the |
| max value should be in the second column of the results.]]> |
| </doc> |
| </method> |
| <method name="setBoundingQuery" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="query" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the user-defined bounding query to use with a user-defined query. |
| This *must* include the substring "$CONDITIONS" |
| (DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) inside the WHERE clause, |
| so that DataDrivenDBInputFormat knows where to insert split clauses. |
| e.g., "SELECT foo FROM mytable WHERE $CONDITIONS" |
| This will be expanded to something like: |
| SELECT foo FROM mytable WHERE (id > 100) AND (id < 250) |
| inside each split.]]> |
| </doc> |
| </method> |
| <method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setInput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="inputClass" type="java.lang.Class"/> |
| <param name="tableName" type="java.lang.String"/> |
| <param name="conditions" type="java.lang.String"/> |
| <param name="splitBy" type="java.lang.String"/> |
| <param name="fieldNames" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[Note that the "orderBy" column is called the "splitBy" in this version. |
| We reuse the same field, but it's not strictly ordering it -- just partitioning |
| the results.]]> |
| </doc> |
| </method> |
| <method name="setInput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="inputClass" type="java.lang.Class"/> |
| <param name="inputQuery" type="java.lang.String"/> |
| <param name="inputBoundingQuery" type="java.lang.String"/> |
| <doc> |
| <![CDATA[setInput() takes a custom query and a separate "bounding query" to use |
| instead of the custom "count query" used by DBInputFormat.]]> |
| </doc> |
| </method> |
| <field name="SUBSTITUTE_TOKEN" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[If users are providing their own query, the following string is expected to |
| appear in the WHERE clause, which will be substituted with a pair of conditions |
| on the input to allow input splits to parallelise the import.]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[An InputFormat that reads input data from an SQL table. |
| Operates like DBInputFormat, but instead of using LIMIT and OFFSET to demarcate |
| splits, it tries to generate WHERE clauses which separate the data into roughly |
| equivalent shards.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader --> |
| <class name="DataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="DataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| <doc> |
| <![CDATA[@param split The InputSplit to read data for |
| @throws SQLException]]> |
| </doc> |
| </constructor> |
| <method name="getSelectQuery" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the query for selecting the records, |
| subclasses can override this for custom behaviour.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A RecordReader that reads records from a SQL table, |
| using data-driven WHERE clause splits. |
| Emits LongWritables containing the record number as |
| key and DBWritables as value.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.DateSplitter --> |
| <class name="DateSplitter" extends="org.apache.hadoop.mapreduce.lib.db.IntegerSplitter" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="DateSplitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="split" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="results" type="java.sql.ResultSet"/> |
| <param name="colName" type="java.lang.String"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </method> |
| <method name="dateToString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="d" type="java.util.Date"/> |
| <doc> |
| <![CDATA[Given a Date 'd', format it as a string for use in a SQL date |
| comparison operation. |
| @param d the date to format. |
| @return the string representing this date in SQL with any appropriate |
| quotation characters, etc.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Implement DBSplitter over date/time values. |
| Make use of logic from IntegerSplitter, since date/time are just longs |
| in Java.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.DateSplitter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.DBConfiguration --> |
| <class name="DBConfiguration" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="DBConfiguration" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configureDB" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="driverClass" type="java.lang.String"/> |
| <param name="dbUrl" type="java.lang.String"/> |
| <param name="userName" type="java.lang.String"/> |
| <param name="passwd" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Sets the DB access related fields in the {@link Configuration}. |
| @param conf the configuration |
| @param driverClass JDBC Driver class name |
| @param dbUrl JDBC DB access URL. |
| @param userName DB access username |
| @param passwd DB access passwd]]> |
| </doc> |
| </method> |
| <method name="configureDB" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="driverClass" type="java.lang.String"/> |
| <param name="dbUrl" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Sets the DB access related fields in the JobConf. |
| @param job the job |
| @param driverClass JDBC Driver class name |
| @param dbUrl JDBC DB access URL.]]> |
| </doc> |
| </method> |
| <method name="getConnection" return="java.sql.Connection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| <doc> |
| <![CDATA[Returns a connection object to the DB |
| @throws ClassNotFoundException |
| @throws SQLException]]> |
| </doc> |
| </method> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getInputTableName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setInputTableName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="tableName" type="java.lang.String"/> |
| </method> |
| <method name="getInputFieldNames" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setInputFieldNames" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="fieldNames" type="java.lang.String[]"/> |
| </method> |
| <method name="getInputConditions" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setInputConditions" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conditions" type="java.lang.String"/> |
| </method> |
| <method name="getInputOrderBy" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setInputOrderBy" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="orderby" type="java.lang.String"/> |
| </method> |
| <method name="getInputQuery" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setInputQuery" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="query" type="java.lang.String"/> |
| </method> |
| <method name="getInputCountQuery" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setInputCountQuery" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="query" type="java.lang.String"/> |
| </method> |
| <method name="setInputBoundingQuery" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="query" type="java.lang.String"/> |
| </method> |
| <method name="getInputBoundingQuery" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getInputClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setInputClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="inputClass" type="java.lang.Class"/> |
| </method> |
| <method name="getOutputTableName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setOutputTableName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="tableName" type="java.lang.String"/> |
| </method> |
| <method name="getOutputFieldNames" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setOutputFieldNames" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="fieldNames" type="java.lang.String[]"/> |
| </method> |
| <method name="setOutputFieldCount" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="fieldCount" type="int"/> |
| </method> |
| <method name="getOutputFieldCount" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <field name="DRIVER_CLASS_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The JDBC Driver class name]]> |
| </doc> |
| </field> |
| <field name="URL_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[JDBC Database access URL]]> |
| </doc> |
| </field> |
| <field name="USERNAME_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[User name to access the database]]> |
| </doc> |
| </field> |
| <field name="PASSWORD_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Password to access the database]]> |
| </doc> |
| </field> |
| <field name="INPUT_TABLE_NAME_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Input table name]]> |
| </doc> |
| </field> |
| <field name="INPUT_FIELD_NAMES_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Field names in the Input table]]> |
| </doc> |
| </field> |
| <field name="INPUT_CONDITIONS_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[WHERE clause in the input SELECT statement]]> |
| </doc> |
| </field> |
| <field name="INPUT_ORDER_BY_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[ORDER BY clause in the input SELECT statement]]> |
| </doc> |
| </field> |
| <field name="INPUT_QUERY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Whole input query, excluding LIMIT...OFFSET]]> |
| </doc> |
| </field> |
| <field name="INPUT_COUNT_QUERY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Input query to get the count of records]]> |
| </doc> |
| </field> |
| <field name="INPUT_BOUNDING_QUERY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Input query to get the max and min values of the jdbc.input.query]]> |
| </doc> |
| </field> |
| <field name="INPUT_CLASS_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Class name implementing DBWritable which will hold input tuples]]> |
| </doc> |
| </field> |
| <field name="OUTPUT_TABLE_NAME_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Output table name]]> |
| </doc> |
| </field> |
| <field name="OUTPUT_FIELD_NAMES_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Field names in the Output table]]> |
| </doc> |
| </field> |
| <field name="OUTPUT_FIELD_COUNT_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Number of fields in the Output table]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[A container for configuration property names for jobs with DB input/output. |
| |
| The job can be configured using the static methods in this class, |
| {@link DBInputFormat}, and {@link DBOutputFormat}. |
| Alternatively, the properties can be set in the configuration with proper |
| values. |
| |
| @see DBConfiguration#configureDB(Configuration, String, String, String, String) |
| @see DBInputFormat#setInput(Job, Class, String, String) |
| @see DBInputFormat#setInput(Job, Class, String, String, String, String...) |
| @see DBOutputFormat#setOutput(Job, String, String...)]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.DBConfiguration --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.DBInputFormat --> |
| <class name="DBInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <constructor name="DBInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getDBConf" return="org.apache.hadoop.mapreduce.lib.db.DBConfiguration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getConnection" return="java.sql.Connection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createConnection" return="java.sql.Connection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getDBProductName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getSplits" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getCountQuery" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the query for getting the total number of rows, |
| subclasses can override this for custom behaviour.]]> |
| </doc> |
| </method> |
| <method name="setInput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="inputClass" type="java.lang.Class"/> |
| <param name="tableName" type="java.lang.String"/> |
| <param name="conditions" type="java.lang.String"/> |
| <param name="orderBy" type="java.lang.String"/> |
| <param name="fieldNames" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[Initializes the map-part of the job with the appropriate input settings. |
| |
| @param job The map-reduce job |
| @param inputClass the class object implementing DBWritable, which is the |
| Java object holding tuple fields. |
| @param tableName The table to read data from |
| @param conditions The condition which to select data with, |
| eg. '(updated > 20070101 AND length > 0)' |
| @param orderBy the fieldNames in the orderBy clause. |
| @param fieldNames The field names in the table |
| @see #setInput(Job, Class, String, String)]]> |
| </doc> |
| </method> |
| <method name="setInput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="inputClass" type="java.lang.Class"/> |
| <param name="inputQuery" type="java.lang.String"/> |
| <param name="inputCountQuery" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Initializes the map-part of the job with the appropriate input settings. |
| |
| @param job The map-reduce job |
| @param inputClass the class object implementing DBWritable, which is the |
| Java object holding tuple fields. |
| @param inputQuery the input query to select fields. Example : |
| "SELECT f1, f2, f3 FROM Mytable ORDER BY f1" |
| @param inputCountQuery the input query that returns |
| the number of records in the table. |
| Example : "SELECT COUNT(f1) FROM Mytable" |
| @see #setInput(Job, Class, String, String, String, String...)]]> |
| </doc> |
| </method> |
| <method name="closeConnection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <field name="dbProductName" type="java.lang.String" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="conditions" type="java.lang.String" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="connection" type="java.sql.Connection" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="tableName" type="java.lang.String" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="fieldNames" type="java.lang.String[]" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="dbConf" type="org.apache.hadoop.mapreduce.lib.db.DBConfiguration" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[An InputFormat that reads input data from an SQL table. |
| <p> |
| DBInputFormat emits LongWritables containing the record number as |
| key and DBWritables as value. |
| |
| The SQL query, and input class can be set using one of the two |
| setInput methods.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.DBInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat --> |
| <class name="DBOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="DBOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="constructQuery" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="table" type="java.lang.String"/> |
| <param name="fieldNames" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[Constructs the query used as the prepared statement to insert data. |
| |
| @param table |
| the table to insert into |
| @param fieldNames |
| the fields to insert into. If field names are unknown, supply an |
| array of nulls.]]> |
| </doc> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="setOutput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="tableName" type="java.lang.String"/> |
| <param name="fieldNames" type="java.lang.String[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Initializes the reduce-part of the job with |
| the appropriate output settings |
| |
| @param job The job |
| @param tableName The table to insert data into |
| @param fieldNames The field names in the table.]]> |
| </doc> |
| </method> |
| <method name="setOutput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="tableName" type="java.lang.String"/> |
| <param name="fieldCount" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Initializes the reduce-part of the job |
| with the appropriate output settings |
| |
| @param job The job |
| @param tableName The table to insert data into |
| @param fieldCount the number of fields in the table.]]> |
| </doc> |
| </method> |
| <field name="dbProductName" type="java.lang.String" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[An OutputFormat that sends the reduce output to a SQL table. |
| <p> |
| {@link DBOutputFormat} accepts <key,value> pairs, where |
| key has a type extending DBWritable. Returned {@link RecordWriter} |
| writes <b>only the key</b> to the database with a batch SQL query.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.DBRecordReader --> |
| <class name="DBRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="DBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| <doc> |
| <![CDATA[@param split The InputSplit to read data for |
| @throws SQLException]]> |
| </doc> |
| </constructor> |
| <method name="executeQuery" return="java.sql.ResultSet" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="query" type="java.lang.String"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </method> |
| <method name="getSelectQuery" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the query for selecting the records, |
| subclasses can override this for custom behaviour.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="initialize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getCurrentKey" return="org.apache.hadoop.io.LongWritable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getCurrentValue" return="T" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="createValue" return="T" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="deprecated, no comment"> |
| <doc> |
| <![CDATA[@deprecated]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="deprecated, no comment"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated]]> |
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link #nextKeyValue()}"> |
| <param name="key" type="org.apache.hadoop.io.LongWritable"/> |
| <param name="value" type="T"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Use {@link #nextKeyValue()}]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="nextKeyValue" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getSplit" return="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getFieldNames" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getTableName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getConditions" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getDBConf" return="org.apache.hadoop.mapreduce.lib.db.DBConfiguration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getConnection" return="java.sql.Connection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getStatement" return="java.sql.PreparedStatement" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setStatement" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="stmt" type="java.sql.PreparedStatement"/> |
| </method> |
| <field name="statement" type="java.sql.PreparedStatement" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A RecordReader that reads records from a SQL table. |
| Emits LongWritables containing the record number as |
| key and DBWritables as value.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.DBRecordReader --> |
| <!-- start interface org.apache.hadoop.mapreduce.lib.db.DBSplitter --> |
| <interface name="DBSplitter" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="split" return="java.util.List" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="results" type="java.sql.ResultSet"/> |
| <param name="colName" type="java.lang.String"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| <doc> |
| <![CDATA[Given a ResultSet containing one record (and already advanced to that record) |
| with two columns (a low value, and a high value, both of the same type), determine |
| a set of splits that span the given values.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[DBSplitter will generate DBInputSplits to use with DataDrivenDBInputFormat. |
| DataDrivenDBInputFormat needs to interpolate between two values that |
| represent the lowest and highest valued records to import. Depending |
| on the data-type of the column, this requires different behavior. |
| DBSplitter implementations should perform this for a data type or family |
| of data types.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.lib.db.DBSplitter --> |
| <!-- start interface org.apache.hadoop.mapreduce.lib.db.DBWritable --> |
| <interface name="DBWritable" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="write" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="statement" type="java.sql.PreparedStatement"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| <doc> |
| <![CDATA[Sets the fields of the object in the {@link PreparedStatement}. |
| @param statement the statement that the fields are put into. |
| @throws SQLException]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="resultSet" type="java.sql.ResultSet"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| <doc> |
| <![CDATA[Reads the fields of the object from the {@link ResultSet}. |
| @param resultSet the {@link ResultSet} to get the fields from. |
| @throws SQLException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Objects that are read from/written to a database should implement |
| <code>DBWritable</code>. DBWritable is similar to {@link Writable} |
| except that the {@link #write(PreparedStatement)} method takes a |
| {@link PreparedStatement}, and {@link #readFields(ResultSet)} |
| takes a {@link ResultSet}. |
| <p> |
| Implementations are responsible for writing the fields of the object |
| to PreparedStatement, and reading the fields of the object from the |
| ResultSet. |
| |
| <p>Example:</p> |
| If we have the following table in the database : |
| <pre> |
| CREATE TABLE MyTable ( |
| counter INTEGER NOT NULL, |
| timestamp BIGINT NOT NULL |
| ); |
| </pre> |
| then we can read/write the tuples from/to the table with : |
| <pre> |
| public class MyWritable implements Writable, DBWritable { |
| // Some data |
| private int counter; |
| private long timestamp; |
| |
| //Writable#write() implementation |
| public void write(DataOutput out) throws IOException { |
| out.writeInt(counter); |
| out.writeLong(timestamp); |
| } |
| |
| //Writable#readFields() implementation |
| public void readFields(DataInput in) throws IOException { |
| counter = in.readInt(); |
| timestamp = in.readLong(); |
| } |
| |
| public void write(PreparedStatement statement) throws SQLException { |
| statement.setInt(1, counter); |
| statement.setLong(2, timestamp); |
| } |
| |
| public void readFields(ResultSet resultSet) throws SQLException { |
| counter = resultSet.getInt(1); |
| timestamp = resultSet.getLong(2); |
| } |
| } |
| </pre>]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.lib.db.DBWritable --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.FloatSplitter --> |
| <class name="FloatSplitter" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/> |
| <constructor name="FloatSplitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="split" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="results" type="java.sql.ResultSet"/> |
| <param name="colName" type="java.lang.String"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </method> |
| <doc> |
| <![CDATA[Implement DBSplitter over floating-point values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.FloatSplitter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.IntegerSplitter --> |
| <class name="IntegerSplitter" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/> |
| <constructor name="IntegerSplitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="split" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="results" type="java.sql.ResultSet"/> |
| <param name="colName" type="java.lang.String"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </method> |
| <doc> |
| <![CDATA[Implement DBSplitter over integer values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.IntegerSplitter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.MySQLDataDrivenDBRecordReader --> |
| <class name="MySQLDataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MySQLDataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </constructor> |
| <method name="executeQuery" return="java.sql.ResultSet" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="query" type="java.lang.String"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </method> |
| <doc> |
| <![CDATA[A RecordReader that reads records from a MySQL table via DataDrivenDBRecordReader]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.MySQLDataDrivenDBRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.MySQLDBRecordReader --> |
| <class name="MySQLDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MySQLDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </constructor> |
| <method name="executeQuery" return="java.sql.ResultSet" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="query" type="java.lang.String"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </method> |
| <doc> |
| <![CDATA[A RecordReader that reads records from a MySQL table.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.MySQLDBRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBInputFormat --> |
| <class name="OracleDataDrivenDBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <constructor name="OracleDataDrivenDBInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getSplitter" return="org.apache.hadoop.mapreduce.lib.db.DBSplitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="sqlDataType" type="int"/> |
| <doc> |
| <![CDATA[@return the DBSplitter implementation to use to divide the table/query into InputSplits.]]> |
| </doc> |
| </method> |
| <method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An InputFormat that reads input data from an SQL table in an Oracle db.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBRecordReader --> |
| <class name="OracleDataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="OracleDataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </constructor> |
| <doc> |
| <![CDATA[A RecordReader that reads records from an Oracle table via DataDrivenDBRecordReader]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDateSplitter --> |
| <class name="OracleDateSplitter" extends="org.apache.hadoop.mapreduce.lib.db.DateSplitter" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="OracleDateSplitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="dateToString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="d" type="java.util.Date"/> |
| </method> |
| <doc> |
| <![CDATA[Implement DBSplitter over date/time values returned by an Oracle db. |
| Make use of logic from DateSplitter, since this just needs to use |
| some Oracle-specific functions on the formatting end when generating |
| InputSplits.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDateSplitter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDBRecordReader --> |
| <class name="OracleDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="OracleDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| </constructor> |
| <method name="getSelectQuery" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the query for selecting the records from an Oracle DB.]]> |
| </doc> |
| </method> |
| <method name="setSessionTimeZone" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="conn" type="java.sql.Connection"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| <doc> |
| <![CDATA[Set session time zone |
| @param conf The current configuration. |
| We read the 'oracle.sessionTimeZone' property from here. |
| @param conn The connection to alter the timezone properties of.]]> |
| </doc> |
| </method> |
| <field name="SESSION_TIMEZONE_KEY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Configuration key to set to a timezone string.]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[A RecordReader that reads records from an Oracle SQL table.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDBRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.db.TextSplitter --> |
| <class name="TextSplitter" extends="org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TextSplitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="split" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="results" type="java.sql.ResultSet"/> |
| <param name="colName" type="java.lang.String"/> |
| <exception name="SQLException" type="java.sql.SQLException"/> |
| <doc> |
| <![CDATA[This method needs to determine the splits between two user-provided strings. |
| In the case where the user's strings are 'A' and 'Z', this is not hard; we |
| could create two splits from ['A', 'M') and ['M', 'Z'], 26 splits for strings |
| beginning with each letter, etc. |
| |
| If a user has provided us with the strings "Ham" and "Haze", however, we need |
| to create splits that differ in the third letter. |
| |
| The algorithm used is as follows: |
| Since there are 2**16 unicode characters, we interpret characters as digits in |
| base 65536. Given a string 's' containing characters s_0, s_1 .. s_n, we interpret |
| the string as the number: 0.s_0 s_1 s_2.. s_n in base 65536. Having mapped the |
| low and high strings into floating-point values, we then use the BigDecimalSplitter |
| to establish the even split points, then map the resulting floating point values |
| back into strings.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Implement DBSplitter over text strings.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.db.TextSplitter --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.fieldsel"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionHelper --> |
| <class name="FieldSelectionHelper" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FieldSelectionHelper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="FieldSelectionHelper" type="org.apache.hadoop.io.Text, org.apache.hadoop.io.Text" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="parseOutputKeyValueSpec" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="keyValueSpec" type="java.lang.String"/> |
| <param name="keyFieldList" type="java.util.List"/> |
| <param name="valueFieldList" type="java.util.List"/> |
| </method> |
| <method name="specToString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="fieldSeparator" type="java.lang.String"/> |
| <param name="keyValueSpec" type="java.lang.String"/> |
| <param name="allValueFieldsFrom" type="int"/> |
| <param name="keyFieldList" type="java.util.List"/> |
| <param name="valueFieldList" type="java.util.List"/> |
| </method> |
| <method name="getKey" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getValue" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="extractOutputKeyValue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.String"/> |
| <param name="val" type="java.lang.String"/> |
| <param name="fieldSep" type="java.lang.String"/> |
| <param name="keyFieldList" type="java.util.List"/> |
| <param name="valFieldList" type="java.util.List"/> |
| <param name="allValueFieldsFrom" type="int"/> |
| <param name="ignoreKey" type="boolean"/> |
| <param name="isMap" type="boolean"/> |
| </method> |
| <field name="emptyText" type="org.apache.hadoop.io.Text" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DATA_FIELD_SEPARATOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DATA_FIELD_SEPERATOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="Use {@link #DATA_FIELD_SEPARATOR}"> |
| <doc> |
| <![CDATA[@deprecated Use {@link #DATA_FIELD_SEPARATOR}]]> |
| </doc> |
| </field> |
| <field name="MAP_OUTPUT_KEY_VALUE_SPEC" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="REDUCE_OUTPUT_KEY_VALUE_SPEC" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class implements a mapper/reducer class that can be used to perform |
| field selections in a manner similar to unix cut. The input data is treated |
| as fields separated by a user specified separator (the default value is |
| "\t"). The user can specify a list of fields that form the map output keys, |
| and a list of fields that form the map output values. If the inputformat is |
| TextInputFormat, the mapper will ignore the key to the map function, and the |
| fields are from the value only. Otherwise, the fields are the union of those |
| from the key and those from the value. |
| |
| The field separator is under attribute "mapreduce.fieldsel.data.field.separator" |
| |
| The map output field list spec is under attribute |
| "mapreduce.fieldsel.map.output.key.value.fields.spec". |
| The value is expected to be like "keyFieldsSpec:valueFieldsSpec" |
| key/valueFieldsSpec are comma (,) separated field spec: fieldSpec,fieldSpec,fieldSpec ... |
| Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range |
| (like 2-5) to specify a range of fields, or an open range (like 3-) specifying all |
| the fields starting from field 3. The open range field spec applies to value fields only. |
| They have no effect on the key fields. |
| |
| Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys, |
| and use fields 6,5,1,2,3,7 and above for values. |
| |
| The reduce output field list spec is under attribute |
| "mapreduce.fieldsel.reduce.output.key.value.fields.spec". |
| |
| The reducer extracts output key/value pairs in a similar manner, except that |
| the key is never ignored.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionHelper --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionMapper --> |
| <class name="FieldSelectionMapper" extends="org.apache.hadoop.mapreduce.Mapper" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FieldSelectionMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="val" type="V"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[The identity function. Input key/value pair is written directly to output.]]> |
| </doc> |
| </method> |
| <field name="LOG" type="org.slf4j.Logger" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class implements a mapper class that can be used to perform |
| field selections in a manner similar to unix cut. The input data is treated |
| as fields separated by a user specified separator (the default value is |
| "\t"). The user can specify a list of fields that form the map output keys, |
| and a list of fields that form the map output values. If the inputformat is |
| TextInputFormat, the mapper will ignore the key to the map function, and the |
| fields are from the value only. Otherwise, the fields are the union of those |
| from the key and those from the value. |
| |
| The field separator is under attribute "mapreduce.fieldsel.data.field.separator" |
| |
| The map output field list spec is under attribute |
| "mapreduce.fieldsel.map.output.key.value.fields.spec". |
| The value is expected to be like |
| "keyFieldsSpec:valueFieldsSpec" key/valueFieldsSpec are comma (,) separated |
| field spec: fieldSpec,fieldSpec,fieldSpec ... Each field spec can be a |
| simple number (e.g. 5) specifying a specific field, or a range (like 2-5) |
| to specify a range of fields, or an open range (like 3-) specifying all |
| the fields starting from field 3. The open range field spec applies value |
| fields only. They have no effect on the key fields. |
| |
| Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields |
| 4,3,0 and 1 for keys, and use fields 6,5,1,2,3,7 and above for values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionMapper --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionReducer --> |
| <class name="FieldSelectionReducer" extends="org.apache.hadoop.mapreduce.Reducer" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FieldSelectionReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="values" type="java.lang.Iterable"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <field name="LOG" type="org.slf4j.Logger" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class implements a reducer class that can be used to perform field |
| selections in a manner similar to unix cut. |
| |
| The input data is treated as fields separated by a user specified |
| separator (the default value is "\t"). The user can specify a list of |
| fields that form the reduce output keys, and a list of fields that form |
| the reduce output values. The fields are the union of those from the key |
| and those from the value. |
| |
| The field separator is under attribute "mapreduce.fieldsel.data.field.separator" |
| |
| The reduce output field list spec is under attribute |
| "mapreduce.fieldsel.reduce.output.key.value.fields.spec". |
| The value is expected to be like |
| "keyFieldsSpec:valueFieldsSpec" key/valueFieldsSpec are comma (,) |
| separated field spec: fieldSpec,fieldSpec,fieldSpec ... Each field spec |
| can be a simple number (e.g. 5) specifying a specific field, or a range |
| (like 2-5) to specify a range of fields, or an open range (like 3-) |
| specifying all the fields starting from field 3. The open range field |
| spec applies value fields only. They have no effect on the key fields. |
| |
| Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields |
| 4,3,0 and 1 for keys, and use fields 6,5,1,2,3,7 and above for values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionReducer --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.input"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat --> |
| <class name="CombineFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="CombineFileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[default constructor]]> |
| </doc> |
| </constructor> |
| <method name="setMaxSplitSize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="maxSplitSize" type="long"/> |
| <doc> |
| <![CDATA[Specify the maximum size (in bytes) of each split. Each split is |
| approximately equal to the specified size.]]> |
| </doc> |
| </method> |
| <method name="setMinSplitSizeNode" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="minSplitSizeNode" type="long"/> |
| <doc> |
| <![CDATA[Specify the minimum size (in bytes) of each split per node. |
| This applies to data that is left over after combining data on a single |
| node into splits that are of maximum size specified by maxSplitSize. |
| This leftover data will be combined into its own split if its size |
| exceeds minSplitSizeNode.]]> |
| </doc> |
| </method> |
| <method name="setMinSplitSizeRack" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="minSplitSizeRack" type="long"/> |
| <doc> |
| <![CDATA[Specify the minimum size (in bytes) of each split per rack. |
| This applies to data that is left over after combining data on a single |
| rack into splits that are of maximum size specified by maxSplitSize. |
| This leftover data will be combined into its own split if its size |
| exceeds minSplitSizeRack.]]> |
| </doc> |
| </method> |
| <method name="createPool" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="filters" type="java.util.List"/> |
| <doc> |
| <![CDATA[Create a new pool and add the filters to it. |
| A split cannot have files from different pools.]]> |
| </doc> |
| </method> |
| <method name="createPool" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/> |
| <doc> |
| <![CDATA[Create a new pool and add the filters to it. |
| A pathname can satisfy any one of the specified filters. |
| A split cannot have files from different pools.]]> |
| </doc> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <method name="getSplits" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This is not implemented yet.]]> |
| </doc> |
| </method> |
| <method name="getFileBlockLocations" return="org.apache.hadoop.fs.BlockLocation[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="stat" type="org.apache.hadoop.fs.FileStatus"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="SPLIT_MINSIZE_PERNODE" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SPLIT_MINSIZE_PERRACK" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[An abstract {@link InputFormat} that returns {@link CombineFileSplit}'s in |
| {@link InputFormat#getSplits(JobContext)} method. |
| |
| Splits are constructed from the files under the input paths. |
| A split cannot have files from different pools. |
| Each split returned may contain blocks from different files. |
| If a maxSplitSize is specified, then blocks on the same node are |
| combined to form a single split. Blocks that are left over are |
| then combined with other blocks in the same rack. |
| If maxSplitSize is not specified, then blocks from the same rack |
| are combined in a single split; no attempt is made to create |
| node-local splits. |
| If the maxSplitSize is equal to the block size, then this class |
| is similar to the default splitting behavior in Hadoop: each |
| block is a locally processed split. |
| Subclasses implement |
| {@link InputFormat#createRecordReader(InputSplit, TaskAttemptContext)} |
| to construct <code>RecordReader</code>'s for |
| <code>CombineFileSplit</code>'s. |
| |
| @see CombineFileSplit]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader --> |
| <class name="CombineFileRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit, org.apache.hadoop.mapreduce.TaskAttemptContext, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[A generic RecordReader that can hand out different recordReaders |
| for each chunk in the CombineFileSplit.]]> |
| </doc> |
| </constructor> |
| <method name="initialize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="nextKeyValue" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getCurrentKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getCurrentValue" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[return progress based on the amount of data processed so far.]]> |
| </doc> |
| </method> |
| <method name="initNextRecordReader" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]> |
| </doc> |
| </method> |
| <field name="split" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="rrConstructor" type="java.lang.reflect.Constructor" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="idx" type="int" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="progress" type="long" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="curReader" type="org.apache.hadoop.mapreduce.RecordReader" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A generic RecordReader that can hand out different recordReaders |
| for each chunk in a {@link CombineFileSplit}. |
| A CombineFileSplit can combine data chunks from multiple files. |
| This class allows using different RecordReaders for processing |
| these data chunks from different files. |
| @see CombineFileSplit]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReaderWrapper --> |
| <class name="CombineFileRecordReaderWrapper" extends="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="CombineFileRecordReaderWrapper" type="org.apache.hadoop.mapreduce.lib.input.FileInputFormat, org.apache.hadoop.mapreduce.lib.input.CombineFileSplit, org.apache.hadoop.mapreduce.TaskAttemptContext, java.lang.Integer" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </constructor> |
| <method name="initialize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="nextKeyValue" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getCurrentKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getCurrentValue" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A wrapper class for a record reader that handles a single file split. It |
| delegates most of the methods to the wrapped instance. A concrete subclass |
| needs to provide a constructor that calls this parent constructor with the |
| appropriate input format. The subclass constructor must satisfy the specific |
| constructor signature that is required by |
| <code>CombineFileRecordReader</code>. |
| |
| Subclassing is needed to get a concrete record reader wrapper because of the |
| constructor requirement. |
| |
| @see CombineFileRecordReader |
| @see CombineFileInputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReaderWrapper --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit --> |
| <class name="CombineFileSplit" extends="org.apache.hadoop.mapreduce.InputSplit" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="CombineFileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[default constructor]]> |
| </doc> |
| </constructor> |
| <constructor name="CombineFileSplit" type="org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CombineFileSplit" type="org.apache.hadoop.fs.Path[], long[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CombineFileSplit" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Copy constructor]]> |
| </doc> |
| </constructor> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getStartOffsets" return="long[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns an array containing the start offsets of the files in the split]]> |
| </doc> |
| </method> |
| <method name="getLengths" return="long[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns an array containing the lengths of the files in the split]]> |
| </doc> |
| </method> |
| <method name="getOffset" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Returns the start offset of the i<sup>th</sup> Path]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Returns the length of the i<sup>th</sup> Path]]> |
| </doc> |
| </method> |
| <method name="getNumPaths" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the number of Paths in the split]]> |
| </doc> |
| </method> |
| <method name="getPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Returns the i<sup>th</sup> Path]]> |
| </doc> |
| </method> |
| <method name="getPaths" return="org.apache.hadoop.fs.Path[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns all the Paths in the split]]> |
| </doc> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns all the Paths where this input-split resides]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[A sub-collection of input files. |
| |
| Unlike {@link FileSplit}, CombineFileSplit class does not represent |
| a split of a file, but a split of input files into smaller sets. |
| A split may contain blocks from different files, but all |
| the blocks in the same split are probably local to some rack <br> |
| CombineFileSplit can be used to implement {@link RecordReader}'s, |
| with reading one record per file. |
| |
| @see FileSplit |
| @see CombineFileInputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.CombineSequenceFileInputFormat --> |
| <class name="CombineSequenceFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="CombineSequenceFileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Input format that is a <code>CombineFileInputFormat</code>-equivalent for |
| <code>SequenceFileInputFormat</code>. |
| |
| @see CombineFileInputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.CombineSequenceFileInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat --> |
| <class name="CombineTextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="CombineTextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Input format that is a <code>CombineFileInputFormat</code>-equivalent for |
| <code>TextInputFormat</code>. |
| |
| @see CombineFileInputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.FileInputFormat --> |
| <class name="FileInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setInputDirRecursive" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="inputDirRecursive" type="boolean"/> |
| <doc> |
| <![CDATA[@param job |
| the job to modify |
| @param inputDirRecursive whether input directories should be traversed recursively]]> |
| </doc> |
| </method> |
| <method name="getInputDirRecursive" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[@param job |
| the job to look at. |
| @return should the files to be read recursively?]]> |
| </doc> |
| </method> |
| <method name="getFormatMinSplitSize" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the lower bound on split size imposed by the format. |
| @return the number of bytes of the minimal split for this format]]> |
| </doc> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="filename" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Is the given filename splittable? Usually, true, but if the file is |
| stream compressed, it will not be. |
| |
| The default implementation in <code>FileInputFormat</code> always returns |
| true. Implementations that may deal with non-splittable files <i>must</i> |
| override this method. |
| |
| <code>FileInputFormat</code> implementations can override this and return |
| <code>false</code> to ensure that individual input files are never split-up |
| so that {@link Mapper}s process entire files. |
| |
| @param context the job context |
| @param filename the file name to check |
| @return is this file splittable?]]> |
| </doc> |
| </method> |
| <method name="setInputPathFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="filter" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job. |
| @param job the job to modify |
| @param filter the PathFilter class use for filtering the input paths.]]> |
| </doc> |
| </method> |
| <method name="setMinInputSplitSize" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="size" type="long"/> |
| <doc> |
| <![CDATA[Set the minimum input split size |
| @param job the job to modify |
| @param size the minimum size]]> |
| </doc> |
| </method> |
| <method name="getMinSplitSize" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the minimum split size |
| @param job the job |
| @return the minimum number of bytes that can be in a split]]> |
| </doc> |
| </method> |
| <method name="setMaxInputSplitSize" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="size" type="long"/> |
| <doc> |
| <![CDATA[Set the maximum split size |
| @param job the job to modify |
| @param size the maximum split size]]> |
| </doc> |
| </method> |
| <method name="getMaxSplitSize" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the maximum split size. |
| @param context the job to look at. |
| @return the maximum number of bytes a split can include]]> |
| </doc> |
| </method> |
| <method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get a PathFilter instance of the filter set for the input paths. |
| |
| @return the PathFilter instance set for the job, NULL if none has been set.]]> |
| </doc> |
| </method> |
| <method name="listStatus" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[List input directories. |
| Subclasses may override to, e.g., select only files matching a regular |
| expression. |
| |
| @param job the job to list input paths for |
| @return array of FileStatus objects |
| @throws IOException if zero items.]]> |
| </doc> |
| </method> |
| <method name="addInputPathRecursively" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="result" type="java.util.List"/> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <param name="inputFilter" type="org.apache.hadoop.fs.PathFilter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add files in the input path recursively into the results. |
| @param result |
| The List to store all files. |
| @param fs |
| The FileSystem. |
| @param path |
| The input path. |
| @param inputFilter |
| The input filter that can be used to filter files/dirs. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="makeSplit" return="org.apache.hadoop.mapreduce.lib.input.FileSplit" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| <param name="start" type="long"/> |
| <param name="length" type="long"/> |
| <param name="hosts" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[A factory that makes the split for this class. It can be overridden |
| by sub-classes to make sub-types]]> |
| </doc> |
| </method> |
| <method name="makeSplit" return="org.apache.hadoop.mapreduce.lib.input.FileSplit" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| <param name="start" type="long"/> |
| <param name="length" type="long"/> |
| <param name="hosts" type="java.lang.String[]"/> |
| <param name="inMemoryHosts" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[A factory that makes the split for this class. It can be overridden |
| by sub-classes to make sub-types]]> |
| </doc> |
| </method> |
| <method name="getSplits" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Generate the list of files and make them into FileSplits. |
| @param job the job context |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="computeSplitSize" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="blockSize" type="long"/> |
| <param name="minSize" type="long"/> |
| <param name="maxSize" type="long"/> |
| </method> |
| <method name="getBlockIndex" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/> |
| <param name="offset" type="long"/> |
| </method> |
| <method name="setInputPaths" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="commaSeparatedPaths" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Sets the given comma separated paths as the list of inputs |
| for the map-reduce job. |
| |
| @param job the job |
| @param commaSeparatedPaths Comma separated paths to be set as |
| the list of inputs for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="addInputPaths" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="commaSeparatedPaths" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add the given comma separated paths to the list of inputs for |
| the map-reduce job. |
| |
| @param job The job to modify |
| @param commaSeparatedPaths Comma separated paths to be added to |
| the list of inputs for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="setInputPaths" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Set the array of {@link Path}s as the list of inputs |
| for the map-reduce job. |
| |
| @param job The job to modify |
| @param inputPaths the {@link Path}s of the input directories/files |
| for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="addInputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job. |
| |
| @param job The {@link Job} to modify |
| @param path {@link Path} to be added to the list of inputs for |
| the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getInputPaths" return="org.apache.hadoop.fs.Path[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the list of input {@link Path}s for the map-reduce job. |
| |
| @param context The job |
| @return the list of input {@link Path}s for the map-reduce job.]]> |
| </doc> |
| </method> |
| <field name="INPUT_DIR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SPLIT_MAXSIZE" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SPLIT_MINSIZE" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="PATHFILTER_CLASS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="NUM_INPUT_FILES" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="INPUT_DIR_RECURSIVE" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="INPUT_DIR_NONRECURSIVE_IGNORE_SUBDIRS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="LIST_STATUS_NUM_THREADS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DEFAULT_LIST_STATUS_NUM_THREADS" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A base class for file-based {@link InputFormat}s. |
| |
| <p><code>FileInputFormat</code> is the base class for all file-based |
| <code>InputFormat</code>s. This provides a generic implementation of |
| {@link #getSplits(JobContext)}. |
| |
| Implementations of <code>FileInputFormat</code> can also override the |
| {@link #isSplitable(JobContext, Path)} method to prevent input files |
| from being split-up in certain situations. Implementations that may |
| deal with non-splittable files <i>must</i> override this method, since |
| the default implementation assumes splitting is always possible.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.FileInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter --> |
| <class name="FileInputFormatCounter" extends="java.lang.Enum" |
| abstract="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.FileSplit --> |
| <class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="FileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a split with host information |
| |
| @param file the file name |
| @param start the position of the first byte in the file to process |
| @param length the number of bytes in the file to process |
| @param hosts the list of hosts containing the block, possibly null]]> |
| </doc> |
| </constructor> |
| <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[], java.lang.String[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a split with host and cached-blocks information |
| |
| @param file the file name |
| @param start the position of the first byte in the file to process |
| @param length the number of bytes in the file to process |
| @param hosts the list of hosts containing the block |
| @param inMemoryHosts the list of hosts containing the block in memory]]> |
| </doc> |
| </constructor> |
| <method name="getPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The file containing this split's data.]]> |
| </doc> |
| </method> |
| <method name="getStart" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The position of the first byte in the file to process.]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The number of bytes in the file to process.]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getLocationInfo" return="org.apache.hadoop.mapred.SplitLocationInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A section of an input file. Returned by {@link |
| InputFormat#getSplits(JobContext)} and passed to |
| {@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.FileSplit --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat --> |
| <class name="FixedLengthInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FixedLengthInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setRecordLength" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="recordLength" type="int"/> |
| <doc> |
| <![CDATA[Set the length of each record |
| @param conf configuration |
| @param recordLength the length of a record]]> |
| </doc> |
| </method> |
| <method name="getRecordLength" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get record length value |
| @param conf configuration |
| @return the record length, zero means none was set]]> |
| </doc> |
| </method> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <field name="FIXED_RECORD_LENGTH" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[FixedLengthInputFormat is an input format used to read input files |
| which contain fixed length records. The content of a record need not be |
| text. It can be arbitrary binary data. Users must configure the record |
| length property by calling: |
| FixedLengthInputFormat.setRecordLength(conf, recordLength);<br><br> or |
| conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH, recordLength); |
| <br><br> |
| @see FixedLengthRecordReader]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.InvalidInputException --> |
| <class name="InvalidInputException" extends="java.io.IOException" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InvalidInputException" type="java.util.List" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create the exception with the given list. |
| @param probs the list of problems to report. This list is not copied.]]> |
| </doc> |
| </constructor> |
| <method name="getProblems" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the complete list of the problems reported. |
| @return the list of problems, which must not be modified]]> |
| </doc> |
| </method> |
| <method name="getMessage" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get a summary message of the problems found. |
| @return the concatenated messages from all of the problems.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class wraps a list of problems with the input, so that the user |
| can get a list of problems together instead of finding and fixing them one |
| by one.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.InvalidInputException --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader --> |
| <class name="KeyValueLineRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="getKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="initialize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="findSeparator" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="utf" type="byte[]"/> |
| <param name="start" type="int"/> |
| <param name="length" type="int"/> |
| <param name="sep" type="byte"/> |
| </method> |
| <method name="setKeyValue" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="value" type="org.apache.hadoop.io.Text"/> |
| <param name="line" type="byte[]"/> |
| <param name="lineLen" type="int"/> |
| <param name="pos" type="int"/> |
| </method> |
| <method name="nextKeyValue" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Read key/value pair in a line.]]> |
| </doc> |
| </method> |
| <method name="getCurrentKey" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getCurrentValue" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="KEY_VALUE_SEPARATOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="KEY_VALUE_SEPERATOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="Use {@link #KEY_VALUE_SEPARATOR}"> |
| <doc> |
| <![CDATA[@deprecated Use {@link #KEY_VALUE_SEPARATOR}]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[This class treats a line in the input as a key/value pair separated by a |
| separator character. The separator can be specified in config file |
| under the attribute name mapreduce.input.keyvaluelinerecordreader.key.value.separator. The default |
| separator is the tab character ('\t').]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat --> |
| <class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="KeyValueTextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines. |
| Either line feed or carriage-return is used to signal end of line. |
| Each line is divided into key and value parts by a separator byte. If no |
| such byte exists, the key will be the entire line and the value will be empty. |
| The separator byte can be specified in config file under the attribute name |
| mapreduce.input.keyvaluelinerecordreader.key.value.separator. The default |
| is the tab character ('\t').]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.MultipleInputs --> |
| <class name="MultipleInputs" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultipleInputs" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="addInputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <param name="inputFormatClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Add a {@link Path} with a custom {@link InputFormat} to the list of |
| inputs for the map-reduce job. |
| |
| @param job The {@link Job} |
| @param path {@link Path} to be added to the list of inputs for the job |
| @param inputFormatClass {@link InputFormat} class to use for this path]]> |
| </doc> |
| </method> |
| <method name="addInputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <param name="inputFormatClass" type="java.lang.Class"/> |
| <param name="mapperClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Add a {@link Path} with a custom {@link InputFormat} and |
| {@link Mapper} to the list of inputs for the map-reduce job. |
| |
| @param job The {@link Job} |
| @param path {@link Path} to be added to the list of inputs for the job |
| @param inputFormatClass {@link InputFormat} class to use for this path |
| @param mapperClass {@link Mapper} class to use for this path]]> |
| </doc> |
| </method> |
| <field name="DIR_FORMATS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DIR_MAPPERS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class supports MapReduce jobs that have multiple input paths with |
| a different {@link InputFormat} and {@link Mapper} for each path]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.MultipleInputs --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat --> |
| <class name="NLineInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="NLineInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getSplits" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Logically splits the set of input files for the job, splits N lines |
| of the input as one split. |
| |
| @see FileInputFormat#getSplits(JobContext)]]> |
| </doc> |
| </method> |
| <method name="getSplitsForFile" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="status" type="org.apache.hadoop.fs.FileStatus"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="numLinesPerSplit" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createFileSplit" return="org.apache.hadoop.mapreduce.lib.input.FileSplit" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fileName" type="org.apache.hadoop.fs.Path"/> |
| <param name="begin" type="long"/> |
| <param name="length" type="long"/> |
| <doc> |
| <![CDATA[NLineInputFormat uses LineRecordReader, which always reads |
| (and consumes) at least one character out of its upper split |
| boundary. So to make sure that each mapper gets N lines, we |
| move back the upper split limits of each split |
| by one character here. |
| @param fileName Path of file |
| @param begin the position of the first byte in the file to process |
| @param length number of bytes in InputSplit |
| @return FileSplit]]> |
| </doc> |
| </method> |
| <method name="setNumLinesPerSplit" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="numLines" type="int"/> |
| <doc> |
| <![CDATA[Set the number of lines per split |
| @param job the job to modify |
| @param numLines the number of lines per split]]> |
| </doc> |
| </method> |
| <method name="getNumLinesPerSplit" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the number of lines per split |
| @param job the job |
| @return the number of lines per split]]> |
| </doc> |
| </method> |
| <field name="LINES_PER_MAP" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[NLineInputFormat which splits N lines of input as one split. |
| |
| In many "pleasantly" parallel applications, each process/mapper |
| processes the same input file(s), but the computations are |
| controlled by different parameters. (Referred to as "parameter sweeps".) |
| One way to achieve this is to specify a set of parameters |
| (one set per line) as input in a control file |
| (which is the input path to the map-reduce application, |
| whereas the input dataset is specified |
| via a config variable in JobConf.). |
| |
| The NLineInputFormat can be used in such applications, that splits |
| the input file such that by default, one line is fed as |
| a value to one map task, and key is the offset. |
| i.e. (k,v) is (LongWritable, Text). |
| The location hints will span the whole mapred cluster.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat --> |
| <class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileAsBinaryInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw) |
| format.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat --> |
| <class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileAsTextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[This class is similar to SequenceFileInputFormat, except it generates |
| SequenceFileAsTextRecordReader which converts the input keys and values |
| to their String forms by calling toString() method.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextRecordReader --> |
| <class name="SequenceFileAsTextRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileAsTextRecordReader" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="initialize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getCurrentKey" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getCurrentValue" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="nextKeyValue" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Read key/value pair in a line.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
      <![CDATA[This class converts the input keys and values to their String forms by
  calling the toString() method. This class is to SequenceFileAsTextInputFormat
  what LineRecordReader is to TextInputFormat.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter --> |
| <class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileInputFilter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create a record reader for the given split |
| @param split file split |
| @param context the task-attempt context |
| @return RecordReader]]> |
| </doc> |
| </method> |
| <method name="setFilterClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="filterClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[set the filter class |
| |
| @param job The job |
| @param filterClass filter class]]> |
| </doc> |
| </method> |
| <field name="LOG" type="org.slf4j.Logger" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILTER_CLASS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILTER_FREQUENCY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILTER_REGEX" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A class that allows a map/red job to work on a sample of sequence files. |
| The sample is decided by the filter class set by the job.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat --> |
| <class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getFormatMinSplitSize" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <method name="listStatus" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link InputFormat} for {@link SequenceFile}s.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader --> |
| <class name="SequenceFileRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileRecordReader" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="initialize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="nextKeyValue" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getCurrentKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getCurrentValue" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return the progress within the input split |
| @return 0.0 to 1.0 of the input byte range]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="conf" type="org.apache.hadoop.conf.Configuration" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
      <![CDATA[A {@link RecordReader} for {@link SequenceFile}s.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.input.TextInputFormat --> |
| <class name="TextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines. |
| Either linefeed or carriage-return are used to signal end of line. Keys are |
  the position in the file, and values are the line of text.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.input.TextInputFormat --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.jobcontrol"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob --> |
| <class name="ControlledJob" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ControlledJob" type="org.apache.hadoop.mapreduce.Job, java.util.List" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Construct a job. |
| @param job a mapreduce job to be executed. |
| @param dependingJobs an array of jobs the current job depends on]]> |
| </doc> |
| </constructor> |
| <constructor name="ControlledJob" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Construct a job. |
| |
| @param conf mapred job configuration representing a job to be executed. |
| @throws IOException]]> |
| </doc> |
| </constructor> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getJobName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the job name of this job]]> |
| </doc> |
| </method> |
| <method name="setJobName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the job name for this job. |
| @param jobName the job name]]> |
| </doc> |
| </method> |
| <method name="getJobID" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the job ID of this job assigned by JobControl]]> |
| </doc> |
| </method> |
| <method name="setJobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="id" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the job ID for this job. |
| @param id the job ID]]> |
| </doc> |
| </method> |
| <method name="getMapredJobId" return="org.apache.hadoop.mapreduce.JobID" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the mapred ID of this job as assigned by the mapred framework.]]> |
| </doc> |
| </method> |
| <method name="getJob" return="org.apache.hadoop.mapreduce.Job" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the mapreduce job]]> |
| </doc> |
| </method> |
| <method name="setJob" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <doc> |
| <![CDATA[Set the mapreduce job |
| @param job the mapreduce job for this job.]]> |
| </doc> |
| </method> |
| <method name="getJobState" return="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the state of this job]]> |
| </doc> |
| </method> |
| <method name="setJobState" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="state" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State"/> |
| <doc> |
| <![CDATA[Set the state for this job. |
| @param state the new state for this job.]]> |
| </doc> |
| </method> |
| <method name="getMessage" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the message of this job]]> |
| </doc> |
| </method> |
| <method name="setMessage" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="message" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the message for this job. |
| @param message the message for this job.]]> |
| </doc> |
| </method> |
| <method name="getDependentJobs" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the depending jobs of this job]]> |
| </doc> |
| </method> |
| <method name="addDependingJob" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="dependingJob" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob"/> |
| <doc> |
| <![CDATA[Add a job to this jobs' dependency list. |
| Dependent jobs can only be added while a Job |
| is waiting to run, not during or afterwards. |
| |
| @param dependingJob Job that this Job depends on. |
| @return <tt>true</tt> if the Job was added.]]> |
| </doc> |
| </method> |
| <method name="isCompleted" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return true if this job is in a complete state]]> |
| </doc> |
| </method> |
| <method name="isReady" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return true if this job is in READY state]]> |
| </doc> |
| </method> |
| <method name="killJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="failJob" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="message" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="submit" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Submit this job to mapred. The state becomes RUNNING if submission |
| is successful, FAILED otherwise.]]> |
| </doc> |
| </method> |
| <field name="CREATE_DIR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class encapsulates a MapReduce job and its dependency. It monitors |
| the states of the depending jobs and updates the state of this job. |
| A job starts in the WAITING state. If it does not have any depending jobs, |
| or all of the depending jobs are in SUCCESS state, then the job state |
| will become READY. If any depending jobs fail, the job will fail too. |
| When in READY state, the job can be submitted to Hadoop for execution, with |
| the state changing into RUNNING state. From RUNNING state, the job |
  can get into SUCCESS or FAILED state, depending on
| the status of the job execution.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl --> |
| <class name="JobControl" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="java.lang.Runnable"/> |
| <constructor name="JobControl" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a job control for a group of jobs. |
| @param groupName a name identifying this group]]> |
| </doc> |
| </constructor> |
| <method name="getWaitingJobList" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the waiting state]]> |
| </doc> |
| </method> |
| <method name="getRunningJobList" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the running state]]> |
| </doc> |
| </method> |
| <method name="getReadyJobsList" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the ready state]]> |
| </doc> |
| </method> |
| <method name="getSuccessfulJobList" return="java.util.List" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the success state]]> |
| </doc> |
| </method> |
| <method name="getFailedJobList" return="java.util.List" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="addJob" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="aJob" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob"/> |
| <doc> |
| <![CDATA[Add a new controlled job. |
| @param aJob the new controlled job]]> |
| </doc> |
| </method> |
| <method name="addJob" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="aJob" type="org.apache.hadoop.mapred.jobcontrol.Job"/> |
| <doc> |
| <![CDATA[Add a new job. |
| @param aJob the new job]]> |
| </doc> |
| </method> |
| <method name="addJobCollection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobs" type="java.util.Collection"/> |
| <doc> |
| <![CDATA[Add a collection of jobs |
| |
| @param jobs]]> |
| </doc> |
| </method> |
| <method name="getThreadState" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the thread state]]> |
| </doc> |
| </method> |
| <method name="stop" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[set the thread state to STOPPING so that the |
| thread will stop when it wakes up.]]> |
| </doc> |
| </method> |
| <method name="suspend" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[suspend the running thread]]> |
| </doc> |
| </method> |
| <method name="resume" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[resume the suspended thread]]> |
| </doc> |
| </method> |
| <method name="allFinished" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The main loop for the thread. |
| The loop does the following: |
| Check the states of the running jobs |
| Update the states of waiting jobs |
| Submit the jobs in ready state]]> |
| </doc> |
| </method> |
| <doc> |
      <![CDATA[This class encapsulates a set of MapReduce jobs and their dependencies.
| |
| It tracks the states of the jobs by placing them into different tables |
| according to their states. |
| |
| This class provides APIs for the client app to add a job to the group |
| and to get the jobs in the group in different states. When a job is |
| added, an ID unique to the group is assigned to the job. |
| |
| This class has a thread that submits jobs when they become ready, |
| monitors the states of the running jobs, and updates the states of jobs |
  based on the state changes of their depending jobs. The class
| provides APIs for suspending/resuming the thread, and |
| for stopping the thread.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.join"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator --> |
| <class name="ArrayListBackedIterator" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/> |
| <constructor name="ArrayListBackedIterator" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="ArrayListBackedIterator" type="java.util.ArrayList" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="X"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="replay" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="X"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="item" type="X"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="clear" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[This class provides an implementation of ResetableIterator. The |
| implementation uses an {@link java.util.ArrayList} to store elements |
| added to it, replaying them as requested. |
| Prefer {@link StreamBackedIterator}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat --> |
| <class name="ComposableInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ComposableInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <doc> |
| <![CDATA[Refinement of InputFormat requiring implementors to provide |
| ComposableRecordReader instead of RecordReader.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader --> |
| <class name="ComposableRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="java.lang.Comparable"/> |
| <constructor name="ComposableRecordReader" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[Additional operations required of a RecordReader to participate in a join.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat --> |
| <class name="CompositeInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="CompositeInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Interpret a given string as a composite expression. |
| {@code |
| func ::= <ident>([<func>,]*<func>) |
| func ::= tbl(<class>,"<path>") |
| class ::= @see java.lang.Class#forName(java.lang.String) |
| path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) |
| } |
| Reads expression from the <tt>mapreduce.join.expr</tt> property and |
| user-supplied join types from <tt>mapreduce.join.define.<ident></tt> |
| types. Paths supplied to <tt>tbl</tt> are given as input paths to the |
| InputFormat class listed. |
| @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> |
| </doc> |
| </method> |
| <method name="addDefaults" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Adds the default set of identifiers to the parser.]]> |
| </doc> |
| </method> |
| <method name="getSplits" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the |
| ith split from each child to the ith composite split.]]> |
| </doc> |
| </method> |
| <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Construct a CompositeRecordReader for the children of this InputFormat |
| as defined in the init expression. |
| The outermost join need only be composable, not necessarily a composite. |
| Mandating TupleWritable isn't strictly correct.]]> |
| </doc> |
| </method> |
| <method name="compose" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="inf" type="java.lang.Class"/> |
| <param name="path" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Convenience method for constructing composite formats. |
| Given InputFormat class (inf), path (p) return: |
| {@code tbl(<inf>, <p>) }]]> |
| </doc> |
| </method> |
| <method name="compose" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="op" type="java.lang.String"/> |
| <param name="inf" type="java.lang.Class"/> |
| <param name="path" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[Convenience method for constructing composite formats. |
Given operation (op), InputFormat class (inf), set of paths (p) return:
| {@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]> |
| </doc> |
| </method> |
| <method name="compose" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="op" type="java.lang.String"/> |
| <param name="inf" type="java.lang.Class"/> |
| <param name="path" type="org.apache.hadoop.fs.Path[]"/> |
| <doc> |
| <![CDATA[Convenience method for constructing composite formats. |
Given operation (op), InputFormat class (inf), set of paths (p) return:
| {@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]> |
| </doc> |
| </method> |
| <field name="JOIN_EXPR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="JOIN_COMPARATOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[An InputFormat capable of performing joins over a set of data sources sorted |
| and partitioned the same way. |
| |
| A user may define new join types by setting the property |
| <tt>mapreduce.join.define.<ident></tt> to a classname. |
| In the expression <tt>mapreduce.join.expr</tt>, the identifier will be |
| assumed to be a ComposableRecordReader. |
| <tt>mapreduce.join.keycomparator</tt> can be a classname used to compare |
| keys in the join. |
| @see #setFormat |
| @see JoinRecordReader |
| @see MultiFilterRecordReader]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit --> |
| <class name="CompositeInputSplit" extends="org.apache.hadoop.mapreduce.InputSplit" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="CompositeInputSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CompositeInputSplit" type="int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="s" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Add an InputSplit to this collection. |
| @throws IOException If capacity was not specified during construction |
| or if capacity has been reached.]]> |
| </doc> |
| </method> |
| <method name="get" return="org.apache.hadoop.mapreduce.InputSplit" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Get ith child InputSplit.]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return the aggregate length of all child InputSplits currently added.]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the length of ith child InputSplit.]]> |
| </doc> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Collect a set of hosts from all child InputSplits.]]> |
| </doc> |
| </method> |
| <method name="getLocation" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
<![CDATA[Get the locations of the ith child InputSplit.]]>
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Write splits in the following format. |
| {@code |
| <count><class1><class2>...<classn><split1><split2>...<splitn> |
| }]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc} |
| @throws IOException If the child InputSplit cannot be read, typically |
| for failing access checks.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted |
| into this collection must have a public default constructor.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader --> |
| <class name="CompositeRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <constructor name="CompositeRecordReader" type="int, int, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create a RecordReader with <tt>capacity</tt> children to position |
| <tt>id</tt> in the parent reader. |
| The id of a root CompositeRecordReader is -1 by convention, but relying |
| on this is not recommended.]]> |
| </doc> |
| </constructor> |
| <method name="combine" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="value" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> |
| </method> |
| <method name="initialize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="id" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the position in the collector this class occupies.]]> |
| </doc> |
| </method> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getRecordReaderQueue" return="java.util.PriorityQueue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return sorted list of RecordReaders for this composite.]]> |
| </doc> |
| </method> |
| <method name="getComparator" return="org.apache.hadoop.io.WritableComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return comparator defining the ordering for RecordReaders in this |
| composite.]]> |
| </doc> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="rr" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Add a RecordReader to this collection. |
| The id() of a RecordReader determines where in the Tuple its |
| entry will appear. Adding RecordReaders with the same id has |
| undefined behavior.]]> |
| </doc> |
| </method> |
| <method name="key" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the key for the current join or the value at the top of the |
| RecordReader heap.]]> |
| </doc> |
| </method> |
| <method name="key" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Clone the key at the top of this RR into the given object.]]> |
| </doc> |
| </method> |
| <method name="getCurrentKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return true if it is possible that this could emit more values.]]> |
| </doc> |
| </method> |
| <method name="skip" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Pass skip key to child RRs.]]> |
| </doc> |
| </method> |
| <method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Obtain an iterator over the child RRs apropos of the value type |
| ultimately emitted from this join.]]> |
| </doc> |
| </method> |
| <method name="accept" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jc" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector"/> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[If key provided matches that of this Composite, give JoinCollector |
| iterator over values it may emit.]]> |
| </doc> |
| </method> |
| <method name="fillJoinCollector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="iterkey" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[For all child RRs offering the key provided, obtain an iterator |
| at that position in the JoinCollector.]]> |
| </doc> |
| </method> |
| <method name="compareTo" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/> |
| <doc> |
| <![CDATA[Implement Comparable contract (compare key of join or head of heap |
| with that of another).]]> |
| </doc> |
| </method> |
| <method name="createKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a new key common to all child RRs. |
| @throws ClassCastException if key classes differ.]]> |
| </doc> |
| </method> |
| <method name="createTupleWritable" return="org.apache.hadoop.mapreduce.lib.join.TupleWritable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a value to be used internally for joins.]]> |
| </doc> |
| </method> |
| <method name="getCurrentValue" return="X" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close all child RRs.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Report progress as the minimum of all child RR progress.]]> |
| </doc> |
| </method> |
| <field name="conf" type="org.apache.hadoop.conf.Configuration" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="keyclass" type="java.lang.Class" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="jc" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector" |
| transient="false" volatile="false" |
| static="false" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="kids" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader[]" |
| transient="false" volatile="false" |
| static="false" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="key" type="K" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="value" type="X" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key |
| type and partitioning.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.InnerJoinRecordReader --> |
| <class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.JoinRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="combine" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Return true iff the tuple is full (all data sources contain this key).]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Full inner join.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.InnerJoinRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader --> |
| <class name="JoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JoinRecordReader" type="int, org.apache.hadoop.conf.Configuration, int, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="nextKeyValue" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Emit the next set of key, value pairs as defined by the child |
| RecordReaders and operation associated with this composite RR.]]> |
| </doc> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.mapreduce.lib.join.TupleWritable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return an iterator wrapping the JoinCollector.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader --> |
| <class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultiFilterRecordReader" type="int, org.apache.hadoop.conf.Configuration, int, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="emit" return="V" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For each tuple emitted, return a value (typically one of the values |
| in the tuple). |
| Modifying the Writables in the tuple is permitted and unlikely to affect |
| join behavior in most cases, but it is not recommended. It's safer to |
| clone first.]]> |
| </doc> |
| </method> |
| <method name="combine" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Default implementation offers {@link #emit} every Tuple from the |
| collector (the outer join of child RRs).]]> |
| </doc> |
| </method> |
| <method name="nextKeyValue" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="initialize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return an iterator returning a single value from the tuple. |
| @see MultiFilterDelegationIterator]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Base class for Composite join returning values derived from multiple |
| sources, but generally not tuples.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.OuterJoinRecordReader --> |
| <class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.JoinRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="combine" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Emit everything from the collector.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Full outer join.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.OuterJoinRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.OverrideRecordReader --> |
| <class name="OverrideRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="emit" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Emit the value with the highest position in the tuple.]]> |
| </doc> |
| </method> |
| <method name="createValue" return="V" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="fillJoinCollector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="iterkey" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Instead of filling the JoinCollector with iterators from all |
| data sources, fill only the rightmost for this key. |
| This not only saves space by discarding the other sources, but |
| it also emits the number of key-value pairs in the preferred |
| RecordReader instead of repeating that stream n times, where |
| n is the cardinality of the cross product of the discarded |
| streams for the given key.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Prefer the "rightmost" data source for this key. |
| For example, <tt>override(S1,S2,S3)</tt> will prefer values |
| from S3 over S2, and values from S2 over S1 for all keys |
| emitted from all sources.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.OverrideRecordReader --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser --> |
| <class name="Parser" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Parser" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[Very simple shift-reduce parser for join expressions. |
| |
| This should be sufficient for the user extension permitted now, but ought to |
| be replaced with a parser generator if more complex grammars are supported. |
| In particular, this "shift-reduce" parser has no states. Each set |
| of formals requires a different internal node type, which is responsible for |
| interpreting the list of tokens it receives. This is sufficient for the |
| current grammar, but it has several annoying properties that might inhibit |
extension. In particular, parentheses are always function calls; an
| algebraic or filter grammar would not only require a node type, but must |
| also work around the internals of this parser. |
| |
For most other cases, adding classes to the hierarchy -- particularly by
extending JoinRecordReader and MultiFilterRecordReader -- is fairly
| straightforward. One need only override the relevant method(s) (usually only |
| {@link CompositeRecordReader#combine}) and include a property to map its |
| value to an identifier in the parser.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.Node --> |
| <class name="Parser.Node" extends="org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat" |
| abstract="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Node" type="java.lang.String" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="addIdentifier" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="ident" type="java.lang.String"/> |
| <param name="mcstrSig" type="java.lang.Class[]"/> |
| <param name="nodetype" type="java.lang.Class"/> |
| <param name="cl" type="java.lang.Class"/> |
| <exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/> |
| <doc> |
| <![CDATA[For a given identifier, add a mapping to the nodetype for the parse |
| tree and to the ComposableRecordReader to be created, including the |
| formals required to invoke the constructor. |
| The nodetype and constructor signature should be filled in from the |
| child node.]]> |
| </doc> |
| </method> |
| <method name="setID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="id" type="int"/> |
| </method> |
| <method name="setKeyComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="cmpcl" type="java.lang.Class"/> |
| </method> |
| <field name="rrCstrMap" type="java.util.Map" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="id" type="int" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="ident" type="java.lang.String" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="cmpcl" type="java.lang.Class" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.Node --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.NodeToken --> |
| <class name="Parser.NodeToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getNode" return="org.apache.hadoop.mapreduce.lib.join.Parser.Node" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.NodeToken --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.NumToken --> |
| <class name="Parser.NumToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="NumToken" type="double" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getNum" return="double" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.NumToken --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.StrToken --> |
| <class name="Parser.StrToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="StrToken" type="org.apache.hadoop.mapreduce.lib.join.Parser.TType, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getStr" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.StrToken --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.Token --> |
| <class name="Parser.Token" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getType" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getNode" return="org.apache.hadoop.mapreduce.lib.join.Parser.Node" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getNum" return="double" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getStr" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Tagged-union type for tokens from the join expression. |
| @see Parser.TType]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.Token --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.TType --> |
| <class name="Parser.TType" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.TType --> |
| <!-- start interface org.apache.hadoop.mapreduce.lib.join.ResetableIterator --> |
| <interface name="ResetableIterator" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="hasNext" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[True if a call to next may return a value. False positives are |
| permitted, but false negatives are not.]]> |
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="T"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Assign next value to actual. |
| It is required that elements added to a ResetableIterator be returned in |
| the same order after a call to {@link #reset} (FIFO). |
| |
| Note that a call to this may fail for nested joins (i.e. more elements |
| available, but none satisfying the constraints of the join)]]> |
| </doc> |
| </method> |
| <method name="replay" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="T"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Assign last value returned to actual.]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Set iterator to return to the start of its range. Must be called after |
| calling {@link #add} to avoid a ConcurrentModificationException.]]> |
| </doc> |
| </method> |
| <method name="add" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="item" type="T"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add an element to the collection of elements to iterate over.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close datasources and release resources. Calling methods on the iterator |
| after calling close has undefined behavior.]]> |
| </doc> |
| </method> |
| <method name="clear" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Close datasources, but do not release internal resources. Calling this |
| method should permit the object to be reused with a different datasource.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This defines an interface to a stateful Iterator that can replay elements |
| added to it directly. |
| Note that this does not extend {@link java.util.Iterator}.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.lib.join.ResetableIterator --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator --> |
| <class name="StreamBackedIterator" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/> |
| <constructor name="StreamBackedIterator" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="X"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="replay" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="X"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="item" type="X"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="clear" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[This class provides an implementation of ResetableIterator. This |
| implementation uses a byte array to store elements added to it.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.TupleWritable --> |
| <class name="TupleWritable" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <implements name="java.lang.Iterable"/> |
| <constructor name="TupleWritable" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create an empty tuple with no allocated storage for writables.]]> |
| </doc> |
| </constructor> |
| <constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Initialize tuple with storage; unknown whether any of them contain |
| "written" values.]]> |
| </doc> |
| </constructor> |
| <method name="has" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Return true if tuple has an element at the position provided.]]> |
| </doc> |
| </method> |
| <method name="get" return="org.apache.hadoop.io.Writable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Get ith Writable from Tuple.]]> |
| </doc> |
| </method> |
| <method name="size" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The number of children in this Tuple.]]> |
| </doc> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="iterator" return="java.util.Iterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return an iterator over the elements in this tuple. |
| Note that this doesn't flatten the tuple; one may receive tuples |
| from this iterator.]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Convert Tuple to String as in the following. |
| <tt>[<child1>,<child2>,...,<childn>]</tt>]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Writes each Writable to <code>out</code>. |
| TupleWritable format: |
| {@code |
| <count><type1><type2>...<typen><obj1><obj2>...<objn> |
| }]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <field name="written" type="java.util.BitSet" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s. |
| |
| This is *not* a general-purpose tuple type. In almost all cases, users are |
| encouraged to implement their own serializable types, which can perform |
| better validation and provide more efficient encodings than this class is |
| capable of. TupleWritable relies on the join framework for type safety and |
| assumes its instances will rarely be persisted, assumptions not only |
| incompatible with, but contrary to the general case. |
| |
| @see org.apache.hadoop.io.Writable]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.TupleWritable --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.join.WrappedRecordReader --> |
| <class name="WrappedRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="WrappedRecordReader" type="int" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="initialize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="createKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Request new key from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="createValue" return="U" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="id" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="key" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the key at the head of this RR.]]> |
| </doc> |
| </method> |
| <method name="key" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="qkey" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Clone the key at the head of this RR into the object supplied.]]> |
| </doc> |
| </method> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return true if the RR -- including the k,v pair stored in this |
| object -- is exhausted.]]> |
| </doc> |
| </method> |
| <method name="skip" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]> |
| </doc> |
| </method> |
| <method name="accept" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector"/> |
| <param name="key" type="K"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Add an iterator to the collector at the position occupied by this |
| RecordReader over the values in this stream paired with the key |
| provided (ie register a stream of values from this source matching K |
| with a collector).]]> |
| </doc> |
| </method> |
| <method name="nextKeyValue" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Read the next k,v pair into the head of this object; return true iff |
| the RR and this are exhausted.]]> |
| </doc> |
| </method> |
| <method name="getCurrentKey" return="K" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get current key]]> |
| </doc> |
| </method> |
| <method name="getCurrentValue" return="U" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get current value]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Request progress from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Forward close request to proxied RR.]]> |
| </doc> |
| </method> |
| <method name="compareTo" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/> |
| <doc> |
| <![CDATA[Implement Comparable contract (compare key at head of proxied RR |
| with that of another).]]> |
| </doc> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[Return true iff compareTo(other) returns 0.]]> |
| </doc> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <field name="empty" type="boolean" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="cmp" type="org.apache.hadoop.io.WritableComparator" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Proxy class for a RecordReader participating in the join framework. |
| |
| This class keeps track of the "head" key-value pair for the |
| provided RecordReader and keeps a store of values matching a key when |
| this source is participating in a join.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.join.WrappedRecordReader --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.map"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.map.InverseMapper --> |
| <class name="InverseMapper" extends="org.apache.hadoop.mapreduce.Mapper" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InverseMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[The inverse function. Input keys and values are swapped.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A {@link Mapper} that swaps keys and values.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.map.InverseMapper --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper --> |
| <class name="MultithreadedMapper" extends="org.apache.hadoop.mapreduce.Mapper" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultithreadedMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getNumberOfThreads" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[The number of threads in the thread pool that will run the map function. |
| @param job the job |
| @return the number of threads]]> |
| </doc> |
| </method> |
| <method name="setNumberOfThreads" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="threads" type="int"/> |
| <doc> |
| <![CDATA[Set the number of threads in the pool for running maps. |
| @param job the job to modify |
| @param threads the new number of threads]]> |
| </doc> |
| </method> |
| <method name="getMapperClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the application's mapper class. |
| @param <K1> the map's input key type |
| @param <V1> the map's input value type |
| @param <K2> the map's output key type |
| @param <V2> the map's output value type |
| @param job the job |
| @return the mapper class to run]]> |
| </doc> |
| </method> |
| <method name="setMapperClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="cls" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the application's mapper class. |
| @param <K1> the map input key type |
| @param <V1> the map input value type |
| @param <K2> the map output key type |
| @param <V2> the map output value type |
| @param job the job to modify |
| @param cls the class to use as the mapper]]> |
| </doc> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Run the application's maps using a thread pool.]]> |
| </doc> |
| </method> |
| <field name="NUM_THREADS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="MAP_CLASS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Multithreaded implementation for {@link org.apache.hadoop.mapreduce.Mapper}. |
| <p> |
| It can be used instead of the default implementation, |
| {@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU |
| bound in order to improve throughput. |
| <p> |
| Mapper implementations using this MapRunnable must be thread-safe. |
| <p> |
| The Map-Reduce job has to be configured with the mapper to use via |
| {@link #setMapperClass(Job, Class)} and |
| the number of threads the thread-pool can use with the |
| {@link #setNumberOfThreads(Job, int)} method. The default |
| value is 10 threads. |
| <p>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.map.RegexMapper --> |
| <class name="RegexMapper" extends="org.apache.hadoop.mapreduce.Mapper" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="RegexMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setup" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| </method> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="org.apache.hadoop.io.Text"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <field name="PATTERN" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="GROUP" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.map.RegexMapper --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper --> |
| <class name="TokenCounterMapper" extends="org.apache.hadoop.mapreduce.Mapper" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TokenCounterMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="org.apache.hadoop.io.Text"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <doc> |
| <![CDATA[Tokenize the input values and emit each word with a count of 1.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.map.WrappedMapper --> |
| <class name="WrappedMapper" extends="org.apache.hadoop.mapreduce.Mapper" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="WrappedMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getMapContext" return="org.apache.hadoop.mapreduce.Mapper.Context" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="mapContext" type="org.apache.hadoop.mapreduce.MapContext"/> |
| <doc> |
| <![CDATA[Get a wrapped {@link Mapper.Context} for custom implementations. |
| @param mapContext <code>MapContext</code> to be wrapped |
| @return a wrapped <code>Mapper.Context</code> for custom implementations]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A {@link Mapper} which wraps a given one to allow custom |
| {@link Mapper.Context} implementations.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.map.WrappedMapper --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.output"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.BindingPathOutputCommitter --> |
| <class name="BindingPathOutputCommitter" extends="org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="BindingPathOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Instantiate. |
| @param outputPath output path (may be null) |
| @param context task context |
| @throws IOException on any failure.]]> |
| </doc> |
| </constructor> |
| <method name="getOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getWorkPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setupTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="needsTaskCommit" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="commitTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="abortTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="cleanupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="commitJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="abortJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="isRecoverySupported" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="isCommitJobRepeatable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="isRecoverySupported" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="recoverTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="hasOutputPath" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getCommitter" return="org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the inner committer. |
| @return the bonded committer.]]> |
| </doc> |
| </method> |
| <field name="NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The classname for use in configurations.]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[This is a special committer which creates the factory for the committer and |
| runs off that. Why does it exist? So that you can explicitly instantiate |
| a committer by classname and yet still have the actual implementation |
| driven dynamically by the factory options and destination filesystem. |
| This simplifies integration |
| with existing code which takes the classname of a committer. |
| There's no factory for this, as that would lead to a loop. |
| |
| All commit protocol methods and accessors are delegated to the |
| wrapped committer. |
| |
| How to use: |
| |
| <ol> |
| <li> |
| In applications which take a classname of committer in |
| a configuration option, set it to the canonical name of this class |
| (see {@link #NAME}). When this class is instantiated, it will |
| use the factory mechanism to locate the configured committer for the |
| destination. |
| </li> |
| <li> |
| In code, explicitly create an instance of this committer through |
| its constructor, then invoke commit lifecycle operations on it. |
| The dynamically configured committer will be created in the constructor |
| and have the lifecycle operations relayed to it. |
| </li> |
| </ol>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.BindingPathOutputCommitter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter --> |
| <class name="FileOutputCommitter" extends="org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create a file output committer |
| @param outputPath the job's output path, or null if you want the output |
| committer to act as a noop. |
| @param context the task's context |
| @throws IOException]]> |
| </doc> |
| </constructor> |
| <constructor name="FileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.JobContext" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create a file output committer |
| @param outputPath the job's output path, or null if you want the output |
| committer to act as a noop. |
| @param context the task's context |
| @throws IOException]]> |
| </doc> |
| </constructor> |
| <method name="getOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the path where final output of the job should be placed. This |
| could also be considered the committed application attempt path.]]> |
| </doc> |
| </method> |
| <method name="getJobAttemptPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Compute the path where the output of a given job attempt will be placed. |
| @param context the context of the job. This is used to get the |
| application attempt id. |
| @return the path to store job attempt data.]]> |
| </doc> |
| </method> |
| <method name="getJobAttemptPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="out" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Compute the path where the output of a given job attempt will be placed. |
| @param context the context of the job. This is used to get the |
| application attempt id. |
| @param out the output path to place these in. |
| @return the path to store job attempt data.]]> |
| </doc> |
| </method> |
| <method name="getJobAttemptPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="appAttemptId" type="int"/> |
| <doc> |
| <![CDATA[Compute the path where the output of a given job attempt will be placed. |
| @param appAttemptId the ID of the application attempt for this job. |
| @return the path to store job attempt data.]]> |
| </doc> |
| </method> |
| <method name="getTaskAttemptPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <doc> |
| <![CDATA[Compute the path where the output of a task attempt is stored until |
| that task is committed. |
| |
| @param context the context of the task attempt. |
| @return the path where a task attempt should be stored.]]> |
| </doc> |
| </method> |
| <method name="getTaskAttemptPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <param name="out" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Compute the path where the output of a task attempt is stored until |
| that task is committed. |
| |
| @param context the context of the task attempt. |
| @param out The output path to put things in. |
| @return the path where a task attempt should be stored.]]> |
| </doc> |
| </method> |
| <method name="getCommittedTaskPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <doc> |
| <![CDATA[Compute the path where the output of a committed task is stored until |
| the entire job is committed. |
| @param context the context of the task attempt |
| @return the path where the output of a committed task is stored until |
| the entire job is committed.]]> |
| </doc> |
| </method> |
| <method name="getCommittedTaskPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <param name="out" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <method name="getCommittedTaskPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="appAttemptId" type="int"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <doc> |
| <![CDATA[Compute the path where the output of a committed task is stored until the |
| entire job is committed for a specific application attempt. |
| @param appAttemptId the id of the application attempt to use |
| @param context the context of any task. |
| @return the path where the output of a committed task is stored.]]> |
| </doc> |
| </method> |
| <method name="getWorkPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the directory that the task should write results into. |
| @return the work directory |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create the temporary directory that is the root of all of the task |
| work directories. |
| @param context the job's context]]> |
| </doc> |
| </method> |
| <method name="commitJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
      <![CDATA[The job has completed, so do the work in commitJobInternal().
| Could retry on failure if using algorithm 2. |
| @param context the job's context]]> |
| </doc> |
| </method> |
| <method name="commitJobInternal" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
      <![CDATA[The job has completed, so perform the following commit-job steps:
 Move all committed tasks to the final output dir (algorithm 1 only).
 Delete the temporary directory, including all of the work directories.
 Create a _SUCCESS file to mark the job as successful.
| @param context the job's context]]> |
| </doc> |
| </method> |
| <method name="cleanupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="abortJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Delete the temporary directory, including all of the work directories. |
| @param context the job's context]]> |
| </doc> |
| </method> |
| <method name="setupTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[No task setup required.]]> |
| </doc> |
| </method> |
| <method name="commitTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Move the files from the work directory to the job output directory |
| @param context the task context]]> |
| </doc> |
| </method> |
| <method name="abortTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Delete the work directory |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="needsTaskCommit" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Did this task write any files in the work directory? |
| @param context the task's context]]> |
| </doc> |
| </method> |
| <method name="isRecoverySupported" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="isCommitJobRepeatable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="recoverTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <field name="PENDING_DIR_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Name of directory where pending data is placed. Data that has not been |
| committed yet.]]> |
| </doc> |
| </field> |
| <field name="TEMP_DIR_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Temporary directory name |
| |
| The static variable to be compatible with M/R 1.x]]> |
| </doc> |
| </field> |
| <field name="SUCCEEDED_FILE_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SUCCESSFUL_JOB_OUTPUT_DIR_MARKER" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILEOUTPUTCOMMITTER_ALGORITHM_VERSION" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_DEFAULT" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILEOUTPUTCOMMITTER_CLEANUP_SKIPPED" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILEOUTPUTCOMMITTER_CLEANUP_SKIPPED_DEFAULT" type="boolean" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILEOUTPUTCOMMITTER_CLEANUP_FAILURES_IGNORED" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILEOUTPUTCOMMITTER_CLEANUP_FAILURES_IGNORED_DEFAULT" type="boolean" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS_DEFAULT" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILEOUTPUTCOMMITTER_TASK_CLEANUP_ENABLED" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FILEOUTPUTCOMMITTER_TASK_CLEANUP_ENABLED_DEFAULT" type="boolean" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[An {@link OutputCommitter} that commits files specified |
| in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat --> |
| <class name="FileOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FileOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setCompressOutput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="compress" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the output of the job is compressed. |
| @param job the job to modify |
| @param compress should the output of the job be compressed?]]> |
| </doc> |
| </method> |
| <method name="getCompressOutput" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Is the job output compressed? |
| @param job the Job to look in |
| @return <code>true</code> if the job output should be compressed, |
| <code>false</code> otherwise]]> |
| </doc> |
| </method> |
| <method name="setOutputCompressorClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="codecClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs. |
| @param job the job to modify |
| @param codecClass the {@link CompressionCodec} to be used to |
| compress the job outputs]]> |
| </doc> |
| </method> |
| <method name="getOutputCompressorClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="defaultValue" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Get the {@link CompressionCodec} for compressing the job outputs. |
| @param job the {@link Job} to look in |
| @param defaultValue the {@link CompressionCodec} to return if not set |
| @return the {@link CompressionCodec} to be used to compress the |
| job outputs |
| @throws IllegalArgumentException if the class was specified, but not found]]> |
| </doc> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setOutputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="outputDir" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Set the {@link Path} of the output directory for the map-reduce job. |
| |
| @param job The job to modify |
| @param outputDir the {@link Path} of the output directory for |
| the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the {@link Path} to the output directory for the map-reduce job. |
| |
| @return the {@link Path} to the output directory for the map-reduce job. |
| @see FileOutputFormat#getWorkOutputPath(TaskInputOutputContext)]]> |
| </doc> |
| </method> |
| <method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Get the {@link Path} to the task's temporary output directory |
| for the map-reduce job |
| |
| <b id="SideEffectFiles">Tasks' Side-Effect Files</b> |
| |
| <p>Some applications need to create/write-to side-files, which differ from |
| the actual job-outputs. |
| |
| <p>In such cases there could be issues with 2 instances of the same TIP |
| (running simultaneously e.g. speculative tasks) trying to open/write-to the |
| same file (path) on HDFS. Hence the application-writer will have to pick |
| unique names per task-attempt (e.g. using the attemptid, say |
| <tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p> |
| |
| <p>To get around this the Map-Reduce framework helps the application-writer |
| out by maintaining a special |
| <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> |
| sub-directory for each task-attempt on HDFS where the output of the |
| task-attempt goes. On successful completion of the task-attempt the files |
| in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only) |
| are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the |
| framework discards the sub-directory of unsuccessful task-attempts. This |
| is completely transparent to the application.</p> |
| |
| <p>The application-writer can take advantage of this by creating any |
| side-files required in a work directory during execution |
| of his task i.e. via |
| {@link #getWorkOutputPath(TaskInputOutputContext)}, and |
| the framework will move them out similarly - thus she doesn't have to pick |
| unique paths per task-attempt.</p> |
| |
| <p>The entire discussion holds true for maps of jobs with |
| reducer=NONE (i.e. 0 reduces) since output of the map, in that case, |
| goes directly to HDFS.</p> |
| |
| @return the {@link Path} to the task's temporary output directory |
| for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getPathForWorkFile" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="extension" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Helper function to generate a {@link Path} for a file that is unique for |
| the task within the job output directory. |
| |
| <p>The path can be used to create custom files from within the map and |
| reduce tasks. The path name will be unique for each task. The path parent |
will be the job output directory.</p>
| |
| <p>This method uses the {@link #getUniqueFile} method to make the file name |
| unique for the task.</p> |
| |
| @param context the context for the task. |
| @param name the name for the file. |
| @param extension the extension for the file |
 @return a unique path across all tasks of the job.]]>
| </doc> |
| </method> |
| <method name="getUniqueFile" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="extension" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Generate a unique filename, based on the task id, name, and extension |
| @param context the task that is calling this |
| @param name the base filename |
| @param extension the filename extension |
| @return a string like $name-[mrsct]-$id$extension]]> |
| </doc> |
| </method> |
| <method name="getDefaultWorkFile" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <param name="extension" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the default path and filename for the output format. |
| @param context the task context |
| @param extension an extension to add to the filename |
| @return a full path $output/_temporary/$taskid/part-[mr]-$id |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getOutputName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the base output name for the output file.]]> |
| </doc> |
| </method> |
| <method name="setOutputName" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the base output name for output file to be created.]]> |
| </doc> |
| </method> |
| <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="BASE_OUTPUT_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="PART" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="COMPRESS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Configuration option: should output be compressed? {@value}.]]> |
| </doc> |
| </field> |
| <field name="COMPRESS_CODEC" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[If compression is enabled, name of codec: {@value}.]]> |
| </doc> |
| </field> |
| <field name="COMPRESS_TYPE" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Type of compression {@value}: NONE, RECORD, BLOCK. |
| Generally only used in {@code SequenceFileOutputFormat}.]]> |
| </doc> |
| </field> |
| <field name="OUTDIR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Destination directory of work: {@value}.]]> |
| </doc> |
| </field> |
| <doc> |
      <![CDATA[A base class for {@link OutputFormat}s that write to {@link FileSystem}s.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter --> |
| <class name="FileOutputFormatCounter" extends="java.lang.Enum" |
| abstract="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat --> |
| <class name="FilterOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FilterOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="FilterOutputFormat" type="org.apache.hadoop.mapreduce.OutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a FilterOutputFormat based on the underlying output format. |
| @param baseOut the underlying OutputFormat]]> |
| </doc> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <field name="baseOut" type="org.apache.hadoop.mapreduce.OutputFormat" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[FilterOutputFormat is a convenience class that wraps OutputFormat.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat --> |
| <class name="LazyOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="LazyOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setOutputFormatClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the underlying output format for LazyOutputFormat. |
| @param job the {@link Job} to modify |
| @param theClass the underlying class]]> |
| </doc> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <field name="OUTPUT_FORMAT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A Convenience class that creates output lazily. |
 Use in conjunction with org.apache.hadoop.mapreduce.lib.output.MultipleOutputs to recreate the
| behaviour of org.apache.hadoop.mapred.lib.MultipleTextOutputFormat (etc) of the old Hadoop API. |
| See {@link MultipleOutputs} documentation for more information.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat --> |
| <class name="MapFileOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MapFileOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="dir" type="org.apache.hadoop.fs.Path"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Open the output generated by this format.]]> |
| </doc> |
| </method> |
| <method name="getEntry" return="org.apache.hadoop.io.Writable" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/> |
| <param name="partitioner" type="org.apache.hadoop.mapreduce.Partitioner"/> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get an entry from output generated by this class.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes |
| {@link MapFile}s.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.MultipleOutputs --> |
| <class name="MultipleOutputs" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultipleOutputs" type="org.apache.hadoop.mapreduce.TaskInputOutputContext" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Creates and initializes multiple outputs support, |
| it should be instantiated in the Mapper/Reducer setup method. |
| |
| @param context the TaskInputOutputContext object]]> |
| </doc> |
| </constructor> |
| <method name="addNamedOutput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="namedOutput" type="java.lang.String"/> |
| <param name="outputFormatClass" type="java.lang.Class"/> |
| <param name="keyClass" type="java.lang.Class"/> |
| <param name="valueClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Adds a named output for the job. |
| |
| @param job job to add the named output |
| @param namedOutput named output name, it has to be a word, letters |
| and numbers only, cannot be the word 'part' as |
| that is reserved for the default output. |
| @param outputFormatClass OutputFormat class. |
| @param keyClass key class |
| @param valueClass value class]]> |
| </doc> |
| </method> |
| <method name="setCountersEnabled" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="enabled" type="boolean"/> |
| <doc> |
| <![CDATA[Enables or disables counters for the named outputs. |
| |
| The counters group is the {@link MultipleOutputs} class name. |
| The names of the counters are the same as the named outputs. These |
 counters count the number of records written to each output name.
| By default these counters are disabled. |
| |
| @param job job to enable counters |
| @param enabled indicates if the counters will be enabled or not.]]> |
| </doc> |
| </method> |
| <method name="getCountersEnabled" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Returns if the counters for the named outputs are enabled or not. |
| By default these counters are disabled. |
| |
| @param job the job |
| @return TRUE if the counters are enabled, FALSE if they are disabled.]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="namedOutput" type="java.lang.String"/> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Write key and value to the namedOutput. |
| |
| Output path is a unique file generated for the namedOutput. |
| For example, {namedOutput}-(m|r)-{part-number} |
| |
| @param namedOutput the named output name |
| @param key the key |
| @param value the value]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="namedOutput" type="java.lang.String"/> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <param name="baseOutputPath" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Write key and value to baseOutputPath using the namedOutput. |
| |
| @param namedOutput the named output name |
| @param key the key |
| @param value the value |
| @param baseOutputPath base-output path to write the record to. |
| Note: Framework will generate unique filename for the baseOutputPath |
| <b>Warning</b>: when the baseOutputPath is a path that resolves |
| outside of the final job output directory, the directory is created |
| immediately and then persists through subsequent task retries, breaking |
| the concept of output committing.]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="KEYOUT"/> |
| <param name="value" type="VALUEOUT"/> |
| <param name="baseOutputPath" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Write key value to an output file name. |
| |
| Gets the record writer from job's output format. |
| Job's output format should be a FileOutputFormat. |
| |
| @param key the key |
| @param value the value |
| @param baseOutputPath base-output path to write the record to. |
| Note: Framework will generate unique filename for the baseOutputPath |
| <b>Warning</b>: when the baseOutputPath is a path that resolves |
| outside of the final job output directory, the directory is created |
| immediately and then persists through subsequent task retries, breaking |
| the concept of output committing.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Closes all the opened outputs. |
| |
| This should be called from cleanup method of map/reduce task. |
| If overridden subclasses must invoke <code>super.close()</code> at the |
| end of their <code>close()</code>]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The MultipleOutputs class simplifies writing output data |
| to multiple outputs |
| |
| <p> |
| Case one: writing to additional outputs other than the job default output. |
| |
| Each additional output, or named output, may be configured with its own |
| <code>OutputFormat</code>, with its own key class and with its own value |
| class. |
| </p> |
| |
| <p> |
| Case two: to write data to different files provided by user |
| </p> |
| |
| <p> |
| MultipleOutputs supports counters, by default they are disabled. The |
| counters group is the {@link MultipleOutputs} class name. The names of the |
 counters are the same as the output name. These count the number of records
| written to each output name. |
| </p> |
| |
| Usage pattern for job submission: |
| <pre> |
| |
| Job job = new Job(); |
| |
| FileInputFormat.setInputPath(job, inDir); |
| FileOutputFormat.setOutputPath(job, outDir); |
| |
| job.setMapperClass(MOMap.class); |
| job.setReducerClass(MOReduce.class); |
| ... |
| |
| // Defines additional single text based output 'text' for the job |
| MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, |
| LongWritable.class, Text.class); |
| |
| // Defines additional sequence-file based output 'sequence' for the job |
| MultipleOutputs.addNamedOutput(job, "seq", |
| SequenceFileOutputFormat.class, |
| LongWritable.class, Text.class); |
| ... |
| |
| job.waitForCompletion(true); |
| ... |
| </pre> |
| <p> |
| Usage in Reducer: |
| <pre> |
| <K, V> String generateFileName(K k, V v) { |
| return k.toString() + "_" + v.toString(); |
| } |
| |
| public class MOReduce extends |
| Reducer<WritableComparable, Writable,WritableComparable, Writable> { |
| private MultipleOutputs mos; |
| public void setup(Context context) { |
| ... |
| mos = new MultipleOutputs(context); |
| } |
| |
| public void reduce(WritableComparable key, Iterator<Writable> values, |
| Context context) |
| throws IOException { |
| ... |
 mos.write("text", key, new Text("Hello"));
| mos.write("seq", LongWritable(1), new Text("Bye"), "seq_a"); |
| mos.write("seq", LongWritable(2), key, new Text("Chau"), "seq_b"); |
| mos.write(key, new Text("value"), generateFileName(key, new Text("value"))); |
| ... |
| } |
| |
| public void cleanup(Context) throws IOException { |
| mos.close(); |
| ... |
| } |
| |
| } |
| </pre> |
| |
| <p> |
 When used in conjunction with org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat,
| MultipleOutputs can mimic the behaviour of MultipleTextOutputFormat and MultipleSequenceFileOutputFormat |
| from the old Hadoop API - ie, output can be written from the Reducer to more than one location. |
| </p> |
| |
| <p> |
| Use <code>MultipleOutputs.write(KEYOUT key, VALUEOUT value, String baseOutputPath)</code> to write key and |
| value to a path specified by <code>baseOutputPath</code>, with no need to specify a named output. |
| <b>Warning</b>: when the baseOutputPath passed to MultipleOutputs.write |
| is a path that resolves outside of the final job output directory, the |
| directory is created immediately and then persists through subsequent |
| task retries, breaking the concept of output committing: |
| </p> |
| |
| <pre> |
| private MultipleOutputs<Text, Text> out; |
| |
| public void setup(Context context) { |
| out = new MultipleOutputs<Text, Text>(context); |
| ... |
| } |
| |
| public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { |
| for (Text t : values) { |
| out.write(key, t, generateFileName(<<i>parameter list...</i>>)); |
| } |
| } |
| |
| protected void cleanup(Context context) throws IOException, InterruptedException { |
| out.close(); |
| } |
| </pre> |
| |
| <p> |
| Use your own code in <code>generateFileName()</code> to create a custom path to your results. |
| '/' characters in <code>baseOutputPath</code> will be translated into directory levels in your file system. |
| Also, append your custom-generated path with "part" or similar, otherwise your output will be -00000, -00001 etc. |
| No call to <code>context.write()</code> is necessary. See example <code>generateFileName()</code> code below. |
| </p> |
| |
| <pre> |
| private String generateFileName(Text k) { |
| // expect Text k in format "Surname|Forename" |
| String[] kStr = k.toString().split("\\|"); |
| |
| String sName = kStr[0]; |
| String fName = kStr[1]; |
| |
| // example for k = Smith|John |
| // output written to /user/hadoop/path/to/output/Smith/John-r-00000 (etc) |
| return sName + "/" + fName; |
| } |
| </pre> |
| |
| <p> |
| Using MultipleOutputs in this way will still create zero-sized default output, eg part-00000. |
| To prevent this use <code>LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);</code> |
| instead of <code>job.setOutputFormatClass(TextOutputFormat.class);</code> in your Hadoop job configuration. |
| </p>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.MultipleOutputs --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat --> |
| <class name="NullOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="NullOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| </method> |
| <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| </method> |
| <doc> |
| <![CDATA[Consume all outputs and put them in /dev/null.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.PartialFileOutputCommitter --> |
| <class name="PartialFileOutputCommitter" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapreduce.lib.output.PartialOutputCommitter"/> |
| <constructor name="PartialFileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <constructor name="PartialFileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.JobContext" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="getCommittedTaskPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="appAttemptId" type="int"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| </method> |
| <method name="cleanUpPartialOutputForTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link OutputCommitter} that commits files specified |
| in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.PartialFileOutputCommitter --> |
| <!-- start interface org.apache.hadoop.mapreduce.lib.output.PartialOutputCommitter --> |
| <interface name="PartialOutputCommitter" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="cleanUpPartialOutputForTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Remove all previously committed outputs from prior executions of this task. |
| @param context Context for cleaning up previously promoted output. |
 @throws IOException If cleanup fails, then the state of the task may not be
| well defined.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Interface for an {@link org.apache.hadoop.mapreduce.OutputCommitter} |
| implementing partial commit of task output, as during preemption.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapreduce.lib.output.PartialOutputCommitter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter --> |
| <class name="PathOutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="PathOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Constructor for a task attempt. |
| Subclasses should provide a public constructor with this signature. |
| @param outputPath output path: may be null |
| @param context task context |
| @throws IOException IO problem]]> |
| </doc> |
| </constructor> |
| <constructor name="PathOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.JobContext" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Constructor for a job attempt. |
| Subclasses should provide a public constructor with this signature. |
| @param outputPath output path: may be null |
| @param context task context |
| @throws IOException IO problem]]> |
| </doc> |
| </constructor> |
| <method name="getOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the final directory where work will be placed once the job |
| is committed. This may be null, in which case, there is no output |
| path to write data to. |
| @return the path where final output of the job should be placed.]]> |
| </doc> |
| </method> |
| <method name="hasOutputPath" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Predicate: is there an output path? |
| @return true if we have an output path set, else false.]]> |
| </doc> |
| </method> |
| <method name="getWorkPath" return="org.apache.hadoop.fs.Path" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the directory that the task should write results into. |
| Warning: there's no guarantee that this work path is on the same |
| FS as the final output, or that it's visible across machines. |
| May be null. |
| @return the work directory |
| @throws IOException IO problem]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[A committer which somehow commits data written to a working directory |
| to the final directory during the commit process. The reference |
| implementation of this is the {@link FileOutputCommitter}. |
| |
 There are two constructors, both of which do nothing but log and
| validate their arguments.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat --> |
| <class name="SequenceFileAsBinaryOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileAsBinaryOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setSequenceFileOutputKeyClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the key class for the {@link SequenceFile} |
| <p>This allows the user to specify the key class to be different |
| from the actual class ({@link BytesWritable}) used for writing </p> |
| |
| @param job the {@link Job} to modify |
| @param theClass the SequenceFile output key class.]]> |
| </doc> |
| </method> |
| <method name="setSequenceFileOutputValueClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the value class for the {@link SequenceFile} |
| <p>This allows the user to specify the value class to be different |
| from the actual class ({@link BytesWritable}) used for writing </p> |
| |
| @param job the {@link Job} to modify |
@param theClass the SequenceFile output value class.]]>
| </doc> |
| </method> |
| <method name="getSequenceFileOutputKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the key class for the {@link SequenceFile} |
| |
| @return the key class of the {@link SequenceFile}]]> |
| </doc> |
| </method> |
| <method name="getSequenceFileOutputValueClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the value class for the {@link SequenceFile} |
| |
| @return the value class of the {@link SequenceFile}]]> |
| </doc> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="KEY_CLASS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="VALUE_CLASS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes keys, |
| values to {@link SequenceFile}s in binary(raw) format]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat --> |
| <class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getSequenceWriter" return="org.apache.hadoop.io.SequenceFile.Writer" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <param name="keyClass" type="java.lang.Class"/> |
| <param name="valueClass" type="java.lang.Class"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}. |
| @param job the {@link Job} |
| @return the {@link CompressionType} for the output {@link SequenceFile}, |
| defaulting to {@link CompressionType#RECORD}]]> |
| </doc> |
| </method> |
| <method name="setOutputCompressionType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/> |
| <doc> |
| <![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}. |
| @param job the {@link Job} to modify |
| @param style the {@link CompressionType} for the output |
| {@link SequenceFile}]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat --> |
| <class name="TextOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TextOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <field name="SEPARATOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SEPERATOR" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use {@link #SEPARATOR}"> |
| <doc> |
| <![CDATA[@deprecated Use {@link #SEPARATOR}]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[An {@link OutputFormat} that writes plain text files.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.partition"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner --> |
| <class name="BinaryPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <constructor name="BinaryPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setOffsets" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="left" type="int"/> |
| <param name="right" type="int"/> |
| <doc> |
| <![CDATA[Set the subarray to be used for partitioning to |
| <code>bytes[left:(right+1)]</code> in Python syntax. |
| |
| @param conf configuration object |
| @param left left Python-style offset |
| @param right right Python-style offset]]> |
| </doc> |
| </method> |
| <method name="setLeftOffset" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="offset" type="int"/> |
| <doc> |
| <![CDATA[Set the subarray to be used for partitioning to |
| <code>bytes[offset:]</code> in Python syntax. |
| |
| @param conf configuration object |
| @param offset left Python-style offset]]> |
| </doc> |
| </method> |
| <method name="setRightOffset" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="offset" type="int"/> |
| <doc> |
| <![CDATA[Set the subarray to be used for partitioning to |
| <code>bytes[:(offset+1)]</code> in Python syntax. |
| |
| @param conf configuration object |
| @param offset right Python-style offset]]> |
| </doc> |
| </method> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getPartition" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.BinaryComparable"/> |
| <param name="value" type="V"/> |
| <param name="numPartitions" type="int"/> |
| <doc> |
| <![CDATA[Use (the specified slice of the array returned by) |
| {@link BinaryComparable#getBytes()} to partition.]]> |
| </doc> |
| </method> |
| <field name="LEFT_OFFSET_PROPERTY_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="RIGHT_OFFSET_PROPERTY_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[<p>Partition {@link BinaryComparable} keys using a configurable part of |
| the bytes array returned by {@link BinaryComparable#getBytes()}.</p> |
| |
| <p>The subarray to be used for the partitioning can be defined by means |
| of the following properties: |
| <ul> |
| <li> |
| <i>mapreduce.partition.binarypartitioner.left.offset</i>: |
| left offset in array (0 by default) |
| </li> |
| <li> |
| <i>mapreduce.partition.binarypartitioner.right.offset</i>: |
| right offset in array (-1 by default) |
| </li> |
| </ul> |
| Like in Python, both negative and positive offsets are allowed, but |
| the meaning is slightly different. In case of an array of length 5, |
| for instance, the possible offsets are: |
| <pre><code> |
| +---+---+---+---+---+ |
| | B | B | B | B | B | |
| +---+---+---+---+---+ |
| 0 1 2 3 4 |
| -5 -4 -3 -2 -1 |
| </code></pre> |
| The first row of numbers gives the position of the offsets 0...5 in |
| the array; the second row gives the corresponding negative offsets. |
| Contrary to Python, the specified subarray has byte <code>i</code> |
and <code>j</code> as first and last element, respectively, when
| <code>i</code> and <code>j</code> are the left and right offset. |
| |
| <p>For Hadoop programs written in Java, it is advisable to use one of |
| the following static convenience methods for setting the offsets: |
| <ul> |
| <li>{@link #setOffsets}</li> |
| <li>{@link #setLeftOffset}</li> |
| <li>{@link #setRightOffset}</li> |
| </ul>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner --> |
| <class name="HashPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="HashPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getPartition" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <param name="numReduceTasks" type="int"/> |
| <doc> |
| <![CDATA[Use {@link Object#hashCode()} to partition.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Partition keys by their {@link Object#hashCode()}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.partition.InputSampler --> |
| <class name="InputSampler" extends="org.apache.hadoop.conf.Configured" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.util.Tool"/> |
| <constructor name="InputSampler" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="writePartitionFile" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="sampler" type="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Write a partition file for the given job, using the Sampler provided. |
| Queries the sampler for a sample keyset, sorts by the output key |
| comparator, selects the keys for each rank, and writes to the destination |
| returned from {@link TotalOrderPartitioner#getPartitionFile}.]]> |
| </doc> |
| </method> |
| <method name="run" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| <doc> |
| <![CDATA[Driver for InputSampler from the command line. |
| Configures a JobConf instance and calls {@link #writePartitionFile}.]]> |
| </doc> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| </method> |
| <doc> |
| <![CDATA[Utility for collecting samples and writing a partition file for |
| {@link TotalOrderPartitioner}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.partition.InputSampler --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator --> |
| <class name="KeyFieldBasedComparator" extends="org.apache.hadoop.io.WritableComparator" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <constructor name="KeyFieldBasedComparator" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="compare" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="b1" type="byte[]"/> |
| <param name="s1" type="int"/> |
| <param name="l1" type="int"/> |
| <param name="b2" type="byte[]"/> |
| <param name="s2" type="int"/> |
| <param name="l2" type="int"/> |
| </method> |
| <method name="setKeyFieldComparatorOptions" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="keySpec" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys. |
| |
| @param keySpec the key specification of the form -k pos1[,pos2], where, |
| pos is of the form f[.c][opts], where f is the number |
| of the key field to use, and c is the number of the first character from |
| the beginning of the field. Fields and character posns are numbered |
| starting with 1; a character position of zero in pos2 indicates the |
| field's last character. If '.c' is omitted from pos1, it defaults to 1 |
| (the beginning of the field); if omitted from pos2, it defaults to 0 |
| (the end of the field). opts are ordering options. The supported options |
| are: |
| -n, (Sort numerically) |
| -r, (Reverse the result of comparison)]]> |
| </doc> |
| </method> |
| <method name="getKeyFieldComparatorOption" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the {@link KeyFieldBasedComparator} options]]> |
| </doc> |
| </method> |
| <field name="COMPARATOR_OPTIONS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This comparator implementation provides a subset of the features provided |
| by the Unix/GNU Sort. In particular, the supported features are: |
| -n, (Sort numerically) |
| -r, (Reverse the result of comparison) |
| -k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number |
| of the field to use, and c is the number of the first character from the |
| beginning of the field. Fields and character posns are numbered starting |
| with 1; a character position of zero in pos2 indicates the field's last |
| character. If '.c' is omitted from pos1, it defaults to 1 (the beginning |
| of the field); if omitted from pos2, it defaults to 0 (the end of the |
| field). opts are ordering options (any of 'nr' as described above). |
| We assume that the fields in the key are separated by |
| {@link JobContext#MAP_OUTPUT_KEY_FIELD_SEPARATOR}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner --> |
| <class name="KeyFieldBasedPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <constructor name="KeyFieldBasedPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getPartition" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K2"/> |
| <param name="value" type="V2"/> |
| <param name="numReduceTasks" type="int"/> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="b" type="byte[]"/> |
| <param name="start" type="int"/> |
| <param name="end" type="int"/> |
| <param name="currentHash" type="int"/> |
| </method> |
| <method name="getPartition" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="hash" type="int"/> |
| <param name="numReduceTasks" type="int"/> |
| </method> |
| <method name="setKeyFieldPartitionerOptions" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="keySpec" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for |
| {@link Partitioner} |
| |
| @param keySpec the key specification of the form -k pos1[,pos2], where, |
| pos is of the form f[.c][opts], where f is the number |
| of the key field to use, and c is the number of the first character from |
| the beginning of the field. Fields and character posns are numbered |
| starting with 1; a character position of zero in pos2 indicates the |
| field's last character. If '.c' is omitted from pos1, it defaults to 1 |
| (the beginning of the field); if omitted from pos2, it defaults to 0 |
| (the end of the field).]]> |
| </doc> |
| </method> |
| <method name="getKeyFieldPartitionerOption" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <doc> |
| <![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]> |
| </doc> |
| </method> |
| <field name="PARTITIONER_OPTIONS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Defines a way to partition keys based on certain key fields (also see |
| {@link KeyFieldBasedComparator}. |
| The key specification supported is of the form -k pos1[,pos2], where, |
| pos is of the form f[.c][opts], where f is the number |
| of the key field to use, and c is the number of the first character from |
| the beginning of the field. Fields and character posns are numbered |
| starting with 1; a character position of zero in pos2 indicates the |
| field's last character. If '.c' is omitted from pos1, it defaults to 1 |
| (the beginning of the field); if omitted from pos2, it defaults to 0 |
| (the end of the field).]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.partition.RehashPartitioner --> |
| <class name="RehashPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="RehashPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getPartition" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <param name="numReduceTasks" type="int"/> |
| <doc> |
| <![CDATA[Rehash {@link Object#hashCode()} to partition.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This partitioner rehashes values returned by {@link Object#hashCode()} |
| to get smoother distribution between partitions which may improve |
reduce time in some cases and should not harm things in any case.
| This partitioner is suggested with Integer and Long keys with simple |
| patterns in their distributions. |
| @since 2.0.3]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.partition.RehashPartitioner --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner --> |
| <class name="TotalOrderPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <constructor name="TotalOrderPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Read in the partition file and build indexing data structures. |
| If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and |
| <tt>total.order.partitioner.natural.order</tt> is not false, a trie |
| of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes |
| will be built. Otherwise, keys will be located using a binary search of |
| the partition keyset using the {@link org.apache.hadoop.io.RawComparator} |
| defined for this job. The input file must be sorted with the same |
| comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.]]> |
| </doc> |
| </method> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getPartition" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="K"/> |
| <param name="value" type="V"/> |
| <param name="numPartitions" type="int"/> |
| </method> |
| <method name="setPartitionFile" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="p" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Set the path to the SequenceFile storing the sorted partition keyset. |
| It must be the case that for <tt>R</tt> reduces, there are <tt>R-1</tt> |
| keys in the SequenceFile.]]> |
| </doc> |
| </method> |
| <method name="getPartitionFile" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the path to the SequenceFile storing the sorted partition keyset. |
| @see #setPartitionFile(Configuration, Path)]]> |
| </doc> |
| </method> |
| <field name="DEFAULT_PATH" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="PARTITIONER_PATH" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="MAX_TRIE_DEPTH" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="NATURAL_ORDER" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Partitioner effecting a total order by reading split points from |
| an externally generated source.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.lib.reduce"> |
| <!-- start class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer --> |
| <class name="IntSumReducer" extends="org.apache.hadoop.mapreduce.Reducer" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="IntSumReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="Key"/> |
| <param name="values" type="java.lang.Iterable"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer --> |
| <class name="LongSumReducer" extends="org.apache.hadoop.mapreduce.Reducer" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="LongSumReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="KEY"/> |
| <param name="values" type="java.lang.Iterable"/> |
| <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer --> |
| <!-- start class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer --> |
| <class name="WrappedReducer" extends="org.apache.hadoop.mapreduce.Reducer" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="WrappedReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getReducerContext" return="org.apache.hadoop.mapreduce.Reducer.Context" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="reduceContext" type="org.apache.hadoop.mapreduce.ReduceContext"/> |
| <doc> |
      <![CDATA[A wrapped {@link Reducer.Context} for custom implementations.
| @param reduceContext <code>ReduceContext</code> to be wrapped |
| @return a wrapped <code>Reducer.Context</code> for custom implementations]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A {@link Reducer} which wraps a given one to allow for custom |
| {@link Reducer.Context} implementations.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.security"> |
| <!-- start class org.apache.hadoop.mapreduce.security.TokenCache --> |
| <class name="TokenCache" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TokenCache" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getSecretKey" return="byte[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="credentials" type="org.apache.hadoop.security.Credentials"/> |
| <param name="alias" type="org.apache.hadoop.io.Text"/> |
| <doc> |
      <![CDATA[Auxiliary method to get user's secret keys.
| @param alias |
| @return secret key from the storage]]> |
| </doc> |
| </method> |
| <method name="obtainTokensForNamenodes" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="credentials" type="org.apache.hadoop.security.Credentials"/> |
| <param name="ps" type="org.apache.hadoop.fs.Path[]"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Convenience method to obtain delegation tokens from namenodes |
| corresponding to the paths passed. |
| @param credentials |
| @param ps array of paths |
| @param conf configuration |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="cleanUpTokenReferral" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Remove jobtoken referrals which don't make sense in the context |
| of the task execution. |
| |
| @param conf]]> |
| </doc> |
| </method> |
| <field name="JOB_TOKEN_HDFS_FILE" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[file name used on HDFS for generated job token]]> |
| </doc> |
| </field> |
| <field name="JOB_TOKENS_FILENAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[conf setting for job tokens cache file name]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[This class provides user facing APIs for transferring secrets from |
| the job client to the tasks. |
| The secrets can be stored just before submission of jobs and read during |
| the task execution.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.security.TokenCache --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.server.jobtracker"> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.server.tasktracker"> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.task.annotation"> |
| <!-- start class org.apache.hadoop.mapreduce.task.annotation.Checkpointable --> |
| <class name="Checkpointable" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="java.lang.annotation.Annotation"/> |
| <doc> |
| <![CDATA[Contract representing to the framework that the task can be safely preempted |
| and restarted between invocations of the user-defined function. |
| |
| This is often true when the result of a function does not rely on state |
| derived from previous elements in the record stream, but the guarantee is |
| left as an exercise to the implementor.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.task.annotation.Checkpointable --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.tools"> |
| <!-- start class org.apache.hadoop.mapreduce.tools.CLI --> |
| <class name="CLI" extends="org.apache.hadoop.conf.Configured" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.util.Tool"/> |
| <constructor name="CLI" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CLI" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="run" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="argv" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| </method> |
| <method name="getCounter" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="counters" type="org.apache.hadoop.mapreduce.Counters"/> |
| <param name="counterGroupName" type="java.lang.String"/> |
| <param name="counterName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getTaskLogURL" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> |
| <param name="baseUrl" type="java.lang.String"/> |
| </method> |
| <method name="displayTasks" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapreduce.Job"/> |
| <param name="type" type="java.lang.String"/> |
| <param name="state" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Display the information about a job's tasks, of a particular type and |
| in a particular state |
| |
| @param job the job |
| @param type the type of the task (map/reduce/setup/cleanup) |
| @param state the state of the task |
| (pending/running/completed/failed/killed) |
| @throws IOException when there is an error communicating with the master |
| @throws InterruptedException |
| @throws IllegalArgumentException if an invalid type/state is passed]]> |
| </doc> |
| </method> |
| <method name="displayJobList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobs" type="org.apache.hadoop.mapreduce.JobStatus[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="argv" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| </method> |
| <field name="cluster" type="org.apache.hadoop.mapreduce.Cluster" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="headerPattern" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="dataPattern" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Interprets the map reduce cli options]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapreduce.tools.CLI --> |
| </package> |
| <package name="org.apache.hadoop.mapreduce.v2"> |
| </package> |
| |
| </api> |