| <?xml version="1.0" encoding="iso-8859-1" standalone="no"?> |
| <!-- Generated by the JDiff Javadoc doclet --> |
| <!-- (http://www.jdiff.org) --> |
| <!-- on Sun May 31 20:42:50 PDT 2009 --> |
| |
| <api |
| xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' |
| xsi:noNamespaceSchemaLocation='api.xsd' |
name="hadoop-mapred 0.20.0"
| jdversion="1.0.9"> |
| |
| <!-- Command line arguments = -doclet jdiff.JDiff -docletpath /home/gkesavan/release-0.20.0/build/ivy/lib/Hadoop/jdiff/jdiff-1.0.9.jar:/home/gkesavan/release-0.20.0/build/ivy/lib/Hadoop/jdiff/xerces-1.4.4.jar -classpath /home/gkesavan/release-0.20.0/build/classes:/home/gkesavan/release-0.20.0/lib/commons-cli-2.0-SNAPSHOT.jar:/home/gkesavan/release-0.20.0/lib/hsqldb-1.8.0.10.jar:/home/gkesavan/release-0.20.0/lib/jsp-2.1/jsp-2.1.jar:/home/gkesavan/release-0.20.0/lib/jsp-2.1/jsp-api-2.1.jar:/home/gkesavan/release-0.20.0/lib/kfs-0.2.2.jar:/home/gkesavan/release-0.20.0/conf:/home/gkesavan/.ivy2/cache/commons-logging/commons-logging/jars/commons-logging-1.0.4.jar:/home/gkesavan/.ivy2/cache/log4j/log4j/jars/log4j-1.2.15.jar:/home/gkesavan/.ivy2/cache/commons-httpclient/commons-httpclient/jars/commons-httpclient-3.0.1.jar:/home/gkesavan/.ivy2/cache/commons-codec/commons-codec/jars/commons-codec-1.3.jar:/home/gkesavan/.ivy2/cache/xmlenc/xmlenc/jars/xmlenc-0.52.jar:/home/gkesavan/.ivy2/cache/net.java.dev.jets3t/jets3t/jars/jets3t-0.6.1.jar:/home/gkesavan/.ivy2/cache/commons-net/commons-net/jars/commons-net-1.4.1.jar:/home/gkesavan/.ivy2/cache/org.mortbay.jetty/servlet-api-2.5/jars/servlet-api-2.5-6.1.14.jar:/home/gkesavan/.ivy2/cache/oro/oro/jars/oro-2.0.8.jar:/home/gkesavan/.ivy2/cache/org.mortbay.jetty/jetty/jars/jetty-6.1.14.jar:/home/gkesavan/.ivy2/cache/org.mortbay.jetty/jetty-util/jars/jetty-util-6.1.14.jar:/home/gkesavan/.ivy2/cache/tomcat/jasper-runtime/jars/jasper-runtime-5.5.12.jar:/home/gkesavan/.ivy2/cache/tomcat/jasper-compiler/jars/jasper-compiler-5.5.12.jar:/home/gkesavan/.ivy2/cache/commons-el/commons-el/jars/commons-el-1.0.jar:/home/gkesavan/.ivy2/cache/junit/junit/jars/junit-3.8.1.jar:/home/gkesavan/.ivy2/cache/commons-logging/commons-logging-api/jars/commons-logging-api-1.0.4.jar:/home/gkesavan/.ivy2/cache/org.slf4j/slf4j-api/jars/slf4j-api-1.4.3.jar:/home/gkesavan/.ivy2/cache/org.eclipse.jdt/core/jars/core-3.1.1.jar:/home/gkesavan/.ivy2/cache/org.slf4j/s
lf4j-log4j12/jars/slf4j-log4j12-1.4.3.jar:/home/gkesavan/.ivy2/cache/jdiff/jdiff/jars/jdiff-1.0.9.jar:/home/gkesavan/.ivy2/cache/xerces/xerces/jars/xerces-1.4.4.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-launcher.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-resolver.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-starteam.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-netrexx.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-testutil.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jai.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-swing.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jmf.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-bcel.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jdepend.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jsch.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-bsf.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-antlr.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-weblogic.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-junit.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-log4j.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/xercesImpl.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-oro.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-trax.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-nodeps.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-commons-logging.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-regexp.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-stylebook.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-javamail.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-commons-net.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/xml-apis.jar:/home/gkesavan/tools/jdk1.6.0_07-32bit/lib/tools.jar -sourcepath /home/gkesavan/release-0.20.0/src/mapred:/home/gkesavan/release-0.20.0/src/tools -apidir /home/gkesavan/release-0.20.0/lib/jdiff -apiname hadoop-mapred 0.20.0 --> |
| <package name="org.apache.hadoop.mapred"> |
| <!-- start class org.apache.hadoop.mapred.ClusterStatus --> |
| <class name="ClusterStatus" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <method name="getTaskTrackers" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of task trackers in the cluster. |
| |
| @return the number of task trackers in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getActiveTrackerNames" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the names of task trackers in the cluster. |
| |
| @return the active task trackers in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getBlacklistedTrackerNames" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the names of task trackers in the cluster. |
| |
| @return the blacklisted task trackers in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getBlacklistedTrackers" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of blacklisted task trackers in the cluster. |
| |
| @return the number of blacklisted task trackers in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getTTExpiryInterval" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the tasktracker expiry interval for the cluster |
| @return the expiry interval in msec]]> |
| </doc> |
| </method> |
| <method name="getMapTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of currently running map tasks in the cluster. |
| |
| @return the number of currently running map tasks in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getReduceTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of currently running reduce tasks in the cluster. |
| |
| @return the number of currently running reduce tasks in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getMaxMapTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the maximum capacity for running map tasks in the cluster. |
| |
| @return the maximum capacity for running map tasks in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getMaxReduceTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the maximum capacity for running reduce tasks in the cluster. |
| |
| @return the maximum capacity for running reduce tasks in the cluster.]]> |
| </doc> |
| </method> |
| <method name="getJobTrackerState" return="org.apache.hadoop.mapred.JobTracker.State" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the current state of the <code>JobTracker</code>, |
| as {@link JobTracker.State} |
| |
| @return the current state of the <code>JobTracker</code>.]]> |
| </doc> |
| </method> |
| <method name="getUsedMemory" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the total heap memory used by the <code>JobTracker</code> |
| |
| @return the size of heap memory used by the <code>JobTracker</code>]]> |
| </doc> |
| </method> |
| <method name="getMaxMemory" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the maximum configured heap memory that can be used by the <code>JobTracker</code> |
| |
| @return the configured size of max heap memory that can be used by the <code>JobTracker</code>]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Status information on the current state of the Map-Reduce cluster. |
| |
| <p><code>ClusterStatus</code> provides clients with information such as: |
| <ol> |
| <li> |
| Size of the cluster. |
| </li> |
| <li> |
| Name of the trackers. |
| </li> |
| <li> |
| Task capacity of the cluster. |
| </li> |
| <li> |
| The number of currently running map & reduce tasks. |
| </li> |
| <li> |
| State of the <code>JobTracker</code>. |
| </li> |
| </ol></p> |
| |
| <p>Clients can query for the latest <code>ClusterStatus</code>, via |
| {@link JobClient#getClusterStatus()}.</p> |
| |
| @see JobClient]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.ClusterStatus --> |
| <!-- start class org.apache.hadoop.mapred.Counters --> |
| <class name="Counters" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.Counters} instead."> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <implements name="java.lang.Iterable"/> |
| <constructor name="Counters" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getGroupNames" return="java.util.Collection" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the names of all counter classes. |
| @return Set of counter names.]]> |
| </doc> |
| </method> |
| <method name="iterator" return="java.util.Iterator" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getGroup" return="org.apache.hadoop.mapred.Counters.Group" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="groupName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Returns the named counter group, or an empty group if there is none |
| with the specified name.]]> |
| </doc> |
| </method> |
| <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Enum"/> |
| <doc> |
| <![CDATA[Find the counter for the given enum. The same enum will always return the |
| same counter. |
| @param key the counter key |
| @return the matching counter object]]> |
| </doc> |
| </method> |
| <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="group" type="java.lang.String"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Find a counter given the group and the name. |
| @param group the name of the group |
| @param name the internal name of the counter |
| @return the counter for that name]]> |
| </doc> |
| </method> |
| <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="deprecated, no comment"> |
| <param name="group" type="java.lang.String"/> |
| <param name="id" type="int"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Find a counter by using strings |
| @param group the name of the group |
| @param id the id of the counter within the group (0 to N-1) |
| @param name the internal name of the counter |
| @return the counter for that name |
| @deprecated]]> |
| </doc> |
| </method> |
| <method name="incrCounter" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Enum"/> |
| <param name="amount" type="long"/> |
| <doc> |
| <![CDATA[Increments the specified counter by the specified amount, creating it if |
| it didn't already exist. |
| @param key identifies a counter |
| @param amount amount by which counter is to be incremented]]> |
| </doc> |
| </method> |
| <method name="incrCounter" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="group" type="java.lang.String"/> |
| <param name="counter" type="java.lang.String"/> |
| <param name="amount" type="long"/> |
| <doc> |
| <![CDATA[Increments the specified counter by the specified amount, creating it if |
| it didn't already exist. |
| @param group the name of the group |
| @param counter the internal name of the counter |
| @param amount amount by which counter is to be incremented]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Enum"/> |
| <doc> |
| <![CDATA[Returns current value of the specified counter, or 0 if the counter |
| does not exist.]]> |
| </doc> |
| </method> |
| <method name="incrAllCounters" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="org.apache.hadoop.mapred.Counters"/> |
| <doc> |
| <![CDATA[Increments multiple counters by their amounts in another Counters |
| instance. |
| @param other the other Counters instance]]> |
| </doc> |
| </method> |
| <method name="sum" return="org.apache.hadoop.mapred.Counters" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="a" type="org.apache.hadoop.mapred.Counters"/> |
| <param name="b" type="org.apache.hadoop.mapred.Counters"/> |
| <doc> |
| <![CDATA[Convenience method for computing the sum of two sets of counters.]]> |
| </doc> |
| </method> |
| <method name="size" return="int" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the total number of counters, by summing the number of counters |
| in each group.]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Write the set of groups. |
| The external format is: |
| #groups (groupName group)* |
| |
| i.e. the number of groups followed by 0 or more groups, where each |
| group is of the form: |
| |
| groupDisplayName #counters (false | true counter)* |
| |
| where each counter is of the form: |
| |
| name (false | true displayName) value]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Read a set of groups.]]> |
| </doc> |
| </method> |
| <method name="log" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="log" type="org.apache.commons.logging.Log"/> |
| <doc> |
| <![CDATA[Logs the current counter values. |
| @param log The log to use.]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return textual representation of the counter values.]]> |
| </doc> |
| </method> |
| <method name="makeCompactString" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Convert a counters object into a single line that is easy to parse. |
| @return the string with "name=value" for each counter and separated by ","]]> |
| </doc> |
| </method> |
| <method name="makeEscapedCompactString" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Represent the counter in a textual format that can be converted back to |
| its object form |
| @return the string in the following format |
| {(groupname)(group-displayname)[(countername)(displayname)(value)][][]}{}{}]]> |
| </doc> |
| </method> |
| <method name="fromEscapedCompactString" return="org.apache.hadoop.mapred.Counters" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="compactString" type="java.lang.String"/> |
| <exception name="ParseException" type="java.text.ParseException"/> |
| <doc> |
| <![CDATA[Convert a stringified counter representation into a counter object. Note |
| that the counter can be recovered if its stringified using |
| {@link #makeEscapedCompactString()}. |
| @return a Counter]]> |
| </doc> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="obj" type="java.lang.Object"/> |
| </method> |
| <doc> |
| <![CDATA[A set of named counters. |
| |
| <p><code>Counters</code> represent global counters, defined either by the |
| Map-Reduce framework or applications. Each <code>Counter</code> can be of |
| any {@link Enum} type.</p> |
| |
| <p><code>Counters</code> are bunched into {@link Group}s, each comprising of |
| counters from a particular <code>Enum</code> class. |
| @deprecated Use {@link org.apache.hadoop.mapreduce.Counters} instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.Counters --> |
| <!-- start class org.apache.hadoop.mapred.Counters.Counter --> |
| <class name="Counters.Counter" extends="org.apache.hadoop.mapreduce.Counter" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="setDisplayName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="newName" type="java.lang.String"/> |
| </method> |
| <method name="makeEscapedCompactString" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the compact stringified version of the counter in the format |
| [(actual-name)(display-name)(value)]]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[What is the current value of this counter? |
| @return the current value]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A counter record, comprising its name and value.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.Counters.Counter --> |
| <!-- start class org.apache.hadoop.mapred.Counters.Group --> |
| <class name="Counters.Group" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <implements name="java.lang.Iterable"/> |
| <method name="getName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns raw name of the group. This is the name of the enum class |
| for this group of counters.]]> |
| </doc> |
| </method> |
| <method name="getDisplayName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns localized name of the group. This is the same as getName() by |
| default, but different if an appropriate ResourceBundle is found.]]> |
| </doc> |
| </method> |
| <method name="setDisplayName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="displayName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the display name]]> |
| </doc> |
| </method> |
| <method name="makeEscapedCompactString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the compact stringified version of the group in the format |
| {(actual-name)(display-name)(value)[][][]} where [] are compact strings for the |
| counters within.]]> |
| </doc> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="obj" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[Checks for (content) equality of Groups]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="counterName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Returns the value of the specified counter, or 0 if the counter does |
| not exist.]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #getCounter(String)} instead"> |
| <param name="id" type="int"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Get the counter for the given id and create it if it doesn't exist. |
| @param id the numeric id of the counter within the group |
| @param name the internal counter name |
| @return the counter |
| @deprecated use {@link #getCounter(String)} instead]]> |
| </doc> |
| </method> |
| <method name="getCounterForName" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Get the counter for the given name and create it if it doesn't exist. |
| @param name the internal counter name |
| @return the counter]]> |
| </doc> |
| </method> |
| <method name="size" return="int" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the number of counters in this group.]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="iterator" return="java.util.Iterator" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[<code>Group</code> of counters, comprising of counters from a particular |
| counter {@link Enum} class. |
| |
<p><code>Group</code> handles localization of the class name and the
| counter names.</p>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.Counters.Group --> |
| <!-- start class org.apache.hadoop.mapred.DefaultJobHistoryParser --> |
| <class name="DefaultJobHistoryParser" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="DefaultJobHistoryParser" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="parseJobTasks" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobHistoryFile" type="java.lang.String"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobHistory.JobInfo"/> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Populates a JobInfo object from the job's history log file. |
| @param jobHistoryFile history file for this job. |
| @param job a precreated JobInfo object, should be non-null. |
| @param fs FileSystem where historyFile is present. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Default parser for job history files. It creates object model from |
| job history file.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.DefaultJobHistoryParser --> |
| <!-- start class org.apache.hadoop.mapred.FileAlreadyExistsException --> |
| <class name="FileAlreadyExistsException" extends="java.io.IOException" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FileAlreadyExistsException" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="FileAlreadyExistsException" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[Used when target file already exists for any operation and |
| is not configured to be overwritten.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FileAlreadyExistsException --> |
| <!-- start class org.apache.hadoop.mapred.FileInputFormat --> |
| <class name="FileInputFormat" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} |
| instead."> |
| <implements name="org.apache.hadoop.mapred.InputFormat"/> |
| <constructor name="FileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setMinSplitSize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="minSplitSize" type="long"/> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="filename" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Is the given filename splitable? Usually, true, but if the file is |
| stream compressed, it will not be. |
| |
| <code>FileInputFormat</code> implementations can override this and return |
| <code>false</code> to ensure that individual input files are never split-up |
| so that {@link Mapper}s process entire files. |
| |
| @param fs the file system that the file is on |
| @param filename the file name to check |
| @return is this file splitable?]]> |
| </doc> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setInputPathFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="filter" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job. |
| |
| @param filter the PathFilter class use for filtering the input paths.]]> |
| </doc> |
| </method> |
| <method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get a PathFilter instance of the filter set for the input paths. |
| |
| @return the PathFilter instance set for the job, NULL if none has been set.]]> |
| </doc> |
| </method> |
| <method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[List input directories. |
| Subclasses may override to, e.g., select only files matching a regular |
| expression. |
| |
| @param job the job to list input paths for |
| @return array of FileStatus objects |
| @throws IOException if zero items.]]> |
| </doc> |
| </method> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Splits files returned by {@link #listStatus(JobConf)} when |
| they're too big.]]> |
| </doc> |
| </method> |
| <method name="computeSplitSize" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="goalSize" type="long"/> |
| <param name="minSize" type="long"/> |
| <param name="blockSize" type="long"/> |
| </method> |
| <method name="getBlockIndex" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/> |
| <param name="offset" type="long"/> |
| </method> |
| <method name="setInputPaths" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="commaSeparatedPaths" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Sets the given comma separated paths as the list of inputs |
| for the map-reduce job. |
| |
| @param conf Configuration of the job |
| @param commaSeparatedPaths Comma separated paths to be set as |
| the list of inputs for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="addInputPaths" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="commaSeparatedPaths" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Add the given comma separated paths to the list of inputs for |
| the map-reduce job. |
| |
| @param conf The configuration of the job |
| @param commaSeparatedPaths Comma separated paths to be added to |
| the list of inputs for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="setInputPaths" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/> |
| <doc> |
| <![CDATA[Set the array of {@link Path}s as the list of inputs |
| for the map-reduce job. |
| |
| @param conf Configuration of the job. |
| @param inputPaths the {@link Path}s of the input directories/files |
| for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="addInputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job. |
| |
| @param conf The configuration of the job |
| @param path {@link Path} to be added to the list of inputs for |
| the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getInputPaths" return="org.apache.hadoop.fs.Path[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the list of input {@link Path}s for the map-reduce job. |
| |
| @param conf The configuration of the job |
| @return the list of input {@link Path}s for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getSplitHosts" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/> |
| <param name="offset" type="long"/> |
| <param name="splitSize" type="long"/> |
| <param name="clusterMap" type="org.apache.hadoop.net.NetworkTopology"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This function identifies and returns the hosts that contribute |
| most for a given split. For calculating the contribution, rack |
| locality is treated on par with host locality, so hosts from racks |
| that contribute the most are preferred over hosts on racks that |
| contribute less |
| @param blkLocations The list of block locations |
| @param offset |
| @param splitSize |
| @return array of hosts that contribute most to this split |
| @throws IOException]]> |
| </doc> |
| </method> |
| <field name="LOG" type="org.apache.commons.logging.Log" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A base class for file-based {@link InputFormat}. |
| |
| <p><code>FileInputFormat</code> is the base class for all file-based |
| <code>InputFormat</code>s. This provides a generic implementation of |
| {@link #getSplits(JobConf, int)}. |
| Subclasses of <code>FileInputFormat</code> can also override the |
| {@link #isSplitable(FileSystem, Path)} method to ensure input-files are |
| not split-up and are processed as a whole by {@link Mapper}s. |
| @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} |
| instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FileInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.FileOutputCommitter --> |
| <class name="FileOutputCommitter" extends="org.apache.hadoop.mapred.OutputCommitter" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="FileOutputCommitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="cleanupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setupTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="commitTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="abortTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="needsTaskCommit" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="LOG" type="org.apache.commons.logging.Log" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TEMP_DIR_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Temporary directory name]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[An {@link OutputCommitter} that commits files specified |
| in job output directory i.e. ${mapred.output.dir}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FileOutputCommitter --> |
| <!-- start class org.apache.hadoop.mapred.FileOutputFormat --> |
| <class name="FileOutputFormat" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.OutputFormat"/> |
| <constructor name="FileOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setCompressOutput" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="compress" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the output of the job is compressed. |
| @param conf the {@link JobConf} to modify |
| @param compress should the output of the job be compressed?]]> |
| </doc> |
| </method> |
| <method name="getCompressOutput" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Is the job output compressed? |
| @param conf the {@link JobConf} to look in |
| @return <code>true</code> if the job output should be compressed, |
| <code>false</code> otherwise]]> |
| </doc> |
| </method> |
| <method name="setOutputCompressorClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="codecClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs. |
| @param conf the {@link JobConf} to modify |
| @param codecClass the {@link CompressionCodec} to be used to |
| compress the job outputs]]> |
| </doc> |
| </method> |
| <method name="getOutputCompressorClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="defaultValue" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Get the {@link CompressionCodec} for compressing the job outputs. |
| @param conf the {@link JobConf} to look in |
| @param defaultValue the {@link CompressionCodec} to return if not set |
| @return the {@link CompressionCodec} to be used to compress the |
| job outputs |
| @throws IllegalArgumentException if the class was specified, but not found]]> |
| </doc> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/> |
| <exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setOutputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="outputDir" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Set the {@link Path} of the output directory for the map-reduce job. |
| |
| @param conf The configuration of the job. |
| @param outputDir the {@link Path} of the output directory for |
| the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the {@link Path} to the output directory for the map-reduce job. |
| |
| @return the {@link Path} to the output directory for the map-reduce job. |
| @see FileOutputFormat#getWorkOutputPath(JobConf)]]> |
| </doc> |
| </method> |
| <method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the {@link Path} to the task's temporary output directory |
| for the map-reduce job |
| |
| <h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4> |
| |
| <p><i>Note:</i> The following is valid only if the {@link OutputCommitter} |
| is {@link FileOutputCommitter}. If <code>OutputCommitter</code> is not |
| a <code>FileOutputCommitter</code>, the task's temporary output |
| directory is same as {@link #getOutputPath(JobConf)} i.e. |
| <tt>${mapred.output.dir}</tt></p> |
| |
| <p>Some applications need to create/write-to side-files, which differ from |
| the actual job-outputs. |
| |
| <p>In such cases there could be issues with 2 instances of the same TIP |
| (running simultaneously e.g. speculative tasks) trying to open/write-to the |
| same file (path) on HDFS. Hence the application-writer will have to pick |
| unique names per task-attempt (e.g. using the attemptid, say |
| <tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p> |
| |
| <p>To get around this the Map-Reduce framework helps the application-writer |
| out by maintaining a special |
| <tt>${mapred.output.dir}/_temporary/_${taskid}</tt> |
| sub-directory for each task-attempt on HDFS where the output of the |
| task-attempt goes. On successful completion of the task-attempt the files |
| in the <tt>${mapred.output.dir}/_temporary/_${taskid}</tt> (only) |
| are <i>promoted</i> to <tt>${mapred.output.dir}</tt>. Of course, the |
| framework discards the sub-directory of unsuccessful task-attempts. This |
| is completely transparent to the application.</p> |
| |
| <p>The application-writer can take advantage of this by creating any |
| side-files required in <tt>${mapred.work.output.dir}</tt> during execution |
| of his reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the |
| framework will move them out similarly - thus she doesn't have to pick |
| unique paths per task-attempt.</p> |
| |
| <p><i>Note</i>: the value of <tt>${mapred.work.output.dir}</tt> during |
| execution of a particular task-attempt is actually |
| <tt>${mapred.output.dir}/_temporary/_${taskid}</tt>, and this value is |
| set by the map-reduce framework. So, just create any side-files in the |
| path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce |
| task to take advantage of this feature.</p> |
| |
| <p>The entire discussion holds true for maps of jobs with |
| reducer=NONE (i.e. 0 reduces) since output of the map, in that case, |
| goes directly to HDFS.</p> |
| |
| @return the {@link Path} to the task's temporary output directory |
| for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getTaskOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Helper function to create the task's temporary output directory and |
| return the path to the task's output file. |
| |
| @param conf job-configuration |
| @param name temporary task-output filename |
| @return path to the task's temporary output file |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getUniqueName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Helper function to generate a name that is unique for the task. |
| |
| <p>The generated name can be used to create custom files from within the |
| different tasks for the job, the names for different tasks will not collide |
| with each other.</p> |
| |
| <p>The given name is postfixed with the task type, 'm' for maps, 'r' for |
| reduces and the task partition number. For example, give a name 'test' |
| running on the first map of the job the generated name will be |
| 'test-m-00000'.</p> |
| |
| @param conf the configuration for the job. |
| @param name the name to make unique. |
| @return a unique name across all tasks of the job.]]> |
| </doc> |
| </method> |
| <method name="getPathForCustomFile" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Helper function to generate a {@link Path} for a file that is unique for |
| the task within the job output directory. |
| |
| <p>The path can be used to create custom files from within the map and |
| reduce tasks. The path name will be unique for each task. The path parent |
| will be the job output directory.</p> |
| |
| <p>This method uses the {@link #getUniqueName} method to make the file name |
| unique for the task.</p> |
| |
| @param conf the configuration for the job. |
| @param name the name for the file. |
| @return a unique path across all tasks of the job.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A base class for {@link OutputFormat}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FileOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.FileSplit --> |
| <class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit} |
| instead."> |
| <implements name="org.apache.hadoop.mapred.InputSplit"/> |
| <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="deprecated, no comment"> |
| <doc> |
| <![CDATA[Constructs a split. |
| @deprecated |
| @param file the file name |
| @param start the position of the first byte in the file to process |
| @param length the number of bytes in the file to process]]> |
| </doc> |
| </constructor> |
| <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a split with host information |
| |
| @param file the file name |
| @param start the position of the first byte in the file to process |
| @param length the number of bytes in the file to process |
| @param hosts the list of hosts containing the block, possibly null]]> |
| </doc> |
| </constructor> |
| <method name="getPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The file containing this split's data.]]> |
| </doc> |
| </method> |
| <method name="getStart" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The position of the first byte in the file to process.]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The number of bytes in the file to process.]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A section of an input file. Returned by {@link |
| InputFormat#getSplits(JobConf, int)} and passed to |
| {@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}. |
| @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit} |
| instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.FileSplit --> |
| <!-- start class org.apache.hadoop.mapred.ID --> |
| <class name="ID" extends="org.apache.hadoop.mapreduce.ID" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="ID" type="int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[constructs an ID object from the given int]]> |
| </doc> |
| </constructor> |
| <constructor name="ID" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[A general identifier, which internally stores the id |
| as an integer. This is the super class of {@link JobID}, |
| {@link TaskID} and {@link TaskAttemptID}. |
| |
| @see JobID |
| @see TaskID |
| @see TaskAttemptID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.ID --> |
| <!-- start interface org.apache.hadoop.mapred.InputFormat --> |
| <interface name="InputFormat" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.InputFormat} instead."> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Logically split the set of input files for the job. |
| |
| <p>Each {@link InputSplit} is then assigned to an individual {@link Mapper} |
| for processing.</p> |
| |
| <p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the |
| input files are not physically split into chunks. For e.g. a split could |
| be <i><input-file-path, start, offset></i> tuple. |
| |
| @param job job configuration. |
| @param numSplits the desired number of splits, a hint. |
| @return an array of {@link InputSplit}s for the job.]]> |
| </doc> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the {@link RecordReader} for the given {@link InputSplit}. |
| |
| <p>It is the responsibility of the <code>RecordReader</code> to respect |
| record boundaries while processing the logical split to present a |
| record-oriented view to the individual task.</p> |
| |
| @param split the {@link InputSplit} |
| @param job the job that this split belongs to |
| @return a {@link RecordReader}]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>InputFormat</code> describes the input-specification for a |
| Map-Reduce job. |
| |
| <p>The Map-Reduce framework relies on the <code>InputFormat</code> of the |
| job to:</p> |
| <ol> |
| <li> |
| Validate the input-specification of the job. |
| </li> |
| <li> |
| Split-up the input file(s) into logical {@link InputSplit}s, each of |
| which is then assigned to an individual {@link Mapper}. |
| </li> |
| <li> |
| Provide the {@link RecordReader} implementation to be used to glean |
| input records from the logical <code>InputSplit</code> for processing by |
| the {@link Mapper}. |
| </li> |
| </ol> |
| |
| <p>The default behavior of file-based {@link InputFormat}s, typically |
| sub-classes of {@link FileInputFormat}, is to split the |
| input into <i>logical</i> {@link InputSplit}s based on the total size, in |
| bytes, of the input files. However, the {@link FileSystem} blocksize of |
| the input files is treated as an upper bound for input splits. A lower bound |
| on the split size can be set via |
| <a href="{@docRoot}/../mapred-default.html#mapred.min.split.size"> |
| mapred.min.split.size</a>.</p> |
| |
| <p>Clearly, logical splits based on input-size is insufficient for many |
| applications since record boundaries are to be respected. In such cases, the |
| application has to also implement a {@link RecordReader} on whom lies the |
| responsibility to respect record-boundaries and present a record-oriented |
| view of the logical <code>InputSplit</code> to the individual task. |
| |
| @see InputSplit |
| @see RecordReader |
| @see JobClient |
| @see FileInputFormat |
| @deprecated Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.InputFormat --> |
| <!-- start interface org.apache.hadoop.mapred.InputSplit --> |
| <interface name="InputSplit" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.InputSplit} instead."> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the total number of bytes in the data of the <code>InputSplit</code>. |
| |
| @return the number of bytes in the input split. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the list of hostnames where the input split is located. |
| |
| @return list of hostnames where data of the <code>InputSplit</code> is |
| located as an array of <code>String</code>s. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>InputSplit</code> represents the data to be processed by an |
| individual {@link Mapper}. |
| |
| <p>Typically, it presents a byte-oriented view on the input and is the |
| responsibility of {@link RecordReader} of the job to process this and present |
| a record-oriented view. |
| |
| @see InputFormat |
| @see RecordReader |
| @deprecated Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.InputSplit --> |
| <!-- start class org.apache.hadoop.mapred.InvalidFileTypeException --> |
| <class name="InvalidFileTypeException" extends="java.io.IOException" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InvalidFileTypeException" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="InvalidFileTypeException" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[Used when the file type differs from the desired file type, such as |
| when a file is found where a directory is expected, or vice versa.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.InvalidFileTypeException --> |
| <!-- start class org.apache.hadoop.mapred.InvalidInputException --> |
| <class name="InvalidInputException" extends="java.io.IOException" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InvalidInputException" type="java.util.List" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create the exception with the given list. |
 @param probs the list of problems to report. This list is not copied.]]>
| </doc> |
| </constructor> |
| <method name="getProblems" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the complete list of the problems reported. |
| @return the list of problems, which must not be modified]]> |
| </doc> |
| </method> |
| <method name="getMessage" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get a summary message of the problems found. |
| @return the concatenated messages from all of the problems.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class wraps a list of problems with the input, so that the user |
| can get a list of problems together instead of finding and fixing them one |
| by one.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.InvalidInputException --> |
| <!-- start class org.apache.hadoop.mapred.InvalidJobConfException --> |
| <class name="InvalidJobConfException" extends="java.io.IOException" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="InvalidJobConfException" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="InvalidJobConfException" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
      <![CDATA[This exception is thrown when jobconf misses some mandatory attributes
| or value of some attributes is invalid.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.InvalidJobConfException --> |
| <!-- start class org.apache.hadoop.mapred.IsolationRunner --> |
| <class name="IsolationRunner" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="IsolationRunner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Run a single task |
| @param args the first argument is the task directory]]> |
| </doc> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.IsolationRunner --> |
| <!-- start class org.apache.hadoop.mapred.JobClient --> |
| <class name="JobClient" extends="org.apache.hadoop.conf.Configured" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.MRConstants"/> |
| <implements name="org.apache.hadoop.util.Tool"/> |
| <constructor name="JobClient" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job client.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobClient" type="org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Build a job client with the given {@link JobConf}, and connect to the |
| default {@link JobTracker}. |
| |
| @param conf the job configuration. |
| @throws IOException]]> |
| </doc> |
| </constructor> |
| <constructor name="JobClient" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Build a job client, connect to the indicated job tracker. |
| |
| @param jobTrackAddr the job tracker to connect to. |
| @param conf configuration.]]> |
| </doc> |
| </constructor> |
| <method name="init" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Connect to the default {@link JobTracker}. |
| @param conf the job configuration. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close the <code>JobClient</code>.]]> |
| </doc> |
| </method> |
| <method name="getFs" return="org.apache.hadoop.fs.FileSystem" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get a filesystem handle. We need this to prepare jobs |
| for submission to the MapReduce system. |
| |
| @return the filesystem handle.]]> |
| </doc> |
| </method> |
| <method name="submitJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobFile" type="java.lang.String"/> |
| <exception name="FileNotFoundException" type="java.io.FileNotFoundException"/> |
| <exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Submit a job to the MR system. |
| |
| This returns a handle to the {@link RunningJob} which can be used to track |
| the running-job. |
| |
| @param jobFile the job configuration. |
| @return a handle to the {@link RunningJob} which can be used to track the |
| running-job. |
| @throws FileNotFoundException |
| @throws InvalidJobConfException |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="submitJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="FileNotFoundException" type="java.io.FileNotFoundException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Submit a job to the MR system. |
| This returns a handle to the {@link RunningJob} which can be used to track |
| the running-job. |
| |
| @param job the job configuration. |
| @return a handle to the {@link RunningJob} which can be used to track the |
| running-job. |
| @throws FileNotFoundException |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="submitJobInternal" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="FileNotFoundException" type="java.io.FileNotFoundException"/> |
| <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Internal method for submitting jobs to the system. |
| @param job the configuration to submit |
| @return a proxy object for the running job |
| @throws FileNotFoundException |
| @throws ClassNotFoundException |
| @throws InterruptedException |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isJobDirValid" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobDirPath" type="org.apache.hadoop.fs.Path"/> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Checks if the job directory is clean and has all the required components |
| for (re) starting the job]]> |
| </doc> |
| </method> |
| <method name="getJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
      <![CDATA[Get a {@link RunningJob} object to track an ongoing job.  Returns
| null if the id does not correspond to any known job. |
| |
| @param jobid the jobid of the job. |
| @return the {@link RunningJob} handle to track the job, null if the |
| <code>jobid</code> doesn't correspond to any known job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Applications should rather use {@link #getJob(JobID)}."> |
| <param name="jobid" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Applications should rather use {@link #getJob(JobID)}.]]> |
| </doc> |
| </method> |
| <method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the information of the current state of the map tasks of a job. |
| |
| @param jobId the job to query. |
| @return the list of all of the map tips. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Applications should rather use {@link #getMapTaskReports(JobID)}"> |
| <param name="jobId" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}]]> |
| </doc> |
| </method> |
| <method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the information of the current state of the reduce tasks of a job. |
| |
| @param jobId the job to query. |
| @return the list of all of the reduce tips. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the information of the current state of the cleanup tasks of a job. |
| |
| @param jobId the job to query. |
| @return the list of all of the cleanup tips. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the information of the current state of the setup tasks of a job. |
| |
| @param jobId the job to query. |
| @return the list of all of the setup tips. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Applications should rather use {@link #getReduceTaskReports(JobID)}"> |
| <param name="jobId" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}]]> |
| </doc> |
| </method> |
| <method name="displayTasks" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="type" type="java.lang.String"/> |
| <param name="state" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Display the information about a job's tasks, of a particular type and |
| in a particular state |
| |
| @param jobId the ID of the job |
| @param type the type of the task (map/reduce/setup/cleanup) |
| @param state the state of the task |
| (pending/running/completed/failed/killed)]]> |
| </doc> |
| </method> |
| <method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get status information about the Map-Reduce cluster. |
| |
| @return the status information about the Map-Reduce cluster as an object |
| of {@link ClusterStatus}. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="detailed" type="boolean"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get status information about the Map-Reduce cluster. |
| |
| @param detailed if true then get a detailed status including the |
| tracker names |
| @return the status information about the Map-Reduce cluster as an object |
| of {@link ClusterStatus}. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the jobs that are not completed and not failed. |
| |
| @return array of {@link JobStatus} for the running/to-be-run jobs. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the jobs that are submitted. |
| |
| @return array of {@link JobStatus} for the submitted jobs. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="runJob" return="org.apache.hadoop.mapred.RunningJob" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Utility that submits a job, then polls for progress until the job is |
| complete. |
| |
| @param job the job configuration. |
| @throws IOException if the job fails]]> |
| </doc> |
| </method> |
| <method name="monitorAndPrintJob" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="job" type="org.apache.hadoop.mapred.RunningJob"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Monitor a job and print status in real-time as progress is made and tasks |
| fail. |
| @param conf the job's configuration |
| @param job the job to track |
| @return true if the job succeeded |
| @throws IOException if communication to the JobTracker fails]]> |
| </doc> |
| </method> |
| <method name="setTaskOutputFilter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/> |
| <doc> |
      <![CDATA[Sets the output filter for tasks. Only those tasks are printed whose
| output matches the filter. |
| @param newValue task filter.]]> |
| </doc> |
| </method> |
| <method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the task output filter out of the JobConf. |
| |
| @param job the JobConf to examine. |
| @return the filter level.]]> |
| </doc> |
| </method> |
| <method name="setTaskOutputFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/> |
| <doc> |
| <![CDATA[Modify the JobConf to set the task output filter. |
| |
| @param job the JobConf to modify. |
| @param newValue the value to set.]]> |
| </doc> |
| </method> |
| <method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns task output filter. |
| @return task filter.]]> |
| </doc> |
| </method> |
| <method name="run" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="argv" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| </method> |
| <method name="getDefaultMaps" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get status information about the max available Maps in the cluster. |
| |
| @return the max available Maps in the cluster |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getDefaultReduces" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get status information about the max available Reduces in the cluster. |
| |
| @return the max available Reduces in the cluster |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getSystemDir" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Grab the jobtracker system directory path where job-specific files are to be placed. |
| |
| @return the system directory where job-specific files are to be placed.]]> |
| </doc> |
| </method> |
| <method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return an array of queue information objects about all the Job Queues |
| configured. |
| |
| @return Array of JobQueueInfo objects |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets all the jobs which were added to particular Job Queue |
| |
| @param queueName name of the Job Queue |
| @return Array of jobs present in the job queue |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the queue information associated to a particular Job Queue |
| |
| @param queueName name of the job queue. |
| @return Queue information associated to particular queue. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="argv" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| </method> |
| <doc> |
| <![CDATA[<code>JobClient</code> is the primary interface for the user-job to interact |
| with the {@link JobTracker}. |
| |
| <code>JobClient</code> provides facilities to submit jobs, track their |
| progress, access component-tasks' reports/logs, get the Map-Reduce cluster |
| status information etc. |
| |
| <p>The job submission process involves: |
| <ol> |
| <li> |
| Checking the input and output specifications of the job. |
| </li> |
| <li> |
| Computing the {@link InputSplit}s for the job. |
| </li> |
| <li> |
| Setup the requisite accounting information for the {@link DistributedCache} |
| of the job, if necessary. |
| </li> |
| <li> |
| Copying the job's jar and configuration to the map-reduce system directory |
| on the distributed file-system. |
| </li> |
| <li> |
| Submitting the job to the <code>JobTracker</code> and optionally monitoring |
 its status.
| </li> |
| </ol></p> |
| |
| Normally the user creates the application, describes various facets of the |
| job via {@link JobConf} and then uses the <code>JobClient</code> to submit |
| the job and monitor its progress. |
| |
| <p>Here is an example on how to use <code>JobClient</code>:</p> |
| <p><blockquote><pre> |
| // Create a new JobConf |
| JobConf job = new JobConf(new Configuration(), MyJob.class); |
| |
| // Specify various job-specific parameters |
| job.setJobName("myjob"); |
| |
| job.setInputPath(new Path("in")); |
| job.setOutputPath(new Path("out")); |
| |
| job.setMapperClass(MyJob.MyMapper.class); |
| job.setReducerClass(MyJob.MyReducer.class); |
| |
| // Submit the job, then poll for progress until the job is complete |
| JobClient.runJob(job); |
| </pre></blockquote></p> |
| |
| <h4 id="JobControl">Job Control</h4> |
| |
| <p>At times clients would chain map-reduce jobs to accomplish complex tasks |
| which cannot be done via a single map-reduce job. This is fairly easy since |
| the output of the job, typically, goes to distributed file-system and that |
| can be used as the input for the next job.</p> |
| |
| <p>However, this also means that the onus on ensuring jobs are complete |
| (success/failure) lies squarely on the clients. In such situations the |
| various job-control options are: |
| <ol> |
| <li> |
| {@link #runJob(JobConf)} : submits the job and returns only after |
| the job has completed. |
| </li> |
| <li> |
| {@link #submitJob(JobConf)} : only submits the job, then poll the |
| returned handle to the {@link RunningJob} to query status and make |
| scheduling decisions. |
| </li> |
| <li> |
| {@link JobConf#setJobEndNotificationURI(String)} : setup a notification |
| on job-completion, thus avoiding polling. |
| </li> |
| </ol></p> |
| |
| @see JobConf |
| @see ClusterStatus |
| @see Tool |
| @see DistributedCache]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobClient --> |
| <!-- start class org.apache.hadoop.mapred.JobClient.TaskStatusFilter --> |
| <class name="JobClient.TaskStatusFilter" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <field name="NONE" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="KILLED" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FAILED" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SUCCEEDED" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="ALL" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobClient.TaskStatusFilter --> |
| <!-- start class org.apache.hadoop.mapred.JobConf --> |
| <class name="JobConf" extends="org.apache.hadoop.conf.Configuration" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link Configuration} instead"> |
| <constructor name="JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce job configuration.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce job configuration. |
| |
| @param exampleClass a class whose containing jar is used as the job's jar.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce job configuration. |
| |
| @param conf a Configuration whose settings will be inherited.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="org.apache.hadoop.conf.Configuration, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce job configuration. |
| |
| @param conf a Configuration whose settings will be inherited. |
| @param exampleClass a class whose containing jar is used as the job's jar.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce configuration. |
| |
| @param config a Configuration-format XML job description file.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="org.apache.hadoop.fs.Path" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a map/reduce configuration. |
| |
| @param config a Configuration-format XML job description file.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobConf" type="boolean" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[A new map/reduce configuration where the behavior of reading from the |
| default resources can be turned off. |
| <p/> |
| If the parameter {@code loadDefaults} is false, the new instance |
| will not load resources from the default files. |
| |
| @param loadDefaults specifies whether to load from the default files]]> |
| </doc> |
| </constructor> |
| <method name="getJar" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user jar for the map-reduce job. |
| |
| @return the user jar for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="setJar" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jar" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the user jar for the map-reduce job. |
| |
| @param jar the user jar for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="setJarByClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cls" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the job's jar file by finding an example class location. |
| |
| @param cls the example class.]]> |
| </doc> |
| </method> |
| <method name="getLocalDirs" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="deleteLocalFiles" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="deleteLocalFiles" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="subdir" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getLocalPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="pathString" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Constructs a local file name. Files are distributed among configured |
| local directories.]]> |
| </doc> |
| </method> |
| <method name="getUser" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the reported username for this job. |
| |
| @return the username]]> |
| </doc> |
| </method> |
| <method name="setUser" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="user" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the reported username for this job. |
| |
| @param user the username for this job.]]> |
| </doc> |
| </method> |
| <method name="setKeepFailedTaskFiles" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="keep" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the framework should keep the intermediate files for |
| failed tasks. |
| |
| @param keep <code>true</code> if framework should keep the intermediate files |
| for failed tasks, <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="getKeepFailedTaskFiles" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should the temporary files for failed tasks be kept? |
| |
| @return should the files be kept?]]> |
| </doc> |
| </method> |
| <method name="setKeepTaskFilesPattern" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="pattern" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set a regular expression for task names that should be kept. |
| The regular expression ".*_m_000123_0" would keep the files |
| for the first instance of map 123 that ran. |
| |
| @param pattern the java.util.regex.Pattern to match against the |
| task names.]]> |
| </doc> |
| </method> |
| <method name="getKeepTaskFilesPattern" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the regular expression that is matched against the task names |
| to see if we need to keep the files. |
| |
 @return the pattern as a string, if it was set, otherwise null.]]>
| </doc> |
| </method> |
| <method name="setWorkingDirectory" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="dir" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Set the current working directory for the default file system. |
| |
| @param dir the new current working directory.]]> |
| </doc> |
| </method> |
| <method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the current working directory for the default file system. |
| |
| @return the directory name.]]> |
| </doc> |
| </method> |
| <method name="setNumTasksToExecutePerJvm" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="numTasks" type="int"/> |
| <doc> |
| <![CDATA[Sets the number of tasks that a spawned task JVM should run |
| before it exits |
| @param numTasks the number of tasks to execute; defaults to 1; |
| -1 signifies no limit]]> |
| </doc> |
| </method> |
| <method name="getNumTasksToExecutePerJvm" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the number of tasks that a spawned JVM should execute]]> |
| </doc> |
| </method> |
| <method name="getInputFormat" return="org.apache.hadoop.mapred.InputFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link InputFormat} implementation for the map-reduce job, |
 defaults to {@link TextInputFormat} if not specified explicitly.
| |
| @return the {@link InputFormat} implementation for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="setInputFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link InputFormat} implementation for the map-reduce job. |
| |
| @param theClass the {@link InputFormat} implementation for the map-reduce |
| job.]]> |
| </doc> |
| </method> |
| <method name="getOutputFormat" return="org.apache.hadoop.mapred.OutputFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link OutputFormat} implementation for the map-reduce job, |
 defaults to {@link TextOutputFormat} if not specified explicitly.
| |
| @return the {@link OutputFormat} implementation for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="getOutputCommitter" return="org.apache.hadoop.mapred.OutputCommitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link OutputCommitter} implementation for the map-reduce job, |
| defaults to {@link FileOutputCommitter} if not specified explicitly. |
| |
| @return the {@link OutputCommitter} implementation for the map-reduce job.]]> |
| </doc> |
| </method> |
| <method name="setOutputCommitter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link OutputCommitter} implementation for the map-reduce job. |
| |
| @param theClass the {@link OutputCommitter} implementation for the map-reduce |
| job.]]> |
| </doc> |
| </method> |
| <method name="setOutputFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link OutputFormat} implementation for the map-reduce job. |
| |
| @param theClass the {@link OutputFormat} implementation for the map-reduce |
| job.]]> |
| </doc> |
| </method> |
| <method name="setCompressMapOutput" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="compress" type="boolean"/> |
| <doc> |
| <![CDATA[Should the map outputs be compressed before transfer? |
| Uses the SequenceFile compression. |
| |
| @param compress should the map outputs be compressed?]]> |
| </doc> |
| </method> |
| <method name="getCompressMapOutput" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
      <![CDATA[Are the outputs of the maps to be compressed?
| |
| @return <code>true</code> if the outputs of the maps are to be compressed, |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setMapOutputCompressorClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="codecClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the given class as the {@link CompressionCodec} for the map outputs. |
| |
| @param codecClass the {@link CompressionCodec} class that will compress |
| the map outputs.]]> |
| </doc> |
| </method> |
| <method name="getMapOutputCompressorClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="defaultValue" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Get the {@link CompressionCodec} for compressing the map outputs. |
| |
| @param defaultValue the {@link CompressionCodec} to return if not set |
| @return the {@link CompressionCodec} class that should be used to compress the |
| map outputs. |
| @throws IllegalArgumentException if the class was specified, but not found]]> |
| </doc> |
| </method> |
| <method name="getMapOutputKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the key class for the map output data. If it is not set, use the |
| (final) output key class. This allows the map output key class to be |
| different than the final output key class. |
| |
| @return the map output key class.]]> |
| </doc> |
| </method> |
| <method name="setMapOutputKeyClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the key class for the map output data. This allows the user to |
| specify the map output key class to be different than the final output |
| value class. |
| |
| @param theClass the map output key class.]]> |
| </doc> |
| </method> |
| <method name="getMapOutputValueClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the value class for the map output data. If it is not set, use the |
 (final) output value class. This allows the map output value class to be
| different than the final output value class. |
| |
| @return the map output value class.]]> |
| </doc> |
| </method> |
| <method name="setMapOutputValueClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the value class for the map output data. This allows the user to |
| specify the map output value class to be different than the final output |
| value class. |
| |
| @param theClass the map output value class.]]> |
| </doc> |
| </method> |
| <method name="getOutputKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the key class for the job output data. |
| |
| @return the key class for the job output data.]]> |
| </doc> |
| </method> |
| <method name="setOutputKeyClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the key class for the job output data. |
| |
| @param theClass the key class for the job output data.]]> |
| </doc> |
| </method> |
| <method name="getOutputKeyComparator" return="org.apache.hadoop.io.RawComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link RawComparator} comparator used to compare keys. |
| |
| @return the {@link RawComparator} comparator used to compare keys.]]> |
| </doc> |
| </method> |
| <method name="setOutputKeyComparatorClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link RawComparator} comparator used to compare keys. |
| |
| @param theClass the {@link RawComparator} comparator used to |
| compare keys. |
| @see #setOutputValueGroupingComparator(Class)]]> |
| </doc> |
| </method> |
| <method name="setKeyFieldComparatorOptions" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="keySpec" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys. |
| |
| @param keySpec the key specification of the form -k pos1[,pos2], where, |
| pos is of the form f[.c][opts], where f is the number |
| of the key field to use, and c is the number of the first character from |
| the beginning of the field. Fields and character posns are numbered |
| starting with 1; a character position of zero in pos2 indicates the |
| field's last character. If '.c' is omitted from pos1, it defaults to 1 |
| (the beginning of the field); if omitted from pos2, it defaults to 0 |
| (the end of the field). opts are ordering options. The supported options |
| are: |
| -n, (Sort numerically) |
| -r, (Reverse the result of comparison)]]> |
| </doc> |
| </method> |
| <method name="getKeyFieldComparatorOption" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link KeyFieldBasedComparator} options]]> |
| </doc> |
| </method> |
| <method name="setKeyFieldPartitionerOptions" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="keySpec" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for |
| {@link Partitioner} |
| |
| @param keySpec the key specification of the form -k pos1[,pos2], where, |
| pos is of the form f[.c][opts], where f is the number |
| of the key field to use, and c is the number of the first character from |
| the beginning of the field. Fields and character posns are numbered |
| starting with 1; a character position of zero in pos2 indicates the |
| field's last character. If '.c' is omitted from pos1, it defaults to 1 |
| (the beginning of the field); if omitted from pos2, it defaults to 0 |
| (the end of the field).]]> |
| </doc> |
| </method> |
| <method name="getKeyFieldPartitionerOption" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]> |
| </doc> |
| </method> |
| <method name="getOutputValueGroupingComparator" return="org.apache.hadoop.io.RawComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user defined {@link WritableComparable} comparator for |
| grouping keys of inputs to the reduce. |
| |
| @return comparator set by the user for grouping values. |
| @see #setOutputValueGroupingComparator(Class) for details.]]> |
| </doc> |
| </method> |
| <method name="setOutputValueGroupingComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the user defined {@link RawComparator} comparator for |
| grouping keys in the input to the reduce. |
| |
| <p>This comparator should be provided if the equivalence rules for keys |
| for sorting the intermediates are different from those for grouping keys |
| before each call to |
| {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p> |
| |
| <p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed |
| in a single call to the reduce function if K1 and K2 compare as equal.</p> |
| |
| <p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control |
| how keys are sorted, this can be used in conjunction to simulate |
| <i>secondary sort on values</i>.</p> |
| |
| <p><i>Note</i>: This is not a guarantee of the reduce sort being |
| <i>stable</i> in any sense. (In any case, with the order of available |
| map-outputs to the reduce being non-deterministic, it wouldn't make |
| that much sense.)</p> |
| |
| @param theClass the comparator class to be used for grouping keys. |
| It should implement <code>RawComparator</code>. |
| @see #setOutputKeyComparatorClass(Class)]]> |
| </doc> |
| </method> |
| <method name="getUseNewMapper" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should the framework use the new context-object code for running |
| the mapper? |
| @return true, if the new api should be used]]> |
| </doc> |
| </method> |
| <method name="setUseNewMapper" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="flag" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the framework should use the new api for the mapper. |
| This is the default for jobs submitted with the new Job api. |
| @param flag true, if the new api should be used]]> |
| </doc> |
| </method> |
| <method name="getUseNewReducer" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should the framework use the new context-object code for running |
| the reducer? |
| @return true, if the new api should be used]]> |
| </doc> |
| </method> |
| <method name="setUseNewReducer" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="flag" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the framework should use the new api for the reducer. |
| This is the default for jobs submitted with the new Job api. |
| @param flag true, if the new api should be used]]> |
| </doc> |
| </method> |
| <method name="getOutputValueClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the value class for job outputs. |
| |
| @return the value class for job outputs.]]> |
| </doc> |
| </method> |
| <method name="setOutputValueClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the value class for job outputs. |
| |
| @param theClass the value class for job outputs.]]> |
| </doc> |
| </method> |
| <method name="getMapperClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link Mapper} class for the job. |
| |
| @return the {@link Mapper} class for the job.]]> |
| </doc> |
| </method> |
| <method name="setMapperClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link Mapper} class for the job. |
| |
| @param theClass the {@link Mapper} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getMapRunnerClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link MapRunnable} class for the job. |
| |
| @return the {@link MapRunnable} class for the job.]]> |
| </doc> |
| </method> |
| <method name="setMapRunnerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Expert: Set the {@link MapRunnable} class for the job. |
| |
| Typically used to exert greater control on {@link Mapper}s. |
| |
| @param theClass the {@link MapRunnable} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getPartitionerClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link Partitioner} used to partition {@link Mapper}-outputs |
| to be sent to the {@link Reducer}s. |
| |
| @return the {@link Partitioner} used to partition map-outputs.]]> |
| </doc> |
| </method> |
| <method name="setPartitionerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link Partitioner} class used to partition |
| {@link Mapper}-outputs to be sent to the {@link Reducer}s. |
| |
| @param theClass the {@link Partitioner} used to partition map-outputs.]]> |
| </doc> |
| </method> |
| <method name="getReducerClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link Reducer} class for the job. |
| |
| @return the {@link Reducer} class for the job.]]> |
| </doc> |
| </method> |
| <method name="setReducerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the {@link Reducer} class for the job. |
| |
| @param theClass the {@link Reducer} class for the job.]]> |
| </doc> |
| </method> |
| <method name="getCombinerClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user-defined <i>combiner</i> class used to combine map-outputs |
 before being sent to the reducers. Typically the combiner is the same as
| the {@link Reducer} for the job i.e. {@link #getReducerClass()}. |
| |
| @return the user-defined combiner class used to combine map-outputs.]]> |
| </doc> |
| </method> |
| <method name="setCombinerClass" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the user-defined <i>combiner</i> class used to combine map-outputs |
| before being sent to the reducers. |
| |
| <p>The combiner is an application-specified aggregation operation, which |
| can help cut down the amount of data transferred between the |
| {@link Mapper} and the {@link Reducer}, leading to better performance.</p> |
| |
| <p>The framework may invoke the combiner 0, 1, or multiple times, in both |
| the mapper and reducer tasks. In general, the combiner is called as the |
| sort/merge result is written to disk. The combiner must: |
| <ul> |
| <li> be side-effect free</li> |
| <li> have the same input and output key types and the same input and |
| output value types</li> |
| </ul></p> |
| |
 <p>Typically the combiner is the same as the <code>Reducer</code> for the
| job i.e. {@link #setReducerClass(Class)}.</p> |
| |
| @param theClass the user-defined combiner class used to combine |
| map-outputs.]]> |
| </doc> |
| </method> |
| <method name="getSpeculativeExecution" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should speculative execution be used for this job? |
| Defaults to <code>true</code>. |
| |
| @return <code>true</code> if speculative execution be used for this job, |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setSpeculativeExecution" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="speculativeExecution" type="boolean"/> |
| <doc> |
| <![CDATA[Turn speculative execution on or off for this job. |
| |
| @param speculativeExecution <code>true</code> if speculative execution |
| should be turned on, else <code>false</code>.]]> |
| </doc> |
| </method> |
| <method name="getMapSpeculativeExecution" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should speculative execution be used for this job for map tasks? |
| Defaults to <code>true</code>. |
| |
| @return <code>true</code> if speculative execution be |
| used for this job for map tasks, |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setMapSpeculativeExecution" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="speculativeExecution" type="boolean"/> |
| <doc> |
| <![CDATA[Turn speculative execution on or off for this job for map tasks. |
| |
| @param speculativeExecution <code>true</code> if speculative execution |
| should be turned on for map tasks, |
| else <code>false</code>.]]> |
| </doc> |
| </method> |
| <method name="getReduceSpeculativeExecution" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Should speculative execution be used for this job for reduce tasks? |
| Defaults to <code>true</code>. |
| |
| @return <code>true</code> if speculative execution be used |
| for reduce tasks for this job, |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setReduceSpeculativeExecution" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="speculativeExecution" type="boolean"/> |
| <doc> |
| <![CDATA[Turn speculative execution on or off for this job for reduce tasks. |
| |
| @param speculativeExecution <code>true</code> if speculative execution |
| should be turned on for reduce tasks, |
| else <code>false</code>.]]> |
| </doc> |
| </method> |
| <method name="getNumMapTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
      <![CDATA[Get the configured number of map tasks for this job.
| Defaults to <code>1</code>. |
| |
 @return the number of map tasks for this job.]]>
| </doc> |
| </method> |
| <method name="setNumMapTasks" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[Set the number of map tasks for this job. |
| |
| <p><i>Note</i>: This is only a <i>hint</i> to the framework. The actual |
| number of spawned map tasks depends on the number of {@link InputSplit}s |
| generated by the job's {@link InputFormat#getSplits(JobConf, int)}. |
| |
| A custom {@link InputFormat} is typically used to accurately control |
| the number of map tasks for the job.</p> |
| |
| <h4 id="NoOfMaps">How many maps?</h4> |
| |
| <p>The number of maps is usually driven by the total size of the inputs |
| i.e. total number of blocks of the input files.</p> |
| |
| <p>The right level of parallelism for maps seems to be around 10-100 maps |
| per-node, although it has been set up to 300 or so for very cpu-light map |
| tasks. Task setup takes awhile, so it is best if the maps take at least a |
| minute to execute.</p> |
| |
| <p>The default behavior of file-based {@link InputFormat}s is to split the |
| input into <i>logical</i> {@link InputSplit}s based on the total size, in |
| bytes, of input files. However, the {@link FileSystem} blocksize of the |
| input files is treated as an upper bound for input splits. A lower bound |
| on the split size can be set via |
| <a href="{@docRoot}/../mapred-default.html#mapred.min.split.size"> |
| mapred.min.split.size</a>.</p> |
| |
| <p>Thus, if you expect 10TB of input data and have a blocksize of 128MB, |
| you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is |
| used to set it even higher.</p> |
| |
| @param n the number of map tasks for this job. |
| @see InputFormat#getSplits(JobConf, int) |
| @see FileInputFormat |
| @see FileSystem#getDefaultBlockSize() |
| @see FileStatus#getBlockSize()]]> |
| </doc> |
| </method> |
| <method name="getNumReduceTasks" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
      <![CDATA[Get the configured number of reduce tasks for this job. Defaults to
| <code>1</code>. |
| |
| @return the number of reduce tasks for this job.]]> |
| </doc> |
| </method> |
| <method name="setNumReduceTasks" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[Set the requisite number of reduce tasks for this job. |
| |
| <h4 id="NoOfReduces">How many reduces?</h4> |
| |
| <p>The right number of reduces seems to be <code>0.95</code> or |
| <code>1.75</code> multiplied by (<<i>no. of nodes</i>> * |
| <a href="{@docRoot}/../mapred-default.html#mapred.tasktracker.reduce.tasks.maximum"> |
| mapred.tasktracker.reduce.tasks.maximum</a>). |
| </p> |
| |
| <p>With <code>0.95</code> all of the reduces can launch immediately and |
 start transferring map outputs as the maps finish. With <code>1.75</code>
| the faster nodes will finish their first round of reduces and launch a |
| second wave of reduces doing a much better job of load balancing.</p> |
| |
| <p>Increasing the number of reduces increases the framework overhead, but |
| increases load balancing and lowers the cost of failures.</p> |
| |
| <p>The scaling factors above are slightly less than whole numbers to |
| reserve a few reduce slots in the framework for speculative-tasks, failures |
| etc.</p> |
| |
| <h4 id="ReducerNone">Reducer NONE</h4> |
| |
| <p>It is legal to set the number of reduce-tasks to <code>zero</code>.</p> |
| |
| <p>In this case the output of the map-tasks directly go to distributed |
| file-system, to the path set by |
| {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the |
| framework doesn't sort the map-outputs before writing it out to HDFS.</p> |
| |
| @param n the number of reduce tasks for this job.]]> |
| </doc> |
| </method> |
| <method name="getMaxMapAttempts" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configured number of maximum attempts that will be made to run a |
| map task, as specified by the <code>mapred.map.max.attempts</code> |
| property. If this property is not already set, the default is 4 attempts. |
| |
| @return the max number of attempts per map task.]]> |
| </doc> |
| </method> |
| <method name="setMaxMapAttempts" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[Expert: Set the number of maximum attempts that will be made to run a |
| map task. |
| |
| @param n the number of attempts per map task.]]> |
| </doc> |
| </method> |
| <method name="getMaxReduceAttempts" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configured number of maximum attempts that will be made to run a |
| reduce task, as specified by the <code>mapred.reduce.max.attempts</code> |
| property. If this property is not already set, the default is 4 attempts. |
| |
| @return the max number of attempts per reduce task.]]> |
| </doc> |
| </method> |
| <method name="setMaxReduceAttempts" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="n" type="int"/> |
| <doc> |
| <![CDATA[Expert: Set the number of maximum attempts that will be made to run a |
| reduce task. |
| |
| @param n the number of attempts per reduce task.]]> |
| </doc> |
| </method> |
| <method name="getJobName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user-specified job name. This is only used to identify the |
| job to the user. |
| |
| @return the job's name, defaulting to "".]]> |
| </doc> |
| </method> |
| <method name="setJobName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the user-specified job name. |
| |
| @param name the job's new name.]]> |
| </doc> |
| </method> |
| <method name="getSessionId" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user-specified session identifier. The default is the empty string. |
| |
| The session identifier is used to tag metric data that is reported to some |
| performance metrics system via the org.apache.hadoop.metrics API. The |
| session identifier is intended, in particular, for use by Hadoop-On-Demand |
| (HOD) which allocates a virtual Hadoop cluster dynamically and transiently. |
| HOD will set the session identifier by modifying the mapred-site.xml file |
| before starting the cluster. |
| |
| When not running under HOD, this identifier is expected to remain set to |
| the empty string. |
| |
| @return the session identifier, defaulting to "".]]> |
| </doc> |
| </method> |
| <method name="setSessionId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="sessionId" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the user-specified session identifier. |
| |
| @param sessionId the new session id.]]> |
| </doc> |
| </method> |
| <method name="setMaxTaskFailuresPerTracker" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="noFailures" type="int"/> |
| <doc> |
| <![CDATA[Set the maximum no. of failures of a given job per tasktracker. |
| If the no. of task failures exceeds <code>noFailures</code>, the |
| tasktracker is <i>blacklisted</i> for this job. |
| |
| @param noFailures maximum no. of failures of a given job per tasktracker.]]> |
| </doc> |
| </method> |
| <method name="getMaxTaskFailuresPerTracker" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Expert: Get the maximum no. of failures of a given job per tasktracker. |
| If the no. of task failures exceeds this, the tasktracker is |
| <i>blacklisted</i> for this job. |
| |
| @return the maximum no. of failures of a given job per tasktracker.]]> |
| </doc> |
| </method> |
| <method name="getMaxMapTaskFailuresPercent" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the maximum percentage of map tasks that can fail without |
| the job being aborted. |
| |
| Each map task is executed a minimum of {@link #getMaxMapAttempts()} |
| attempts before being declared as <i>failed</i>. |
| |
| Defaults to <code>zero</code>, i.e. <i>any</i> failed map-task results in |
| the job being declared as {@link JobStatus#FAILED}. |
| |
| @return the maximum percentage of map tasks that can fail without |
| the job being aborted.]]> |
| </doc> |
| </method> |
| <method name="setMaxMapTaskFailuresPercent" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="percent" type="int"/> |
| <doc> |
| <![CDATA[Expert: Set the maximum percentage of map tasks that can fail without the |
| job being aborted. |
| |
| Each map task is executed a minimum of {@link #getMaxMapAttempts} attempts |
| before being declared as <i>failed</i>. |
| |
| @param percent the maximum percentage of map tasks that can fail without |
| the job being aborted.]]> |
| </doc> |
| </method> |
| <method name="getMaxReduceTaskFailuresPercent" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the maximum percentage of reduce tasks that can fail without |
| the job being aborted. |
| |
| Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()} |
| attempts before being declared as <i>failed</i>. |
| |
| Defaults to <code>zero</code>, i.e. <i>any</i> failed reduce-task results |
| in the job being declared as {@link JobStatus#FAILED}. |
| |
| @return the maximum percentage of reduce tasks that can fail without |
| the job being aborted.]]> |
| </doc> |
| </method> |
| <method name="setMaxReduceTaskFailuresPercent" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="percent" type="int"/> |
| <doc> |
| <![CDATA[Set the maximum percentage of reduce tasks that can fail without the job |
| being aborted. |
| |
| Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()} |
| attempts before being declared as <i>failed</i>. |
| |
| @param percent the maximum percentage of reduce tasks that can fail without |
| the job being aborted.]]> |
| </doc> |
| </method> |
| <method name="setJobPriority" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="prio" type="org.apache.hadoop.mapred.JobPriority"/> |
| <doc> |
| <![CDATA[Set {@link JobPriority} for this job. |
| |
| @param prio the {@link JobPriority} for this job.]]> |
| </doc> |
| </method> |
| <method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the {@link JobPriority} for this job. |
| |
| @return the {@link JobPriority} for this job.]]> |
| </doc> |
| </method> |
| <method name="getProfileEnabled" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get whether the task profiling is enabled. |
| @return true if some tasks will be profiled]]> |
| </doc> |
| </method> |
| <method name="setProfileEnabled" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="newValue" type="boolean"/> |
| <doc> |
| <![CDATA[Set whether the system should collect profiler information for some of |
| the tasks in this job? The information is stored in the user log |
| directory. |
| @param newValue true means it should be gathered]]> |
| </doc> |
| </method> |
| <method name="getProfileParams" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the profiler configuration arguments. |
| |
| The default value for this property is |
| "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s" |
| |
| @return the parameters to pass to the task child to configure profiling]]> |
| </doc> |
| </method> |
| <method name="setProfileParams" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="value" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it |
| will be replaced with the name of the profiling output file when the task |
| runs. |
| |
| This value is passed to the task child JVM on the command line. |
| |
| @param value the configuration string]]> |
| </doc> |
| </method> |
| <method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="isMap" type="boolean"/> |
| <doc> |
| <![CDATA[Get the range of maps or reduces to profile. |
| @param isMap is the task a map? |
| @return the task ranges]]> |
| </doc> |
| </method> |
| <method name="setProfileTaskRange" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="isMap" type="boolean"/> |
| <param name="newValue" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true) |
| must also be called. |
| @param newValue a set of integer ranges of the map ids]]> |
| </doc> |
| </method> |
| <method name="setMapDebugScript" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="mDbgScript" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the debug script to run when the map tasks fail. |
| |
| <p>The debug script can aid debugging of failed map tasks. The script is |
| given task's stdout, stderr, syslog, jobconf files as arguments.</p> |
| |
| <p>The debug command, run on the node where the map failed, is:</p> |
| <p><pre><blockquote> |
| $script $stdout $stderr $syslog $jobconf. |
| </blockquote></pre></p> |
| |
| <p> The script file is distributed through {@link DistributedCache} |
| APIs. The script needs to be symlinked. </p> |
| |
| <p>Here is an example of how to submit a script: |
| <p><blockquote><pre> |
| job.setMapDebugScript("./myscript"); |
| DistributedCache.createSymlink(job); |
| DistributedCache.addCacheFile("/debug/scripts/myscript#myscript"); |
| </pre></blockquote></p> |
| |
| @param mDbgScript the script name]]> |
| </doc> |
| </method> |
| <method name="getMapDebugScript" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the map task's debug script. |
| |
| @return the debug Script for the mapred job for failed map tasks. |
| @see #setMapDebugScript(String)]]> |
| </doc> |
| </method> |
| <method name="setReduceDebugScript" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="rDbgScript" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the debug script to run when the reduce tasks fail. |
| |
| <p>The debug script can aid debugging of failed reduce tasks. The script |
| is given task's stdout, stderr, syslog, jobconf files as arguments.</p> |
| |
| <p>The debug command, run on the node where the reduce failed, is:</p> |
| <p><pre><blockquote> |
| $script $stdout $stderr $syslog $jobconf. |
| </blockquote></pre></p> |
| |
| <p> The script file is distributed through {@link DistributedCache} |
| APIs. The script file needs to be symlinked </p> |
| |
| <p>Here is an example of how to submit a script: |
| <p><blockquote><pre> |
| job.setReduceDebugScript("./myscript"); |
| DistributedCache.createSymlink(job); |
| DistributedCache.addCacheFile("/debug/scripts/myscript#myscript"); |
| </pre></blockquote></p> |
| |
| @param rDbgScript the script name]]> |
| </doc> |
| </method> |
| <method name="getReduceDebugScript" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the reduce task's debug script. |
| |
| @return the debug script for the mapred job for failed reduce tasks. |
| @see #setReduceDebugScript(String)]]> |
| </doc> |
| </method> |
| <method name="getJobEndNotificationURI" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the uri to be invoked in-order to send a notification after the job |
| has completed (success/failure). |
| |
| @return the job end notification uri, <code>null</code> if it hasn't |
| been set. |
| @see #setJobEndNotificationURI(String)]]> |
| </doc> |
| </method> |
| <method name="setJobEndNotificationURI" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="uri" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the uri to be invoked in-order to send a notification after the job |
| has completed (success/failure). |
| |
| <p>The uri can contain 2 special parameters: <tt>$jobId</tt> and |
| <tt>$jobStatus</tt>. Those, if present, are replaced by the job's |
| identifier and completion-status respectively.</p> |
| |
| <p>This is typically used by application-writers to implement chaining of |
| Map-Reduce jobs in an <i>asynchronous manner</i>.</p> |
| |
| @param uri the job end notification uri |
| @see JobStatus |
| @see <a href="{@docRoot}/org/apache/hadoop/mapred/JobClient.html#JobCompletionAndChaining">Job Completion and Chaining</a>]]> |
| </doc> |
| </method> |
| <method name="getJobLocalDir" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get job-specific shared directory for use as scratch space |
| |
| <p> |
| When a job starts, a shared directory is created at location |
| <code> |
| ${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ </code>. |
| This directory is exposed to the users through |
| <code>job.local.dir </code>. |
| So, the tasks can use this space |
| as scratch space and share files among them. </p> |
| This value is also available as a system property. |
| |
| @return The localized job specific shared directory]]> |
| </doc> |
| </method> |
| <method name="getMaxVirtualMemoryForTask" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The maximum amount of memory any task of this job will use. See |
| {@link #MAPRED_TASK_MAXVMEM_PROPERTY} |
| |
| @return The maximum amount of memory any task of this job will use, in |
| bytes. |
| @see #setMaxVirtualMemoryForTask(long)]]> |
| </doc> |
| </method> |
| <method name="setMaxVirtualMemoryForTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="vmem" type="long"/> |
| <doc> |
| <![CDATA[Set the maximum amount of memory any task of this job can use. See |
| {@link #MAPRED_TASK_MAXVMEM_PROPERTY} |
| |
| @param vmem Maximum amount of virtual memory in bytes any task of this job |
| can use. |
| @see #getMaxVirtualMemoryForTask()]]> |
| </doc> |
| </method> |
| <method name="getMaxPhysicalMemoryForTask" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The maximum amount of physical memory any task of this job will use. See |
| {@link #MAPRED_TASK_MAXPMEM_PROPERTY} |
| |
| @return The maximum amount of physical memory any task of this job will |
| use, in bytes. |
| @see #setMaxPhysicalMemoryForTask(long)]]> |
| </doc> |
| </method> |
| <method name="setMaxPhysicalMemoryForTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="pmem" type="long"/> |
| <doc> |
| <![CDATA[Set the maximum amount of physical memory any task of this job can use. See |
| {@link #MAPRED_TASK_MAXPMEM_PROPERTY} |
| |
| @param pmem Maximum amount of physical memory in bytes any task of this job |
| can use. |
| @see #getMaxPhysicalMemoryForTask()]]> |
| </doc> |
| </method> |
| <method name="getQueueName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the name of the queue to which this job is submitted. |
| Defaults to 'default'. |
| |
| @return name of the queue]]> |
| </doc> |
| </method> |
| <method name="setQueueName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the name of the queue to which this job should be submitted. |
| |
| @param queueName Name of the queue]]> |
| </doc> |
| </method> |
| <field name="DISABLED_MEMORY_LIMIT" type="long" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[A value which if set for memory related configuration options, |
| indicates that the options are turned off.]]> |
| </doc> |
| </field> |
| <field name="DEFAULT_QUEUE_NAME" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Name of the queue to which jobs will be submitted, if no queue |
| name is mentioned.]]> |
| </doc> |
| </field> |
| <field name="MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Cluster-wide configuration to be set by the administrators that provides |
| default amount of maximum virtual memory for job's tasks. This has to be |
| set on both the JobTracker node for the sake of scheduling decisions and on |
| the TaskTracker nodes for the sake of memory management. |
| |
| <p> |
| |
| If a job doesn't specify its virtual memory requirement by setting |
| {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to {@link #DISABLED_MEMORY_LIMIT}, |
| tasks are assured a memory limit set to this property. This property is |
| disabled by default, and if not explicitly set to a valid value by the |
| administrators and if a job doesn't specify its virtual memory |
| requirements, the job's tasks will not be assured anything and may be |
| killed by a TT that intends to control the total memory usage of the tasks |
| via memory management functionality. |
| |
| <p> |
| |
| This value should in general be less than the cluster-wide configuration |
| {@link #UPPER_LIMIT_ON_TASK_VMEM_PROPERTY}. If not, or if it is not set, |
| TaskTracker's memory management may be disabled and a scheduler's memory |
| based scheduling decisions will be affected. Please refer to the |
| documentation of the configured scheduler to see how this property is used.]]> |
| </doc> |
| </field> |
| <field name="MAPRED_TASK_MAXVMEM_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The maximum amount of memory any task of this job will use. |
| |
| <p> |
| |
| This value will be used by TaskTrackers for monitoring the memory usage of |
| tasks of this job. If a TaskTracker's memory management functionality is |
| enabled, each task of this job will be allowed to use a maximum virtual |
| memory specified by this property. If the task's memory usage goes over |
| this value, the task will be failed by the TT. If not set, the cluster-wide |
| configuration {@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} is used as the |
| default value for memory requirements. If this property cascaded with |
| {@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} becomes equal to -1, job's |
| tasks will not be assured anything and may be killed by a TT that intends |
| to control the total memory usage of the tasks via memory management |
| functionality. If the memory management functionality is disabled on a TT, |
| this value is ignored. |
| |
| <p> |
| |
| This value should also be not more than the cluster-wide configuration |
| {@link #UPPER_LIMIT_ON_TASK_VMEM_PROPERTY} which has to be set by the site |
| administrators. |
| |
| <p> |
| |
| This value may be used by schedulers that support scheduling based on job's |
| memory requirements. In general, a task of this job will be scheduled on a |
| TaskTracker only if the amount of virtual memory still unoccupied on the |
| TaskTracker is greater than or equal to this value. But different |
| schedulers can take different decisions. Please refer to the documentation |
| of the scheduler being configured to see if it does memory based scheduling |
| and if it does, how this property is used by that scheduler. |
| |
| @see #setMaxVirtualMemoryForTask(long) |
| @see #getMaxVirtualMemoryForTask()]]> |
| </doc> |
| </field> |
| <field name="MAPRED_TASK_MAXPMEM_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The maximum amount of physical memory any task of a job will use. |
| |
| <p> |
| |
| This value may be used by schedulers that support scheduling based on job's |
| memory requirements. In general, a task of this job will be scheduled on a |
| TaskTracker, only if the amount of physical memory still unoccupied on the |
| TaskTracker is greater than or equal to this value. But different |
| schedulers can take different decisions. Please refer to the documentation |
| of the scheduler being configured to see how it does memory based |
| scheduling and how this variable is used by that scheduler. |
| |
| @see #setMaxPhysicalMemoryForTask(long) |
| @see #getMaxPhysicalMemoryForTask()]]> |
| </doc> |
| </field> |
| <field name="UPPER_LIMIT_ON_TASK_VMEM_PROPERTY" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Cluster-wide configuration to be set by the site administrators that |
| provides an upper limit on the maximum virtual memory that can be specified |
| by a job. The job configuration {@link #MAPRED_TASK_MAXVMEM_PROPERTY} and |
| the cluster-wide configuration |
| {@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} should, by definition, be |
| less than this value. If the job configuration |
| {@link #MAPRED_TASK_MAXVMEM_PROPERTY} is more than this value, |
| depending on the scheduler being configured, the job may be rejected or the |
| job configuration may just be ignored. |
| |
| <p> |
| |
| If it is not set on a TaskTracker, TaskTracker's memory management will be |
| disabled.]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[A map/reduce job configuration. |
| |
| <p><code>JobConf</code> is the primary interface for a user to describe a |
| map-reduce job to the Hadoop framework for execution. The framework tries to |
| faithfully execute the job as-is described by <code>JobConf</code>, however: |
| <ol> |
| <li> |
| Some configuration parameters might have been marked as |
| <a href="{@docRoot}/org/apache/hadoop/conf/Configuration.html#FinalParams"> |
| final</a> by administrators and hence cannot be altered. |
| </li> |
| <li> |
| While some job parameters are straight-forward to set |
| (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly with |
| the rest of the framework and/or job-configuration and are relatively more |
| complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}). |
| </li> |
| </ol></p> |
| |
| <p><code>JobConf</code> typically specifies the {@link Mapper}, combiner |
| (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and |
| {@link OutputFormat} implementations to be used etc. |
| |
| <p>Optionally <code>JobConf</code> is used to specify other advanced facets |
| of the job such as <code>Comparator</code>s to be used, files to be put in |
| the {@link DistributedCache}, whether or not intermediate and/or job outputs |
| are to be compressed (and how), debuggability via user-provided scripts |
| ( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}) |
| for doing post-processing on task logs, task's stdout, stderr, syslog, |
| etc.</p> |
| |
| <p>Here is an example on how to configure a job via <code>JobConf</code>:</p> |
| <p><blockquote><pre> |
| // Create a new JobConf |
| JobConf job = new JobConf(new Configuration(), MyJob.class); |
| |
| // Specify various job-specific parameters |
| job.setJobName("myjob"); |
| |
| FileInputFormat.setInputPaths(job, new Path("in")); |
| FileOutputFormat.setOutputPath(job, new Path("out")); |
| |
| job.setMapperClass(MyJob.MyMapper.class); |
| job.setCombinerClass(MyJob.MyReducer.class); |
| job.setReducerClass(MyJob.MyReducer.class); |
| |
| job.setInputFormat(SequenceFileInputFormat.class); |
| job.setOutputFormat(SequenceFileOutputFormat.class); |
| </pre></blockquote></p> |
| |
| @see JobClient |
| @see ClusterStatus |
| @see Tool |
| @see DistributedCache |
| @deprecated Use {@link Configuration} instead]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobConf --> |
| <!-- start interface org.apache.hadoop.mapred.JobConfigurable --> |
| <interface name="JobConfigurable" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Initializes a new instance from a {@link JobConf}. |
| |
| @param job the configuration]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Something that may be configured with a {@link JobConf}.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.JobConfigurable --> |
| <!-- start class org.apache.hadoop.mapred.JobContext --> |
| <class name="JobContext" extends="org.apache.hadoop.mapreduce.JobContext" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.JobContext} instead."> |
| <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the job Configuration |
| |
| @return JobConf]]> |
| </doc> |
| </method> |
| <method name="getProgressible" return="org.apache.hadoop.util.Progressable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the progress mechanism for reporting progress. |
| |
| @return progress mechanism]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.JobContext} instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobContext --> |
| <!-- start class org.apache.hadoop.mapred.JobEndNotifier --> |
| <class name="JobEndNotifier" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobEndNotifier" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="startNotifier" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="stopNotifier" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="registerNotification" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="status" type="org.apache.hadoop.mapred.JobStatus"/> |
| </method> |
| <method name="localRunnerNotification" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="status" type="org.apache.hadoop.mapred.JobStatus"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobEndNotifier --> |
| <!-- start class org.apache.hadoop.mapred.JobHistory --> |
| <class name="JobHistory" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobHistory" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="init" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="hostname" type="java.lang.String"/> |
| <param name="jobTrackerStartTime" type="long"/> |
| <doc> |
| <![CDATA[Initialize JobHistory files. |
| @param conf Jobconf of the job tracker. |
| @param hostname jobtracker's hostname |
| @param jobTrackerStartTime jobtracker's start time |
| @return true if initialized properly |
| false otherwise]]> |
| </doc> |
| </method> |
| <method name="parseHistoryFromFS" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="path" type="java.lang.String"/> |
| <param name="l" type="org.apache.hadoop.mapred.JobHistory.Listener"/> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Parses history file and invokes Listener.handle() for |
| each line of history. It can be used for looking through history |
| files for specific items without having to keep whole history in memory. |
| @param path path to history file |
| @param l Listener for history events |
| @param fs FileSystem where history file is present |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isDisableHistory" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns history disable status. by default history is enabled so this |
| method returns false. |
| @return true if history logging is disabled, false otherwise.]]> |
| </doc> |
| </method> |
| <method name="setDisableHistory" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="disableHistory" type="boolean"/> |
| <doc> |
| <![CDATA[Enable/disable history logging. Default value is false, so history |
| is enabled by default. |
| @param disableHistory true if history should be disabled, false otherwise.]]> |
| </doc> |
| </method> |
| <method name="getTaskLogsUrl" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="attempt" type="org.apache.hadoop.mapred.JobHistory.TaskAttempt"/> |
| <doc> |
| <![CDATA[Return the TaskLogsUrl of a particular TaskAttempt |
| |
| @param attempt |
| @return the taskLogsUrl. null if http-port or tracker-name or |
| task-attempt-id are unavailable.]]> |
| </doc> |
| </method> |
| <field name="LOG" type="org.apache.commons.logging.Log" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="JOB_NAME_TRIM_LENGTH" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Provides methods for writing to and reading from job history. |
| Job History works in an append mode, JobHistory and its inner classes provide methods |
| to log job events. |
| |
| JobHistory is split into multiple files, format of each file is plain text where each line |
| is of the format [type (key=value)*], where type identifies the type of the record. |
| Type maps to UID of one of the inner classes of this class. |
| |
| Job history is maintained in a master index which contains start/stop times of all jobs with |
| a few other job level properties. Apart from this each job's history is maintained in a separate history |
| file. Name of job history files follows the format jobtrackerId_jobid |
| |
| For parsing the job history it supports a listener based interface where each line is parsed |
| and passed to listener. The listener can create an object model of history or look for specific |
| events and discard rest of the history. |
| |
| CHANGE LOG : |
| Version 0 : The history has the following format : |
| TAG KEY1="VALUE1" KEY2="VALUE2" and so on. |
| TAG can be Job, Task, MapAttempt or ReduceAttempt. |
| Note that a '"' is the line delimiter. |
| Version 1 : Changes the line delimiter to '.' |
| Values are now escaped for unambiguous parsing. |
| Added the Meta tag to store version info.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobHistory --> |
| <!-- start class org.apache.hadoop.mapred.JobHistory.HistoryCleaner --> |
| <class name="JobHistory.HistoryCleaner" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="java.lang.Runnable"/> |
| <constructor name="JobHistory.HistoryCleaner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Cleans up history data.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Delete history files older than one month. Update master index and remove all |
| jobs older than one month. Also if a job tracker has no jobs in last one month |
| remove reference to the job tracker.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobHistory.HistoryCleaner --> |
| <!-- start class org.apache.hadoop.mapred.JobHistory.JobInfo --> |
| <class name="JobHistory.JobInfo" extends="org.apache.hadoop.mapred.JobHistory.KeyValuePair" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobHistory.JobInfo" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create new JobInfo]]> |
| </doc> |
| </constructor> |
| <method name="getAllTasks" return="java.util.Map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns all map and reduce tasks <taskid-Task>.]]> |
| </doc> |
| </method> |
| <method name="getLocalJobFilePath" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <doc> |
| <![CDATA[Get the path of the locally stored job file |
| @param jobId id of the job |
| @return the path of the job file on the local file system]]> |
| </doc> |
| </method> |
| <method name="encodeJobHistoryFilePath" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="logFile" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Helper function to encode the URL of the path of the job-history |
| log file. |
| |
| @param logFile path of the job-history file |
| @return URL encoded path |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="encodeJobHistoryFileName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="logFileName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Helper function to encode the URL of the filename of the job-history |
| log file. |
| |
| @param logFileName file name of the job-history file |
| @return URL encoded filename |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="decodeJobHistoryFileName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="logFileName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Helper function to decode the URL of the filename of the job-history |
| log file. |
| |
| @param logFileName file name of the job-history file |
| @return URL decoded filename |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getUserName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the user name from the job conf]]> |
| </doc> |
| </method> |
| <method name="getJobHistoryLogLocation" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="logFileName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Get the job history file path given the history filename]]> |
| </doc> |
| </method> |
| <method name="getJobHistoryLogLocationForUser" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="logFileName" type="java.lang.String"/> |
| <param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the user job history file path]]> |
| </doc> |
| </method> |
| <method name="getJobHistoryFileName" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="id" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Recover the job history filename from the history folder. |
| Uses the following pattern |
| $jt-hostname_[0-9]*_$job-id_$user-$job-name* |
| @param jobConf the job conf |
| @param id job id]]> |
| </doc> |
| </method> |
| <method name="recoverJobHistoryFile" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="logFilePath" type="org.apache.hadoop.fs.Path"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Selects one of the two files generated as a part of recovery. |
| The thumb rule is that always select the oldest file. |
| This call makes sure that only one file is left in the end. |
| @param conf job conf |
| @param logFilePath Path of the log file |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="logSubmitted" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="jobConfPath" type="java.lang.String"/> |
| <param name="submitTime" type="long"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Log job submitted event to history. Creates a new file in history |
| for the job. if history file creation fails, it disables history |
| for all other events. |
| @param jobId job id assigned by job tracker. |
| @param jobConf job conf of the job |
| @param jobConfPath path to job conf xml file in HDFS. |
| @param submitTime time when job tracker received the job |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="logInited" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="startTime" type="long"/> |
| <param name="totalMaps" type="int"/> |
| <param name="totalReduces" type="int"/> |
| <doc> |
| <![CDATA[Logs launch time of job. |
| |
| @param jobId job id, assigned by jobtracker. |
| @param startTime start time of job. |
| @param totalMaps total maps assigned by jobtracker. |
| @param totalReduces total reduces.]]> |
| </doc> |
| </method> |
| <method name="logStarted" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use {@link #logInited(JobID, long, int, int)} and |
| {@link #logStarted(JobID)}"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="startTime" type="long"/> |
| <param name="totalMaps" type="int"/> |
| <param name="totalReduces" type="int"/> |
| <doc> |
| <![CDATA[Logs the job as RUNNING. |
| |
| @param jobId job id, assigned by jobtracker. |
| @param startTime start time of job. |
| @param totalMaps total maps assigned by jobtracker. |
| @param totalReduces total reduces. |
| @deprecated Use {@link #logInited(JobID, long, int, int)} and |
| {@link #logStarted(JobID)}]]> |
| </doc> |
| </method> |
| <method name="logStarted" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <doc> |
| <![CDATA[Logs job as running |
| @param jobId job id, assigned by jobtracker.]]> |
| </doc> |
| </method> |
| <method name="logFinished" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="finishTime" type="long"/> |
| <param name="finishedMaps" type="int"/> |
| <param name="finishedReduces" type="int"/> |
| <param name="failedMaps" type="int"/> |
| <param name="failedReduces" type="int"/> |
| <param name="counters" type="org.apache.hadoop.mapred.Counters"/> |
| <doc> |
| <![CDATA[Log job finished. closes the job file in history. |
| @param jobId job id, assigned by jobtracker. |
| @param finishTime finish time of job in ms. |
| @param finishedMaps no of maps successfully finished. |
| @param finishedReduces no of reduces finished successfully. |
| @param failedMaps no of failed map tasks. |
| @param failedReduces no of failed reduce tasks. |
| @param counters the counters from the job]]> |
| </doc> |
| </method> |
| <method name="logFailed" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="timestamp" type="long"/> |
| <param name="finishedMaps" type="int"/> |
| <param name="finishedReduces" type="int"/> |
| <doc> |
| <![CDATA[Logs job failed event. Closes the job history log file. |
| @param jobid job id |
| @param timestamp time when job failure was detected in ms. |
| @param finishedMaps no finished map tasks. |
| @param finishedReduces no of finished reduce tasks.]]> |
| </doc> |
| </method> |
| <method name="logKilled" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="timestamp" type="long"/> |
| <param name="finishedMaps" type="int"/> |
| <param name="finishedReduces" type="int"/> |
| <doc> |
| <![CDATA[Logs job killed event. Closes the job history log file. |
| |
| @param jobid |
| job id |
| @param timestamp |
| time when job killed was issued in ms. |
| @param finishedMaps |
| no finished map tasks. |
| @param finishedReduces |
| no of finished reduce tasks.]]> |
| </doc> |
| </method> |
| <method name="logJobPriority" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="priority" type="org.apache.hadoop.mapred.JobPriority"/> |
| <doc> |
| <![CDATA[Log job's priority. |
| @param jobid job id |
| @param priority Jobs priority]]> |
| </doc> |
| </method> |
| <method name="logJobInfo" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use {@link #logJobInfo(JobID, long, long)} instead."> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="submitTime" type="long"/> |
| <param name="launchTime" type="long"/> |
| <param name="restartCount" type="int"/> |
| <doc> |
| <![CDATA[Log job's submit-time/launch-time |
| @param jobid job id |
| @param submitTime job's submit time |
| @param launchTime job's launch time |
| @param restartCount number of times the job got restarted |
| @deprecated Use {@link #logJobInfo(JobID, long, long)} instead.]]> |
| </doc> |
| </method> |
| <method name="logJobInfo" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="submitTime" type="long"/> |
| <param name="launchTime" type="long"/> |
| </method> |
| <doc> |
| <![CDATA[Helper class for logging or reading back events related to job start, finish or failure.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobHistory.JobInfo --> |
| <!-- start class org.apache.hadoop.mapred.JobHistory.Keys --> |
| <class name="JobHistory.Keys" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.JobHistory.Keys[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.JobHistory.Keys" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <field name="JOBTRACKERID" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="START_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FINISH_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="JOBID" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="JOBNAME" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="USER" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="JOBCONF" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SUBMIT_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="LAUNCH_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TOTAL_MAPS" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TOTAL_REDUCES" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FAILED_MAPS" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FAILED_REDUCES" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FINISHED_MAPS" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FINISHED_REDUCES" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="JOB_STATUS" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TASKID" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="HOSTNAME" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TASK_TYPE" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="ERROR" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TASK_ATTEMPT_ID" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TASK_STATUS" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="COPY_PHASE" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SORT_PHASE" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="REDUCE_PHASE" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SHUFFLE_FINISHED" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SORT_FINISHED" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="COUNTERS" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SPLITS" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="JOB_PRIORITY" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="HTTP_PORT" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TRACKER_NAME" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="STATE_STRING" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="VERSION" type="org.apache.hadoop.mapred.JobHistory.Keys" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Job history files contain key="value" pairs, where keys belong to this enum. |
| It acts as a global namespace for all keys.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobHistory.Keys --> |
| <!-- start interface org.apache.hadoop.mapred.JobHistory.Listener --> |
| <interface name="JobHistory.Listener" abstract="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="handle" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="recType" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"/> |
| <param name="values" type="java.util.Map"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Callback method for history parser. |
| @param recType type of record, which is the first entry in the line. |
| @param values a map of key-value pairs as they appear in history. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Callback interface for reading back log events from JobHistory. This interface |
| should be implemented and passed to JobHistory.parseHistory()]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.JobHistory.Listener --> |
| <!-- start class org.apache.hadoop.mapred.JobHistory.MapAttempt --> |
| <class name="JobHistory.MapAttempt" extends="org.apache.hadoop.mapred.JobHistory.TaskAttempt" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobHistory.MapAttempt" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="logStarted" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use |
| {@link #logStarted(TaskAttemptID, long, String, int, String)}"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="startTime" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log start time of this map task attempt. |
| @param taskAttemptId task attempt id |
| @param startTime start time of task attempt as reported by task tracker. |
| @param hostName host name of the task attempt. |
| @deprecated Use |
| {@link #logStarted(TaskAttemptID, long, String, int, String)}]]> |
| </doc> |
| </method> |
| <method name="logStarted" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="startTime" type="long"/> |
| <param name="trackerName" type="java.lang.String"/> |
| <param name="httpPort" type="int"/> |
| <param name="taskType" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log start time of this map task attempt. |
| |
| @param taskAttemptId task attempt id |
| @param startTime start time of task attempt as reported by task tracker. |
| @param trackerName name of the tracker executing the task attempt. |
| @param httpPort http port of the task tracker executing the task attempt |
| @param taskType Whether the attempt is cleanup or setup or map]]> |
| </doc> |
| </method> |
| <method name="logFinished" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use |
| {@link #logFinished(TaskAttemptID, long, String, String, String, Counters)}"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="finishTime" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log finish time of map task attempt. |
| @param taskAttemptId task attempt id |
| @param finishTime finish time |
| @param hostName host name |
| @deprecated Use |
| {@link #logFinished(TaskAttemptID, long, String, String, String, Counters)}]]> |
| </doc> |
| </method> |
| <method name="logFinished" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="finishTime" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <param name="taskType" type="java.lang.String"/> |
| <param name="stateString" type="java.lang.String"/> |
| <param name="counter" type="org.apache.hadoop.mapred.Counters"/> |
| <doc> |
| <![CDATA[Log finish time of map task attempt. |
| |
| @param taskAttemptId task attempt id |
| @param finishTime finish time |
| @param hostName host name |
| @param taskType Whether the attempt is cleanup or setup or map |
| @param stateString state string of the task attempt |
| @param counter counters of the task attempt]]> |
| </doc> |
| </method> |
| <method name="logFailed" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use |
| {@link #logFailed(TaskAttemptID, long, String, String, String)}"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="timestamp" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <param name="error" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log task attempt failed event. |
| @param taskAttemptId task attempt id |
| @param timestamp timestamp |
| @param hostName hostname of this task attempt. |
| @param error error message if any for this task attempt. |
| @deprecated Use |
| {@link #logFailed(TaskAttemptID, long, String, String, String)}]]> |
| </doc> |
| </method> |
| <method name="logFailed" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="timestamp" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <param name="error" type="java.lang.String"/> |
| <param name="taskType" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log task attempt failed event. |
| |
| @param taskAttemptId task attempt id |
| @param timestamp timestamp |
| @param hostName hostname of this task attempt. |
| @param error error message if any for this task attempt. |
| @param taskType Whether the attempt is cleanup or setup or map]]> |
| </doc> |
| </method> |
| <method name="logKilled" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use |
| {@link #logKilled(TaskAttemptID, long, String, String, String)}"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="timestamp" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <param name="error" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log task attempt killed event. |
| @param taskAttemptId task attempt id |
| @param timestamp timestamp |
| @param hostName hostname of this task attempt. |
| @param error error message if any for this task attempt. |
| @deprecated Use |
| {@link #logKilled(TaskAttemptID, long, String, String, String)}]]> |
| </doc> |
| </method> |
| <method name="logKilled" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="timestamp" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <param name="error" type="java.lang.String"/> |
| <param name="taskType" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log task attempt killed event. |
| |
| @param taskAttemptId task attempt id |
| @param timestamp timestamp |
| @param hostName hostname of this task attempt. |
| @param error error message if any for this task attempt. |
| @param taskType Whether the attempt is cleanup or setup or map]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Helper class for logging or reading back events related to start, finish or failure of |
| a Map Attempt on a node.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobHistory.MapAttempt --> |
| <!-- start class org.apache.hadoop.mapred.JobHistory.RecordTypes --> |
| <class name="JobHistory.RecordTypes" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.JobHistory.RecordTypes[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.JobHistory.RecordTypes" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <field name="Jobtracker" type="org.apache.hadoop.mapred.JobHistory.RecordTypes" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="Job" type="org.apache.hadoop.mapred.JobHistory.RecordTypes" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="Task" type="org.apache.hadoop.mapred.JobHistory.RecordTypes" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="MapAttempt" type="org.apache.hadoop.mapred.JobHistory.RecordTypes" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="ReduceAttempt" type="org.apache.hadoop.mapred.JobHistory.RecordTypes" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="Meta" type="org.apache.hadoop.mapred.JobHistory.RecordTypes" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Record types are identifiers for each line of log in history files. |
| A record type appears as the first token in a single line of log.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobHistory.RecordTypes --> |
| <!-- start class org.apache.hadoop.mapred.JobHistory.ReduceAttempt --> |
| <class name="JobHistory.ReduceAttempt" extends="org.apache.hadoop.mapred.JobHistory.TaskAttempt" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobHistory.ReduceAttempt" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="logStarted" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use |
| {@link #logStarted(TaskAttemptID, long, String, int, String)}"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="startTime" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log start time of Reduce task attempt. |
| @param taskAttemptId task attempt id |
| @param startTime start time |
| @param hostName host name |
| @deprecated Use |
| {@link #logStarted(TaskAttemptID, long, String, int, String)}]]> |
| </doc> |
| </method> |
| <method name="logStarted" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="startTime" type="long"/> |
| <param name="trackerName" type="java.lang.String"/> |
| <param name="httpPort" type="int"/> |
| <param name="taskType" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log start time of Reduce task attempt. |
| |
| @param taskAttemptId task attempt id |
| @param startTime start time |
| @param trackerName tracker name |
| @param httpPort the http port of the tracker executing the task attempt |
| @param taskType Whether the attempt is cleanup or setup or reduce]]> |
| </doc> |
| </method> |
| <method name="logFinished" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use |
| {@link #logFinished(TaskAttemptID, long, long, long, String, String, String, Counters)}"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="shuffleFinished" type="long"/> |
| <param name="sortFinished" type="long"/> |
| <param name="finishTime" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log finished event of this task. |
| @param taskAttemptId task attempt id |
| @param shuffleFinished shuffle finish time |
| @param sortFinished sort finish time |
| @param finishTime finish time of task |
| @param hostName host name where task attempt executed |
| @deprecated Use |
| {@link #logFinished(TaskAttemptID, long, long, long, String, String, String, Counters)}]]> |
| </doc> |
| </method> |
| <method name="logFinished" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="shuffleFinished" type="long"/> |
| <param name="sortFinished" type="long"/> |
| <param name="finishTime" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <param name="taskType" type="java.lang.String"/> |
| <param name="stateString" type="java.lang.String"/> |
| <param name="counter" type="org.apache.hadoop.mapred.Counters"/> |
| <doc> |
| <![CDATA[Log finished event of this task. |
| |
| @param taskAttemptId task attempt id |
| @param shuffleFinished shuffle finish time |
| @param sortFinished sort finish time |
| @param finishTime finish time of task |
| @param hostName host name where task attempt executed |
| @param taskType Whether the attempt is cleanup or setup or reduce |
| @param stateString the state string of the attempt |
| @param counter counters of the attempt]]> |
| </doc> |
| </method> |
| <method name="logFailed" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use |
| {@link #logFailed(TaskAttemptID, long, String, String, String)}"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="timestamp" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <param name="error" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log failed reduce task attempt. |
| @param taskAttemptId task attempt id |
| @param timestamp time stamp when task failed |
| @param hostName host name of the task attempt. |
| @param error error message of the task. |
| @deprecated Use |
| {@link #logFailed(TaskAttemptID, long, String, String, String)}]]> |
| </doc> |
| </method> |
| <method name="logFailed" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="timestamp" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <param name="error" type="java.lang.String"/> |
| <param name="taskType" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log failed reduce task attempt. |
| |
| @param taskAttemptId task attempt id |
| @param timestamp time stamp when task failed |
| @param hostName host name of the task attempt. |
| @param error error message of the task. |
| @param taskType Whether the attempt is cleanup or setup or reduce]]> |
| </doc> |
| </method> |
| <method name="logKilled" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use |
| {@link #logKilled(TaskAttemptID, long, String, String, String)}"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="timestamp" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <param name="error" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log killed reduce task attempt. |
| @param taskAttemptId task attempt id |
@param timestamp time stamp when task was killed
| @param hostName host name of the task attempt. |
| @param error error message of the task. |
| @deprecated Use |
| {@link #logKilled(TaskAttemptID, long, String, String, String)}]]> |
| </doc> |
| </method> |
| <method name="logKilled" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="timestamp" type="long"/> |
| <param name="hostName" type="java.lang.String"/> |
| <param name="error" type="java.lang.String"/> |
| <param name="taskType" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log killed reduce task attempt. |
| |
| @param taskAttemptId task attempt id |
@param timestamp time stamp when task was killed
| @param hostName host name of the task attempt. |
| @param error error message of the task. |
| @param taskType Whether the attempt is cleanup or setup or reduce]]> |
| </doc> |
| </method> |
| <doc> |
<![CDATA[Helper class for logging or reading back events related to start, finish or failure of
a Reduce Attempt on a node.]]>
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobHistory.ReduceAttempt --> |
| <!-- start class org.apache.hadoop.mapred.JobHistory.Task --> |
| <class name="JobHistory.Task" extends="org.apache.hadoop.mapred.JobHistory.KeyValuePair" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobHistory.Task" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="logStarted" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskID"/> |
| <param name="taskType" type="java.lang.String"/> |
| <param name="startTime" type="long"/> |
| <param name="splitLocations" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Log start time of task (TIP). |
| @param taskId task id |
| @param taskType MAP or REDUCE |
@param startTime startTime of tip.
@param splitLocations location hints for the task's input split, if any.]]>
| </doc> |
| </method> |
| <method name="logFinished" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskID"/> |
| <param name="taskType" type="java.lang.String"/> |
| <param name="finishTime" type="long"/> |
| <param name="counters" type="org.apache.hadoop.mapred.Counters"/> |
| <doc> |
| <![CDATA[Log finish time of task. |
| @param taskId task id |
| @param taskType MAP or REDUCE |
@param finishTime finish time of task in ms]]>
| </doc> |
| </method> |
| <method name="logUpdates" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskID"/> |
| <param name="finishTime" type="long"/> |
| <doc> |
| <![CDATA[Update the finish time of task. |
| @param taskId task id |
| @param finishTime finish time of task in ms]]> |
| </doc> |
| </method> |
| <method name="logFailed" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskID"/> |
| <param name="taskType" type="java.lang.String"/> |
| <param name="time" type="long"/> |
| <param name="error" type="java.lang.String"/> |
| <doc> |
<![CDATA[Log task failed event.
@param taskId task id
@param taskType MAP or REDUCE.
@param time timestamp when the task failure was detected.
| @param error error message for failure.]]> |
| </doc> |
| </method> |
| <method name="logFailed" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskID"/> |
| <param name="taskType" type="java.lang.String"/> |
| <param name="time" type="long"/> |
| <param name="error" type="java.lang.String"/> |
| <param name="failedDueToAttempt" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <doc> |
<![CDATA[Log task failed event, recording the attempt that caused the failure.
@param taskId task id
@param taskType MAP or REDUCE.
@param time timestamp when the task failure was detected.
@param error error message for failure.
@param failedDueToAttempt The attempt that caused the failure, if any]]>
| </doc> |
| </method> |
| <method name="getTaskAttempts" return="java.util.Map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns all task attempts for this task. <task attempt id - TaskAttempt>]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Helper class for logging or reading back events related to Task's start, finish or failure. |
| All events logged by this class are logged in a separate file per job in |
| job tracker history. These events map to TIPs in jobtracker.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobHistory.Task --> |
| <!-- start class org.apache.hadoop.mapred.JobHistory.TaskAttempt --> |
| <class name="JobHistory.TaskAttempt" extends="org.apache.hadoop.mapred.JobHistory.Task" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobHistory.TaskAttempt" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[Base class for Map and Reduce TaskAttempts.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobHistory.TaskAttempt --> |
| <!-- start class org.apache.hadoop.mapred.JobHistory.Values --> |
| <class name="JobHistory.Values" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.JobHistory.Values[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.JobHistory.Values" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <field name="SUCCESS" type="org.apache.hadoop.mapred.JobHistory.Values" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FAILED" type="org.apache.hadoop.mapred.JobHistory.Values" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="KILLED" type="org.apache.hadoop.mapred.JobHistory.Values" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="MAP" type="org.apache.hadoop.mapred.JobHistory.Values" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="REDUCE" type="org.apache.hadoop.mapred.JobHistory.Values" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="CLEANUP" type="org.apache.hadoop.mapred.JobHistory.Values" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="RUNNING" type="org.apache.hadoop.mapred.JobHistory.Values" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="PREP" type="org.apache.hadoop.mapred.JobHistory.Values" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SETUP" type="org.apache.hadoop.mapred.JobHistory.Values" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This enum contains some of the values commonly used by history log events. |
| since values in history can only be strings - Values.name() is used in |
| most places in history file.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobHistory.Values --> |
| <!-- start class org.apache.hadoop.mapred.JobID --> |
| <class name="JobID" extends="org.apache.hadoop.mapreduce.JobID" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobID" type="java.lang.String, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a JobID object |
| @param jtIdentifier jobTracker identifier |
| @param id job number]]> |
| </doc> |
| </constructor> |
| <constructor name="JobID" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="downgrade" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="old" type="org.apache.hadoop.mapreduce.JobID"/> |
| <doc> |
| <![CDATA[Downgrade a new JobID to an old one |
| @param old a new or old JobID |
| @return either old or a new JobID build to match old]]> |
| </doc> |
| </method> |
| <method name="read" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="forName" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="str" type="java.lang.String"/> |
| <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> |
| <doc> |
| <![CDATA[Construct a JobId object from given string |
| @return constructed JobId object or null if the given String is null |
| @throws IllegalArgumentException if the given string is malformed]]> |
| </doc> |
| </method> |
| <method name="getJobIDsPattern" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jtIdentifier" type="java.lang.String"/> |
| <param name="jobId" type="java.lang.Integer"/> |
| <doc> |
<![CDATA[Returns a regex pattern which matches job IDs. Arguments can
be given null, in which case that part of the regex will be generic.
For example to obtain a regex matching <i>any job</i>
run on the jobtracker started at <i>200707121733</i>, we would use :
<pre>
JobID.getJobIDsPattern("200707121733", null);
| </pre> |
| which will return : |
| <pre> "job_200707121733_[0-9]*" </pre> |
| @param jtIdentifier jobTracker identifier, or null |
| @param jobId job number, or null |
| @return a regex pattern matching JobIDs]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[JobID represents the immutable and unique identifier for |
| the job. JobID consists of two parts. First part |
| represents the jobtracker identifier, so that jobID to jobtracker map |
| is defined. For cluster setup this string is the jobtracker |
| start time, for local setting, it is "local". |
| Second part of the JobID is the job number. <br> |
| An example JobID is : |
| <code>job_200707121733_0003</code> , which represents the third job |
| running at the jobtracker started at <code>200707121733</code>. |
| <p> |
| Applications should never construct or parse JobID strings, but rather |
| use appropriate constructors or {@link #forName(String)} method. |
| |
| @see TaskID |
| @see TaskAttemptID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobID --> |
| <!-- start class org.apache.hadoop.mapred.JobPriority --> |
| <class name="JobPriority" extends="java.lang.Enum" |
| abstract="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.JobPriority[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.JobPriority" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <field name="VERY_HIGH" type="org.apache.hadoop.mapred.JobPriority" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="HIGH" type="org.apache.hadoop.mapred.JobPriority" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="NORMAL" type="org.apache.hadoop.mapred.JobPriority" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="LOW" type="org.apache.hadoop.mapred.JobPriority" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="VERY_LOW" type="org.apache.hadoop.mapred.JobPriority" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Used to describe the priority of the running job.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobPriority --> |
| <!-- start class org.apache.hadoop.mapred.JobProfile --> |
| <class name="JobProfile" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="JobProfile" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct an empty {@link JobProfile}.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobProfile" type="java.lang.String, org.apache.hadoop.mapreduce.JobID, java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a {@link JobProfile} the userid, jobid, |
| job config-file, job-details url and job name. |
| |
| @param user userid of the person who submitted the job. |
| @param jobid id of the job. |
| @param jobFile job configuration file. |
| @param url link to the web-ui for details of the job. |
| @param name user-specified job name.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobProfile" type="java.lang.String, org.apache.hadoop.mapreduce.JobID, java.lang.String, java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a {@link JobProfile} the userid, jobid, |
| job config-file, job-details url and job name. |
| |
| @param user userid of the person who submitted the job. |
| @param jobid id of the job. |
| @param jobFile job configuration file. |
| @param url link to the web-ui for details of the job. |
| @param name user-specified job name. |
| @param queueName name of the queue to which the job is submitted]]> |
| </doc> |
| </constructor> |
| <constructor name="JobProfile" type="java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="use JobProfile(String, JobID, String, String, String) instead"> |
| <doc> |
| <![CDATA[@deprecated use JobProfile(String, JobID, String, String, String) instead]]> |
| </doc> |
| </constructor> |
| <method name="getUser" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user id.]]> |
| </doc> |
| </method> |
| <method name="getJobID" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the job id.]]> |
| </doc> |
| </method> |
| <method name="getJobId" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use getJobID() instead"> |
| <doc> |
| <![CDATA[@deprecated use getJobID() instead]]> |
| </doc> |
| </method> |
| <method name="getJobFile" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the configuration file for the job.]]> |
| </doc> |
| </method> |
| <method name="getURL" return="java.net.URL" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the link to the web-ui for details of the job.]]> |
| </doc> |
| </method> |
| <method name="getJobName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the user-specified job name.]]> |
| </doc> |
| </method> |
| <method name="getQueueName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the name of the queue to which the job is submitted. |
| @return name of the queue.]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A JobProfile is a MapReduce primitive. Tracks a job, |
| whether living or dead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobProfile --> |
| <!-- start class org.apache.hadoop.mapred.JobQueueInfo --> |
| <class name="JobQueueInfo" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="JobQueueInfo" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Default constructor for Job Queue Info.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobQueueInfo" type="java.lang.String, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a new JobQueueInfo object using the queue name and the |
| scheduling information passed. |
| |
| @param queueName Name of the job queue |
| @param schedulingInfo Scheduling Information associated with the job |
| queue]]> |
| </doc> |
| </constructor> |
| <method name="setQueueName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queueName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the queue name of the JobQueueInfo |
| |
| @param queueName Name of the job queue.]]> |
| </doc> |
| </method> |
| <method name="getQueueName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the queue name from JobQueueInfo |
| |
| @return queue name]]> |
| </doc> |
| </method> |
| <method name="setSchedulingInfo" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="schedulingInfo" type="java.lang.String"/> |
| <doc> |
 | <![CDATA[Set the scheduling information associated with a particular job queue |
| |
 | @param schedulingInfo Scheduling information associated with the job queue]]> |
| </doc> |
| </method> |
| <method name="getSchedulingInfo" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
 | <![CDATA[Gets the scheduling information associated with a particular job queue. |
 | If nothing is set, this would return <b>"N/A"</b> |
| |
| @return Scheduling information associated to particular Job Queue]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Class that contains the information regarding the Job Queues which are |
| maintained by the Hadoop Map/Reduce framework.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobQueueInfo --> |
| <!-- start class org.apache.hadoop.mapred.JobStatus --> |
| <class name="JobStatus" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <implements name="java.lang.Cloneable"/> |
| <constructor name="JobStatus" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on cleanup |
| @param runState The current state of the job]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param runState The current state of the job]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, org.apache.hadoop.mapred.JobPriority" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param mapProgress The progress made on the maps |
 | @param reduceProgress The progress made on the reduces |
 | @param cleanupProgress The progress made on the cleanup |
 | @param runState The current state of the job |
| @param jp Priority of the job.]]> |
| </doc> |
| </constructor> |
| <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a job status object for a given jobid. |
| @param jobid The jobid of the job |
| @param setupProgress The progress made on the setup |
| @param mapProgress The progress made on the maps |
| @param reduceProgress The progress made on the reduces |
| @param cleanupProgress The progress made on the cleanup |
| @param runState The current state of the job |
| @param jp Priority of the job.]]> |
| </doc> |
| </constructor> |
| <method name="getJobId" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use getJobID instead"> |
| <doc> |
| <![CDATA[@deprecated use getJobID instead]]> |
| </doc> |
| </method> |
| <method name="getJobID" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return The jobid of the Job]]> |
| </doc> |
| </method> |
| <method name="mapProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in maps]]> |
| </doc> |
| </method> |
| <method name="cleanupProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in cleanup]]> |
| </doc> |
| </method> |
| <method name="setupProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in setup]]> |
| </doc> |
| </method> |
| <method name="reduceProgress" return="float" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return Percentage of progress in reduce]]> |
| </doc> |
| </method> |
| <method name="getRunState" return="int" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return running state of the job]]> |
| </doc> |
| </method> |
| <method name="setRunState" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="state" type="int"/> |
| <doc> |
| <![CDATA[Change the current run state of the job.]]> |
| </doc> |
| </method> |
| <method name="getStartTime" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return start time of the job]]> |
| </doc> |
| </method> |
| <method name="clone" return="java.lang.Object" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getUsername" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the username of the job]]> |
| </doc> |
| </method> |
| <method name="getSchedulingInfo" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Gets the Scheduling information associated to a particular Job. |
| @return the scheduling information of the job]]> |
| </doc> |
| </method> |
| <method name="setSchedulingInfo" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="schedulingInfo" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Used to set the scheduling information associated to a particular Job. |
| |
| @param schedulingInfo Scheduling information of the job]]> |
| </doc> |
| </method> |
| <method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the priority of the job |
| @return job priority]]> |
| </doc> |
| </method> |
| <method name="setJobPriority" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jp" type="org.apache.hadoop.mapred.JobPriority"/> |
| <doc> |
| <![CDATA[Set the priority of the job, defaulting to NORMAL. |
| @param jp new job priority]]> |
| </doc> |
| </method> |
| <method name="isJobComplete" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns true if the status is for a completed job.]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="RUNNING" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SUCCEEDED" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FAILED" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="PREP" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="KILLED" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[Describes the current status of a job. This is |
| not intended to be a comprehensive piece of data. |
| For that, look at JobProfile.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobStatus --> |
| <!-- start class org.apache.hadoop.mapred.JobTracker --> |
| <class name="JobTracker" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.MRConstants"/> |
| <implements name="org.apache.hadoop.mapred.InterTrackerProtocol"/> |
| <implements name="org.apache.hadoop.mapred.JobSubmissionProtocol"/> |
| <implements name="org.apache.hadoop.mapred.TaskTrackerManager"/> |
| <implements name="org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol"/> |
| <method name="startTracker" return="org.apache.hadoop.mapred.JobTracker" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Start the JobTracker with given configuration. |
| |
| The conf will be modified to reflect the actual ports on which |
| the JobTracker is up and running if the user passes the port as |
| <code>zero</code>. |
| |
| @param conf configuration for the JobTracker. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="stopTracker" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getProtocolVersion" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="protocol" type="java.lang.String"/> |
| <param name="clientVersion" type="long"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="hasRestarted" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Whether the JT has restarted]]> |
| </doc> |
| </method> |
| <method name="hasRecovered" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Whether the JT has recovered upon restart]]> |
| </doc> |
| </method> |
| <method name="getRecoveryDuration" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[How long the jobtracker took to recover from restart.]]> |
| </doc> |
| </method> |
| <method name="getInstrumentationClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="setInstrumentationClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="t" type="java.lang.Class"/> |
| </method> |
| <method name="getAddress" return="java.net.InetSocketAddress" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="offerService" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Run forever]]> |
| </doc> |
| </method> |
| <method name="getTotalSubmissions" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getJobTrackerMachine" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getTrackerIdentifier" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
 | <![CDATA[Get the unique identifier (i.e. timestamp) of this job tracker start. |
| @return a string with a unique identifier]]> |
| </doc> |
| </method> |
| <method name="getTrackerPort" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getInfoPort" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getStartTime" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="runningJobs" return="java.util.Vector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getRunningJobs" return="java.util.List" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Version that is called from a timer thread, and therefore needs to be |
| careful to synchronize.]]> |
| </doc> |
| </method> |
| <method name="failedJobs" return="java.util.Vector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="completedJobs" return="java.util.Vector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="taskTrackers" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get all the task trackers in the cluster |
| |
| @return {@link Collection} of {@link TaskTrackerStatus}]]> |
| </doc> |
| </method> |
| <method name="activeTaskTrackers" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the active task tracker statuses in the cluster |
| |
| @return {@link Collection} of active {@link TaskTrackerStatus}]]> |
| </doc> |
| </method> |
| <method name="taskTrackerNames" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the active and blacklisted task tracker names in the cluster. The first |
| element in the returned list contains the list of active tracker names. |
| The second element in the returned list contains the list of blacklisted |
| tracker names.]]> |
| </doc> |
| </method> |
| <method name="blacklistedTaskTrackers" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the blacklisted task tracker statuses in the cluster |
| |
| @return {@link Collection} of blacklisted {@link TaskTrackerStatus}]]> |
| </doc> |
| </method> |
| <method name="isBlacklisted" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="trackerID" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Whether the tracker is blacklisted or not |
| |
| @param trackerID |
| |
| @return true if blacklisted, false otherwise]]> |
| </doc> |
| </method> |
| <method name="getTaskTracker" return="org.apache.hadoop.mapred.TaskTrackerStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="trackerID" type="java.lang.String"/> |
| </method> |
| <method name="resolveAndAddToTopology" return="org.apache.hadoop.net.Node" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <method name="getNodesAtMaxLevel" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns a collection of nodes at the max level]]> |
| </doc> |
| </method> |
| <method name="getParentNode" return="org.apache.hadoop.net.Node" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="node" type="org.apache.hadoop.net.Node"/> |
| <param name="level" type="int"/> |
| </method> |
| <method name="getNode" return="org.apache.hadoop.net.Node" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Return the Node in the network topology that corresponds to the hostname]]> |
| </doc> |
| </method> |
| <method name="getNumTaskCacheLevels" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getNumResolvedTaskTrackers" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getNumberOfUniqueHosts" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="addJobInProgressListener" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="listener" type="org.apache.hadoop.mapred.JobInProgressListener"/> |
| </method> |
| <method name="removeJobInProgressListener" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="listener" type="org.apache.hadoop.mapred.JobInProgressListener"/> |
| </method> |
| <method name="getQueueManager" return="org.apache.hadoop.mapred.QueueManager" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the {@link QueueManager} associated with the JobTracker.]]> |
| </doc> |
| </method> |
| <method name="getBuildVersion" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="heartbeat" return="org.apache.hadoop.mapred.HeartbeatResponse" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="status" type="org.apache.hadoop.mapred.TaskTrackerStatus"/> |
| <param name="restarted" type="boolean"/> |
| <param name="initialContact" type="boolean"/> |
| <param name="acceptNewTasks" type="boolean"/> |
| <param name="responseId" type="short"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[The periodic heartbeat mechanism between the {@link TaskTracker} and |
| the {@link JobTracker}. |
| |
| The {@link JobTracker} processes the status information sent by the |
| {@link TaskTracker} and responds with instructions to start/stop |
| tasks or jobs, and also 'reset' instructions during contingencies.]]> |
| </doc> |
| </method> |
| <method name="getNextHeartbeatInterval" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Calculates next heartbeat interval using cluster size. |
 | Heartbeat interval is incremented by 1 second for every 50 nodes. |
| @return next heartbeat interval.]]> |
| </doc> |
| </method> |
| <method name="getFilesystemName" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Grab the local fs name]]> |
| </doc> |
| </method> |
| <method name="reportTaskTrackerError" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskTracker" type="java.lang.String"/> |
| <param name="errorClass" type="java.lang.String"/> |
| <param name="errorMessage" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getNewJobId" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Allocates a new JobId string.]]> |
| </doc> |
| </method> |
| <method name="submitJob" return="org.apache.hadoop.mapred.JobStatus" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[JobTracker.submitJob() kicks off a new job. |
| |
| Create a 'JobInProgress' object, which contains both JobProfile |
| and JobStatus. Those two sub-objects are sometimes shipped outside |
| of the JobTracker. But JobInProgress adds info that's useful for |
| the JobTracker alone.]]> |
| </doc> |
| </method> |
| <method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #getClusterStatus(boolean)}"> |
| <doc> |
| <![CDATA[@deprecated use {@link #getClusterStatus(boolean)}]]> |
| </doc> |
| </method> |
| <method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="detailed" type="boolean"/> |
| </method> |
| <method name="killJob" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="setJobPriority" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="priority" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Set the priority of a job |
| @param jobid id of the job |
| @param priority new priority of the job]]> |
| </doc> |
| </method> |
| <method name="getJobProfile" return="org.apache.hadoop.mapred.JobProfile" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| </method> |
| <method name="getJobStatus" return="org.apache.hadoop.mapred.JobStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| </method> |
| <method name="getJobCounters" return="org.apache.hadoop.mapred.Counters" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| </method> |
| <method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| </method> |
| <method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| </method> |
| <method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| </method> |
| <method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| </method> |
| <method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="fromEventId" type="int"/> |
| <param name="maxEvents" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getTaskDiagnostics" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the diagnostics for a given task |
| @param taskId the id of the task |
| @return an array of the diagnostic messages]]> |
| </doc> |
| </method> |
| <method name="getTip" return="org.apache.hadoop.mapred.TaskInProgress" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="tipid" type="org.apache.hadoop.mapred.TaskID"/> |
| <doc> |
| <![CDATA[Returns specified TaskInProgress, or null.]]> |
| </doc> |
| </method> |
| <method name="killTask" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="shouldFail" type="boolean"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Mark a Task to be killed]]> |
| </doc> |
| </method> |
| <method name="getAssignedTracker" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <doc> |
| <![CDATA[Get tracker name for a given task id. |
| @param taskId the name of the task |
| @return The name of the task tracker]]> |
| </doc> |
| </method> |
| <method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getSystemDir" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@see org.apache.hadoop.mapred.JobSubmissionProtocol#getSystemDir()]]> |
| </doc> |
| </method> |
| <method name="getJob" return="org.apache.hadoop.mapred.JobInProgress" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> |
| </method> |
| <method name="getLocalJobFilePath" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <doc> |
| <![CDATA[Get the localized job file path on the job tracker's local file system |
| @param jobId id of the job |
| @return the path of the job conf file on the local file system]]> |
| </doc> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="argv" type="java.lang.String[]"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <exception name="InterruptedException" type="java.lang.InterruptedException"/> |
| <doc> |
| <![CDATA[Start the JobTracker process. This is used only for debugging. As a rule, |
| JobTracker should be run as part of the DFS Namenode process.]]> |
| </doc> |
| </method> |
| <method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queue" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="queue" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="refreshServiceAcl" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="LOG" type="org.apache.commons.logging.Log" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[JobTracker is the central location for submitting and |
| tracking MR jobs in a network environment.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobTracker --> |
| <!-- start class org.apache.hadoop.mapred.JobTracker.IllegalStateException --> |
| <class name="JobTracker.IllegalStateException" extends="java.io.IOException" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="JobTracker.IllegalStateException" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[A client tried to submit a job before the Job Tracker was ready.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobTracker.IllegalStateException --> |
| <!-- start class org.apache.hadoop.mapred.JobTracker.State --> |
| <class name="JobTracker.State" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.JobTracker.State[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.JobTracker.State" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <field name="INITIALIZING" type="org.apache.hadoop.mapred.JobTracker.State" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="RUNNING" type="org.apache.hadoop.mapred.JobTracker.State" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.JobTracker.State --> |
| <!-- start class org.apache.hadoop.mapred.KeyValueLineRecordReader --> |
| <class name="KeyValueLineRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="getKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createKey" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="findSeparator" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="utf" type="byte[]"/> |
| <param name="start" type="int"/> |
| <param name="length" type="int"/> |
| <param name="sep" type="byte"/> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="value" type="org.apache.hadoop.io.Text"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Read key/value pair in a line.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[This class treats a line in the input as a key/value pair separated by a |
| separator character. The separator can be specified in config file |
| under the attribute name key.value.separator.in.input.line. The default |
| separator is the tab character ('\t').]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.KeyValueLineRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.KeyValueTextInputFormat --> |
| <class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="KeyValueTextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines. |
| Either linefeed or carriage-return are used to signal end of line. Each line |
| is divided into key and value parts by a separator byte. If no such byte |
| exists, the key will be the entire line and value will be empty.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.KeyValueTextInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.LineRecordReader --> |
| <class name="LineRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use |
| {@link org.apache.hadoop.mapreduce.lib.input.LineRecordReader} instead."> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="LineRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <constructor name="LineRecordReader" type="java.io.InputStream, long, long, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="LineRecordReader" type="java.io.InputStream, long, long, org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="createKey" return="org.apache.hadoop.io.LongWritable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.LongWritable"/> |
| <param name="value" type="org.apache.hadoop.io.Text"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Read a line.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the progress within the split]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Treats keys as offset in file and value as line. |
| @deprecated Use |
| {@link org.apache.hadoop.mapreduce.lib.input.LineRecordReader} instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.LineRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.LineRecordReader.LineReader --> |
| <class name="LineRecordReader.LineReader" extends="org.apache.hadoop.util.LineReader" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.util.LineReader} instead."> |
| <constructor name="LineRecordReader.LineReader" type="java.io.InputStream, org.apache.hadoop.conf.Configuration" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <doc> |
| <![CDATA[A class that provides a line reader from an input stream. |
| @deprecated Use {@link org.apache.hadoop.util.LineReader} instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.LineRecordReader.LineReader --> |
| <!-- start class org.apache.hadoop.mapred.MapFileOutputFormat --> |
| <class name="MapFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MapFileOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="dir" type="org.apache.hadoop.fs.Path"/> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Open the output generated by this format.]]> |
| </doc> |
| </method> |
| <method name="getEntry" return="org.apache.hadoop.io.Writable" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/> |
| <param name="partitioner" type="org.apache.hadoop.mapred.Partitioner"/> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <param name="value" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get an entry from output generated by this class.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[An {@link OutputFormat} that writes {@link MapFile}s.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MapFileOutputFormat --> |
| <!-- start interface org.apache.hadoop.mapred.Mapper --> |
| <interface name="Mapper" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead."> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <implements name="org.apache.hadoop.io.Closeable"/> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Maps a single input key/value pair into an intermediate key/value pair. |
| |
| <p>Output pairs need not be of the same types as input pairs. A given |
| input pair may map to zero or many output pairs. Output pairs are |
| collected with calls to |
| {@link OutputCollector#collect(Object,Object)}.</p> |
| |
| <p>Applications can use the {@link Reporter} provided to report progress |
| or just indicate that they are alive. In scenarios where the application |
| takes an insignificant amount of time to process individual key/value |
| pairs, this is crucial since the framework might assume that the task has |
| timed-out and kill that task. The other way of avoiding this is to set |
| <a href="{@docRoot}/../mapred-default.html#mapred.task.timeout"> |
| mapred.task.timeout</a> to a high-enough value (or even zero for no |
| time-outs).</p> |
| |
| @param key the input key. |
| @param value the input value. |
| @param output collects mapped keys and values. |
| @param reporter facility to report progress.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs. |
| |
| <p>Maps are the individual tasks which transform input records into |
| intermediate records. The transformed intermediate records need not be of |
| the same type as the input records. A given input pair may map to zero or |
| many output pairs.</p> |
| |
| <p>The Hadoop Map-Reduce framework spawns one map task for each |
| {@link InputSplit} generated by the {@link InputFormat} for the job. |
| <code>Mapper</code> implementations can access the {@link JobConf} for the |
| job via the {@link JobConfigurable#configure(JobConf)} and initialize |
| themselves. Similarly they can use the {@link Closeable#close()} method for |
| de-initialization.</p> |
| |
| <p>The framework then calls |
| {@link #map(Object, Object, OutputCollector, Reporter)} |
| for each key/value pair in the <code>InputSplit</code> for that task.</p> |
| |
| <p>All intermediate values associated with a given output key are |
| subsequently grouped by the framework, and passed to a {@link Reducer} to |
| determine the final output. Users can control the grouping by specifying |
| a <code>Comparator</code> via |
| {@link JobConf#setOutputKeyComparatorClass(Class)}.</p> |
| |
| <p>The grouped <code>Mapper</code> outputs are partitioned per |
| <code>Reducer</code>. Users can control which keys (and hence records) go to |
| which <code>Reducer</code> by implementing a custom {@link Partitioner}. |
| |
| <p>Users can optionally specify a <code>combiner</code>, via |
| {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the |
| intermediate outputs, which helps to cut down the amount of data transferred |
| from the <code>Mapper</code> to the <code>Reducer</code>. |
| |
| <p>The intermediate, grouped outputs are always stored in |
| {@link SequenceFile}s. Applications can specify if and how the intermediate |
| outputs are to be compressed and which {@link CompressionCodec}s are to be |
| used via the <code>JobConf</code>.</p> |
| |
| <p>If the job has |
| <a href="{@docRoot}/org/apache/hadoop/mapred/JobConf.html#ReducerNone">zero |
| reduces</a> then the output of the <code>Mapper</code> is directly written |
| to the {@link FileSystem} without grouping by keys.</p> |
| |
| <p>Example:</p> |
| <p><blockquote><pre> |
| public class MyMapper<K extends WritableComparable, V extends Writable> |
| extends MapReduceBase implements Mapper<K, V, K, V> { |
| |
| static enum MyCounters { NUM_RECORDS } |
| |
| private String mapTaskId; |
| private String inputFile; |
| private int noRecords = 0; |
| |
| public void configure(JobConf job) { |
| mapTaskId = job.get("mapred.task.id"); |
| inputFile = job.get("map.input.file"); |
| } |
| |
| public void map(K key, V val, |
| OutputCollector<K, V> output, Reporter reporter) |
| throws IOException { |
| // Process the <key, value> pair (assume this takes a while) |
| // ... |
| // ... |
| |
| // Let the framework know that we are alive, and kicking! |
| // reporter.progress(); |
| |
| // Process some more |
| // ... |
| // ... |
| |
| // Increment the no. of <key, value> pairs processed |
| ++noRecords; |
| |
| // Increment counters |
| reporter.incrCounter(NUM_RECORDS, 1); |
| |
| // Every 100 records update application-level status |
| if ((noRecords%100) == 0) { |
| reporter.setStatus(mapTaskId + " processed " + noRecords + |
| " from input-file: " + inputFile); |
| } |
| |
| // Output the result |
| output.collect(key, val); |
| } |
| } |
| </pre></blockquote></p> |
| |
| <p>Applications may write a custom {@link MapRunnable} to exert greater |
| control on map processing e.g. multi-threaded <code>Mapper</code>s etc.</p> |
| |
| @see JobConf |
| @see InputFormat |
| @see Partitioner |
| @see Reducer |
| @see MapReduceBase |
| @see MapRunnable |
| @see SequenceFile |
| @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.Mapper --> |
| <!-- start class org.apache.hadoop.mapred.MapReduceBase --> |
| <class name="MapReduceBase" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Closeable"/> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="MapReduceBase" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Default implementation that does nothing.]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Default implementation that does nothing.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Base class for {@link Mapper} and {@link Reducer} implementations. |
| |
| <p>Provides default no-op implementations for a few methods, most non-trivial |
| applications need to override some of them.</p>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MapReduceBase --> |
| <!-- start class org.apache.hadoop.mapred.MapReducePolicyProvider --> |
| <class name="MapReducePolicyProvider" extends="org.apache.hadoop.security.authorize.PolicyProvider" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MapReducePolicyProvider" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getServices" return="org.apache.hadoop.security.authorize.Service[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[{@link PolicyProvider} for Map-Reduce protocols.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MapReducePolicyProvider --> |
| <!-- start interface org.apache.hadoop.mapred.MapRunnable --> |
| <interface name="MapRunnable" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead."> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="input" type="org.apache.hadoop.mapred.RecordReader"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Start mapping input <tt><key, value></tt> pairs. |
| |
| <p>Mapping of input records to output records is complete when this method |
| returns.</p> |
| |
| @param input the {@link RecordReader} to read the input records. |
| @param output the {@link OutputCollector} to collect the output records. |
| @param reporter {@link Reporter} to report progress, status-updates etc. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Expert: Generic interface for {@link Mapper}s. |
| |
| <p>Custom implementations of <code>MapRunnable</code> can exert greater |
| control on map processing e.g. multi-threaded, asynchronous mappers etc.</p> |
| |
| @see Mapper |
| @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.MapRunnable --> |
| <!-- start class org.apache.hadoop.mapred.MapRunner --> |
| <class name="MapRunner" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.MapRunnable"/> |
| <constructor name="MapRunner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="input" type="org.apache.hadoop.mapred.RecordReader"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getMapper" return="org.apache.hadoop.mapred.Mapper" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[Default {@link MapRunnable} implementation.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MapRunner --> |
| <!-- start class org.apache.hadoop.mapred.MultiFileInputFormat --> |
| <class name="MultiFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead"> |
| <constructor name="MultiFileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An abstract {@link InputFormat} that returns {@link MultiFileSplit}'s |
| in {@link #getSplits(JobConf, int)} method. Splits are constructed from |
| the files under the input paths. Each split returned contains <i>nearly</i> |
| equal content length. <br> |
| Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} |
| to construct <code>RecordReader</code>'s for <code>MultiFileSplit</code>'s. |
| @see MultiFileSplit |
| @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MultiFileInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.MultiFileSplit --> |
| <class name="MultiFileSplit" extends="org.apache.hadoop.mapred.lib.CombineFileSplit" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead"> |
| <constructor name="MultiFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[A sub-collection of input files. Unlike {@link FileSplit}, MultiFileSplit |
| class does not represent a split of a file, but a split of input files |
| into smaller sets. The atomic unit of split is a file. <br> |
| MultiFileSplit can be used to implement {@link RecordReader}'s, with |
| reading one record per file. |
| @see FileSplit |
| @see MultiFileInputFormat |
| @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.MultiFileSplit --> |
| <!-- start interface org.apache.hadoop.mapred.OutputCollector --> |
| <interface name="OutputCollector" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="collect" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Adds a key/value pair to the output. |
| |
| @param key the key to collect. |
@param value the value to collect.
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Collects the <code><key, value></code> pairs output by {@link Mapper}s |
| and {@link Reducer}s. |
| |
| <p><code>OutputCollector</code> is the generalization of the facility |
| provided by the Map-Reduce framework to collect data output by either the |
| <code>Mapper</code> or the <code>Reducer</code> i.e. intermediate outputs |
| or the output of the job.</p>]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.OutputCollector --> |
| <!-- start class org.apache.hadoop.mapred.OutputCommitter --> |
| <class name="OutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead."> |
| <constructor name="OutputCommitter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setupJob" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For the framework to setup the job output during initialization |
| |
| @param jobContext Context of the job whose output is being written. |
| @throws IOException if temporary output could not be created]]> |
| </doc> |
| </method> |
| <method name="cleanupJob" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For cleaning up the job's output after job completion |
| |
| @param jobContext Context of the job whose output is being written. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setupTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Sets up output for the task. |
| |
| @param taskContext Context of the task whose output is being written. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="needsTaskCommit" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check whether task needs a commit |
| |
| @param taskContext |
| @return true/false |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="commitTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[To promote the task's temporary output to final output location |
| |
| The task's output is moved to the job's output directory. |
| |
| @param taskContext Context of the task whose output is being written. |
@throws IOException if the commit is not successful]]>
| </doc> |
| </method> |
| <method name="abortTask" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Discard the task output |
| |
| @param taskContext |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="cleanupJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="setupTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="needsTaskCommit" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="commitTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <method name="abortTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This method implements the new interface by calling the old method. Note |
| that the input types are different between the new and old apis and this |
| is a bridge between the two.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>OutputCommitter</code> describes the commit of task output for a |
| Map-Reduce job. |
| |
| <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of |
 the job to:</p>
| <ol> |
| <li> |
| Setup the job during initialization. For example, create the temporary |
| output directory for the job during the initialization of the job. |
| </li> |
| <li> |
| Cleanup the job after the job completion. For example, remove the |
| temporary output directory after the job completion. |
| </li> |
| <li> |
| Setup the task temporary output. |
| </li> |
| <li> |
| Check whether a task needs a commit. This is to avoid the commit |
| procedure if a task does not need commit. |
| </li> |
| <li> |
| Commit of the task output. |
| </li> |
| <li> |
| Discard the task commit. |
| </li> |
| </ol> |
| |
| @see FileOutputCommitter |
| @see JobContext |
| @see TaskAttemptContext |
| @deprecated Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.OutputCommitter --> |
| <!-- start interface org.apache.hadoop.mapred.OutputFormat --> |
| <interface name="OutputFormat" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead."> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the {@link RecordWriter} for the given job. |
| |
| @param ignored |
| @param job configuration for the job whose output is being written. |
| @param name the unique name for this part of the output. |
| @param progress mechanism for reporting progress while writing to file. |
| @return a {@link RecordWriter} to write the output for the job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check for validity of the output-specification for the job. |
| |
 <p>This is to validate the output specification for the job when the
 job is submitted. Typically it checks that the output does not already exist,
| throwing an exception when it already exists, so that output is not |
| overwritten.</p> |
| |
| @param ignored |
| @param job job configuration. |
| @throws IOException when output should not be attempted]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>OutputFormat</code> describes the output-specification for a |
| Map-Reduce job. |
| |
| <p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the |
 job to:</p>
| <ol> |
| <li> |
| Validate the output-specification of the job. For e.g. check that the |
| output directory doesn't already exist. |
| <li> |
| Provide the {@link RecordWriter} implementation to be used to write out |
| the output files of the job. Output files are stored in a |
| {@link FileSystem}. |
| </li> |
| </ol> |
| |
| @see RecordWriter |
| @see JobConf |
| @deprecated Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.OutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.OutputLogFilter --> |
| <class name="OutputLogFilter" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.fs.PathFilter"/> |
| <constructor name="OutputLogFilter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="accept" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <doc> |
| <![CDATA[This class filters log files from directory given |
 It doesn't accept paths having _logs.
| This can be used to list paths of output directory as follows: |
| Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir, |
| new OutputLogFilter()));]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.OutputLogFilter --> |
| <!-- start interface org.apache.hadoop.mapred.Partitioner --> |
| <interface name="Partitioner" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.Partitioner} instead."> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <method name="getPartition" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <param name="numPartitions" type="int"/> |
| <doc> |
 <![CDATA[Get the partition number for a given key (hence record) given the total
| number of partitions i.e. number of reduce-tasks for the job. |
| |
 <p>Typically a hash function on all or a subset of the key.</p>
| |
 @param key the key to be partitioned.
| @param value the entry value. |
| @param numPartitions the total number of partitions. |
| @return the partition number for the <code>key</code>.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Partitions the key space. |
| |
| <p><code>Partitioner</code> controls the partitioning of the keys of the |
| intermediate map-outputs. The key (or a subset of the key) is used to derive |
| the partition, typically by a hash function. The total number of partitions |
| is the same as the number of reduce tasks for the job. Hence this controls |
| which of the <code>m</code> reduce tasks the intermediate key (and hence the |
| record) is sent for reduction.</p> |
| |
| @see Reducer |
| @deprecated Use {@link org.apache.hadoop.mapreduce.Partitioner} instead.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.Partitioner --> |
| <!-- start interface org.apache.hadoop.mapred.RawKeyValueIterator --> |
| <interface name="RawKeyValueIterator" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getKey" return="org.apache.hadoop.io.DataInputBuffer" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the current raw key. |
| |
| @return Gets the current raw key as a DataInputBuffer |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getValue" return="org.apache.hadoop.io.DataInputBuffer" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the current raw value. |
| |
| @return Gets the current raw value as a DataInputBuffer |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Sets up the current key and value (for getKey and getValue). |
| |
| @return <code>true</code> if there exists a key/value, |
| <code>false</code> otherwise. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Closes the iterator so that the underlying streams can be closed. |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="org.apache.hadoop.util.Progress" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Gets the Progress object; this has a float (0.0 - 1.0) |
| indicating the bytes processed by the iterator so far]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>RawKeyValueIterator</code> is an iterator used to iterate over |
| the raw keys and values during sort/merge of intermediate data.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.RawKeyValueIterator --> |
| <!-- start interface org.apache.hadoop.mapred.RecordReader --> |
| <interface name="RecordReader" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Reads the next key/value pair from the input for processing. |
| |
| @param key the key to read data into |
| @param value the value to read data into |
| @return true iff a key/value was read, false if at EOF]]> |
| </doc> |
| </method> |
| <method name="createKey" return="java.lang.Object" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create an object of the appropriate type to be used as a key. |
| |
| @return a new key object.]]> |
| </doc> |
| </method> |
| <method name="createValue" return="java.lang.Object" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create an object of the appropriate type to be used as a value. |
| |
| @return a new value object.]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns the current position in the input. |
| |
| @return the current position in the input. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close this {@link InputSplit} to future operations. |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
 <![CDATA[How much of the input has the {@link RecordReader} consumed,
 i.e. how much of it has been processed so far?
| |
| @return progress from <code>0.0</code> to <code>1.0</code>. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>RecordReader</code> reads <key, value> pairs from an |
| {@link InputSplit}. |
| |
| <p><code>RecordReader</code>, typically, converts the byte-oriented view of |
| the input, provided by the <code>InputSplit</code>, and presents a |
| record-oriented view for the {@link Mapper} & {@link Reducer} tasks for |
| processing. It thus assumes the responsibility of processing record |
| boundaries and presenting the tasks with keys and values.</p> |
| |
| @see InputSplit |
| @see InputFormat]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.RecordReader --> |
| <!-- start interface org.apache.hadoop.mapred.RecordWriter --> |
| <interface name="RecordWriter" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Writes a key/value pair. |
| |
| @param key the key to write. |
| @param value the value to write. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close this <code>RecordWriter</code> to future operations. |
| |
| @param reporter facility to report progress. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>RecordWriter</code> writes the output <key, value> pairs |
| to an output file. |
| |
| <p><code>RecordWriter</code> implementations write the job outputs to the |
| {@link FileSystem}. |
| |
| @see OutputFormat]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.RecordWriter --> |
| <!-- start interface org.apache.hadoop.mapred.Reducer --> |
| <interface name="Reducer" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.Reducer} instead."> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <implements name="org.apache.hadoop.io.Closeable"/> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[<i>Reduces</i> values for a given key. |
| |
| <p>The framework calls this method for each |
| <code><key, (list of values)></code> pair in the grouped inputs. |
| Output values must be of the same type as input values. Input keys must |
| not be altered. The framework will <b>reuse</b> the key and value objects |
| that are passed into the reduce, therefore the application should clone |
| the objects they want to keep a copy of. In many cases, all values are |
| combined into zero or one value. |
| </p> |
| |
| <p>Output pairs are collected with calls to |
| {@link OutputCollector#collect(Object,Object)}.</p> |
| |
| <p>Applications can use the {@link Reporter} provided to report progress |
| or just indicate that they are alive. In scenarios where the application |
| takes an insignificant amount of time to process individual key/value |
| pairs, this is crucial since the framework might assume that the task has |
| timed-out and kill that task. The other way of avoiding this is to set |
| <a href="{@docRoot}/../mapred-default.html#mapred.task.timeout"> |
| mapred.task.timeout</a> to a high-enough value (or even zero for no |
| time-outs).</p> |
| |
| @param key the key. |
| @param values the list of values to reduce. |
| @param output to collect keys and combined values. |
| @param reporter facility to report progress.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Reduces a set of intermediate values which share a key to a smaller set of |
| values. |
| |
| <p>The number of <code>Reducer</code>s for the job is set by the user via |
| {@link JobConf#setNumReduceTasks(int)}. <code>Reducer</code> implementations |
| can access the {@link JobConf} for the job via the |
| {@link JobConfigurable#configure(JobConf)} method and initialize themselves. |
| Similarly they can use the {@link Closeable#close()} method for |
| de-initialization.</p> |
| |
| <p><code>Reducer</code> has 3 primary phases:</p> |
| <ol> |
| <li> |
| |
| <h4 id="Shuffle">Shuffle</h4> |
| |
| <p><code>Reducer</code> is input the grouped output of a {@link Mapper}. |
 In this phase the framework, for each <code>Reducer</code>, fetches the
| relevant partition of the output of all the <code>Mapper</code>s, via HTTP. |
| </p> |
| </li> |
| |
| <li> |
| <h4 id="Sort">Sort</h4> |
| |
| <p>The framework groups <code>Reducer</code> inputs by <code>key</code>s |
| (since different <code>Mapper</code>s may have output the same key) in this |
| stage.</p> |
| |
| <p>The shuffle and sort phases occur simultaneously i.e. while outputs are |
| being fetched they are merged.</p> |
| |
| <h5 id="SecondarySort">SecondarySort</h5> |
| |
| <p>If equivalence rules for keys while grouping the intermediates are |
| different from those for grouping keys before reduction, then one may |
| specify a <code>Comparator</code> via |
 {@link JobConf#setOutputValueGroupingComparator(Class)}. Since
| {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to |
| control how intermediate keys are grouped, these can be used in conjunction |
| to simulate <i>secondary sort on values</i>.</p> |
| |
| |
| For example, say that you want to find duplicate web pages and tag them |
| all with the url of the "best" known example. You would set up the job |
| like: |
| <ul> |
| <li>Map Input Key: url</li> |
| <li>Map Input Value: document</li> |
| <li>Map Output Key: document checksum, url pagerank</li> |
| <li>Map Output Value: url</li> |
| <li>Partitioner: by checksum</li> |
| <li>OutputKeyComparator: by checksum and then decreasing pagerank</li> |
| <li>OutputValueGroupingComparator: by checksum</li> |
| </ul> |
| </li> |
| |
| <li> |
| <h4 id="Reduce">Reduce</h4> |
| |
| <p>In this phase the |
| {@link #reduce(Object, Iterator, OutputCollector, Reporter)} |
| method is called for each <code><key, (list of values)></code> pair in |
| the grouped inputs.</p> |
| <p>The output of the reduce task is typically written to the |
| {@link FileSystem} via |
| {@link OutputCollector#collect(Object, Object)}.</p> |
| </li> |
| </ol> |
| |
| <p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p> |
| |
| <p>Example:</p> |
| <p><blockquote><pre> |
| public class MyReducer<K extends WritableComparable, V extends Writable> |
| extends MapReduceBase implements Reducer<K, V, K, V> { |
| |
| static enum MyCounters { NUM_RECORDS } |
| |
| private String reduceTaskId; |
| private int noKeys = 0; |
| |
| public void configure(JobConf job) { |
| reduceTaskId = job.get("mapred.task.id"); |
| } |
| |
| public void reduce(K key, Iterator<V> values, |
| OutputCollector<K, V> output, |
| Reporter reporter) |
| throws IOException { |
| |
| // Process |
| int noValues = 0; |
| while (values.hasNext()) { |
| V value = values.next(); |
| |
| // Increment the no. of values for this key |
| ++noValues; |
| |
| // Process the <key, value> pair (assume this takes a while) |
| // ... |
| // ... |
| |
| // Let the framework know that we are alive, and kicking! |
| if ((noValues%10) == 0) { |
| reporter.progress(); |
| } |
| |
| // Process some more |
| // ... |
| // ... |
| |
| // Output the <key, value> |
| output.collect(key, value); |
| } |
| |
| // Increment the no. of <key, list of values> pairs processed |
| ++noKeys; |
| |
| // Increment counters |
| reporter.incrCounter(NUM_RECORDS, 1); |
| |
| // Every 100 keys update application-level status |
| if ((noKeys%100) == 0) { |
| reporter.setStatus(reduceTaskId + " processed " + noKeys); |
| } |
| } |
| } |
| </pre></blockquote></p> |
| |
| @see Mapper |
| @see Partitioner |
| @see Reporter |
| @see MapReduceBase |
| @deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.Reducer --> |
| <!-- start interface org.apache.hadoop.mapred.Reporter --> |
| <interface name="Reporter" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.util.Progressable"/> |
| <method name="setStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="status" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the status description for the task. |
| |
| @param status brief description of the current status.]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.Enum"/> |
| <doc> |
| <![CDATA[Get the {@link Counter} of the given group with the given name. |
| |
| @param name counter name |
| @return the <code>Counter</code> of the given group/name.]]> |
| </doc> |
| </method> |
| <method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="group" type="java.lang.String"/> |
| <param name="name" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Get the {@link Counter} of the given group with the given name. |
| |
| @param group counter group |
| @param name counter name |
| @return the <code>Counter</code> of the given group/name.]]> |
| </doc> |
| </method> |
| <method name="incrCounter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Enum"/> |
| <param name="amount" type="long"/> |
| <doc> |
| <![CDATA[Increments the counter identified by the key, which can be of |
| any {@link Enum} type, by the specified amount. |
| |
 @param key key to identify the counter to be incremented. The key can
 be any <code>Enum</code>.
| @param amount A non-negative amount by which the counter is to |
| be incremented.]]> |
| </doc> |
| </method> |
| <method name="incrCounter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="group" type="java.lang.String"/> |
| <param name="counter" type="java.lang.String"/> |
| <param name="amount" type="long"/> |
| <doc> |
| <![CDATA[Increments the counter identified by the group and counter name |
| by the specified amount. |
| |
| @param group name to identify the group of the counter to be incremented. |
| @param counter name to identify the counter within the group. |
| @param amount A non-negative amount by which the counter is to |
| be incremented.]]> |
| </doc> |
| </method> |
| <method name="getInputSplit" return="org.apache.hadoop.mapred.InputSplit" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="UnsupportedOperationException" type="java.lang.UnsupportedOperationException"/> |
| <doc> |
| <![CDATA[Get the {@link InputSplit} object for a map. |
| |
| @return the <code>InputSplit</code> that the map is reading from. |
| @throws UnsupportedOperationException if called outside a mapper]]> |
| </doc> |
| </method> |
| <field name="NULL" type="org.apache.hadoop.mapred.Reporter" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[A constant of Reporter type that does nothing.]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[A facility for Map-Reduce applications to report progress and update |
| counters, status information etc. |
| |
| <p>{@link Mapper} and {@link Reducer} can use the <code>Reporter</code> |
| provided to report progress or just indicate that they are alive. In |
| scenarios where the application takes an insignificant amount of time to |
| process individual key/value pairs, this is crucial since the framework |
| might assume that the task has timed-out and kill that task. |
| |
| <p>Applications can also update {@link Counters} via the provided |
| <code>Reporter</code>.</p> |
| |
| @see Progressable |
| @see Counters]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.Reporter --> |
| <!-- start interface org.apache.hadoop.mapred.RunningJob --> |
| <interface name="RunningJob" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getID" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the job identifier. |
| |
| @return the job identifier.]]> |
| </doc> |
| </method> |
| <method name="getJobID" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="This method is deprecated and will be removed. Applications should |
| rather use {@link #getID()}."> |
| <doc> |
| <![CDATA[@deprecated This method is deprecated and will be removed. Applications should |
| rather use {@link #getID()}.]]> |
| </doc> |
| </method> |
| <method name="getJobName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the name of the job. |
| |
| @return the name of the job.]]> |
| </doc> |
| </method> |
| <method name="getJobFile" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the path of the submitted job configuration. |
| |
| @return the path of the submitted job configuration.]]> |
| </doc> |
| </method> |
| <method name="getTrackingURL" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the URL where some job progress information will be displayed. |
| |
| @return the URL where some job progress information will be displayed.]]> |
| </doc> |
| </method> |
| <method name="mapProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0 |
| and 1.0. When all map tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's map-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="reduceProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0 |
| and 1.0. When all reduce tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's reduce-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="cleanupProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0 |
| and 1.0. When all cleanup tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's cleanup-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setupProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0 |
| and 1.0. When all setup tasks have completed, the function returns 1.0. |
| |
| @return the progress of the job's setup-tasks. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isComplete" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check if the job is finished or not. |
| This is a non-blocking call. |
| |
| @return <code>true</code> if the job is complete, else <code>false</code>. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="isSuccessful" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Check if the job completed successfully. |
| |
| @return <code>true</code> if the job succeeded, else <code>false</code>. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="waitForCompletion" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Blocks until the job is complete. |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getJobState" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns the current state of the Job. |
| {@link JobStatus} |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="killJob" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Kill the running job. Blocks until all job tasks have been |
| killed as well. If the job is no longer running, it simply returns. |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="setJobPriority" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="priority" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Set the priority of a running job. |
| @param priority the new priority for the job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="startFrom" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get events indicating completion (success/failure) of component tasks. |
| |
| @param startFrom index to start fetching events from |
| @return an array of {@link TaskCompletionEvent}s |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="killTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="shouldFail" type="boolean"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Kill indicated task attempt. |
| |
| @param taskId the id of the task to be terminated. |
| @param shouldFail if true the task is failed and added to failed tasks |
| list, otherwise it is just killed, w/o affecting |
| job failure status. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="killTask" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="Applications should rather use {@link #killTask(TaskAttemptID, boolean)}"> |
| <param name="taskId" type="java.lang.String"/> |
| <param name="shouldFail" type="boolean"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[@deprecated Applications should rather use {@link #killTask(TaskAttemptID, boolean)}]]> |
| </doc> |
| </method> |
| <method name="getCounters" return="org.apache.hadoop.mapred.Counters" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the counters for this job. |
| |
| @return the counters for this job. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getTaskDiagnostics" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Gets the diagnostic messages for a given task attempt. |
| @param taskid |
| @return the list of diagnostic messages for the task |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[<code>RunningJob</code> is the user-interface to query for details on a |
| running Map-Reduce job. |
| |
| <p>Clients can get hold of <code>RunningJob</code> via the {@link JobClient} |
| and then query the running-job for details such as name, configuration, |
| progress etc.</p> |
| |
| @see JobClient]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.RunningJob --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat --> |
| <class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileAsBinaryInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw) |
| format.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader --> |
| <class name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="createKey" return="org.apache.hadoop.io.BytesWritable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.io.BytesWritable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getKeyClassName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Retrieve the name of the key class for this SequenceFile. |
| @see org.apache.hadoop.io.SequenceFile.Reader#getKeyClassName]]> |
| </doc> |
| </method> |
| <method name="getValueClassName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Retrieve the name of the value class for this SequenceFile. |
| @see org.apache.hadoop.io.SequenceFile.Reader#getValueClassName]]> |
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.BytesWritable"/> |
| <param name="val" type="org.apache.hadoop.io.BytesWritable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Read raw bytes from a SequenceFile.]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return the progress within the input split |
| @return 0.0 to 1.0 of the input byte range]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Read records from a SequenceFile as binary (raw) bytes.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat --> |
| <class name="SequenceFileAsBinaryOutputFormat" extends="org.apache.hadoop.mapred.SequenceFileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileAsBinaryOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setSequenceFileOutputKeyClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the key class for the {@link SequenceFile} |
| <p>This allows the user to specify the key class to be different |
| from the actual class ({@link BytesWritable}) used for writing </p> |
| |
| @param conf the {@link JobConf} to modify |
| @param theClass the SequenceFile output key class.]]> |
| </doc> |
| </method> |
| <method name="setSequenceFileOutputValueClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="theClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[Set the value class for the {@link SequenceFile} |
| <p>This allows the user to specify the value class to be different |
| from the actual class ({@link BytesWritable}) used for writing </p> |
| |
| @param conf the {@link JobConf} to modify |
| @param theClass the SequenceFile output key class.]]> |
| </doc> |
| </method> |
| <method name="getSequenceFileOutputKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the key class for the {@link SequenceFile} |
| |
| @return the key class of the {@link SequenceFile}]]> |
| </doc> |
| </method> |
| <method name="getSequenceFileOutputValueClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the value class for the {@link SequenceFile} |
| |
| @return the value class of the {@link SequenceFile}]]> |
| </doc> |
| </method> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="checkOutputSpecs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link OutputFormat} that writes keys, values to |
| {@link SequenceFile}s in binary(raw) format]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes --> |
| <class name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.SequenceFile.ValueBytes"/> |
| <constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" type="org.apache.hadoop.io.BytesWritable" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="value" type="org.apache.hadoop.io.BytesWritable"/> |
| </method> |
| <method name="writeUncompressedBytes" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="outStream" type="java.io.DataOutputStream"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="writeCompressedBytes" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="outStream" type="java.io.DataOutputStream"/> |
| <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getSize" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[Inner class used for appendRaw]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat --> |
| <class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileAsTextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[This class is similar to SequenceFileInputFormat, except it generates SequenceFileAsTextRecordReader |
| which converts the input keys and values to their String forms by calling toString() method.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader --> |
| <class name="SequenceFileAsTextRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="SequenceFileAsTextRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="createKey" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.io.Text" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="value" type="org.apache.hadoop.io.Text"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Read key/value pair in a line.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[This class converts the input keys and values to their String forms by calling the |
| toString() method. This class is to SequenceFileAsTextInputFormat as LineRecordReader |
| is to TextInputFormat.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter --> |
| <class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileInputFilter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create a record reader for the given split |
| @param split file split |
| @param job job configuration |
| @param reporter reporter who sends report to task tracker |
| @return RecordReader]]> |
| </doc> |
| </method> |
| <method name="setFilterClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="filterClass" type="java.lang.Class"/> |
| <doc> |
| <![CDATA[set the filter class |
| |
| @param conf application configuration |
| @param filterClass filter class]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A class that allows a map/red job to work on a sample of sequence files. |
| The sample is decided by the filter class set by the job.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter --> |
| <!-- start interface org.apache.hadoop.mapred.SequenceFileInputFilter.Filter --> |
| <interface name="SequenceFileInputFilter.Filter" abstract="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <method name="accept" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[filter function |
| Decide if a record should be filtered or not |
| @param key record key |
| @return true if a record is accepted; return false otherwise]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[filter interface]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.SequenceFileInputFilter.Filter --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase --> |
| <class name="SequenceFileInputFilter.FilterBase" extends="java.lang.Object" |
| abstract="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.SequenceFileInputFilter.Filter"/> |
| <constructor name="SequenceFileInputFilter.FilterBase" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[base class for Filters]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.MD5Filter --> |
| <class name="SequenceFileInputFilter.MD5Filter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileInputFilter.MD5Filter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setFrequency" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="frequency" type="int"/> |
| <doc> |
| <![CDATA[set the filtering frequency in configuration |
| |
| @param conf configuration |
| @param frequency filtering frequency]]> |
| </doc> |
| </method> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[configure the filter according to configuration |
| |
| @param conf configuration]]> |
| </doc> |
| </method> |
| <method name="accept" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[Filtering method |
| If MD5(key) % frequency==0, return true; otherwise return false |
| @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]> |
| </doc> |
| </method> |
| <field name="MD5_LEN" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class returns a set of records by examining the MD5 digest of its |
| key against a filtering frequency <i>f</i>. The filtering criteria is |
| MD5(key) % f == 0.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.MD5Filter --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.PercentFilter --> |
| <class name="SequenceFileInputFilter.PercentFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileInputFilter.PercentFilter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setFrequency" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="frequency" type="int"/> |
| <doc> |
| <![CDATA[set the frequency and stores it in conf |
| @param conf configuration |
 @param frequency filtering frequency]]> |
| </doc> |
| </method> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[configure the filter by checking the configuration |
| |
| @param conf configuration]]> |
| </doc> |
| </method> |
| <method name="accept" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[Filtering method |
| If record# % frequency==0, return true; otherwise return false |
| @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class returns a percentage of records |
| The percentage is determined by a filtering frequency <i>f</i> using |
| the criteria record# % f == 0. |
| For example, if the frequency is 10, one out of 10 records is returned.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.PercentFilter --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.RegexFilter --> |
| <class name="SequenceFileInputFilter.RegexFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SequenceFileInputFilter.RegexFilter" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setPattern" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="regex" type="java.lang.String"/> |
| <exception name="PatternSyntaxException" type="java.util.regex.PatternSyntaxException"/> |
| <doc> |
| <![CDATA[Define the filtering regex and stores it in conf |
| @param conf where the regex is set |
| @param regex regex used as a filter]]> |
| </doc> |
| </method> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[configure the Filter by checking the configuration]]> |
| </doc> |
| </method> |
| <method name="accept" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[Filtering method |
| If key matches the regex, return true; otherwise return false |
| @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Records filter by matching key to regex]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.RegexFilter --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileInputFormat --> |
| <class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use |
| {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat} |
| instead."> |
| <constructor name="SequenceFileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link InputFormat} for {@link SequenceFile}s. |
| @deprecated Use |
| {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat} |
| instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileOutputFormat --> |
| <class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use |
| {@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat} |
| instead."> |
| <constructor name="SequenceFileOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getReaders" return="org.apache.hadoop.io.SequenceFile.Reader[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="dir" type="org.apache.hadoop.fs.Path"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Open the output generated by this format.]]> |
| </doc> |
| </method> |
| <method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}. |
| @param conf the {@link JobConf} |
| @return the {@link CompressionType} for the output {@link SequenceFile}, |
| defaulting to {@link CompressionType#RECORD}]]> |
| </doc> |
| </method> |
| <method name="setOutputCompressionType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/> |
| <doc> |
| <![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}. |
| @param conf the {@link JobConf} to modify |
| @param style the {@link CompressionType} for the output |
| {@link SequenceFile}]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s. |
| @deprecated Use |
| {@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat} |
| instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.SequenceFileRecordReader --> |
| <class name="SequenceFileRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="SequenceFileRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="getKeyClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The class of key that must be passed to {@link |
 #next(Object, Object)}.]]> |
| </doc> |
| </method> |
| <method name="getValueClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The class of value that must be passed to {@link |
 #next(Object, Object)}.]]> |
| </doc> |
| </method> |
| <method name="createKey" return="java.lang.Object" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="java.lang.Object" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getCurrentValue" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="value" type="java.lang.Object"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return the progress within the input split |
| @return 0.0 to 1.0 of the input byte range]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="seek" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="pos" type="long"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="conf" type="org.apache.hadoop.conf.Configuration" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[An {@link RecordReader} for {@link SequenceFile}s.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SequenceFileRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.SkipBadRecords --> |
| <class name="SkipBadRecords" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="SkipBadRecords" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getAttemptsToStartSkipping" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the number of Task attempts AFTER which skip mode |
| will be kicked off. When skip mode is kicked off, the |
| tasks reports the range of records which it will process |
| next to the TaskTracker. So that on failures, TT knows which |
| ones are possibly the bad records. On further executions, |
| those are skipped. |
| Default value is 2. |
| |
| @param conf the configuration |
| @return attemptsToStartSkipping no of task attempts]]> |
| </doc> |
| </method> |
| <method name="setAttemptsToStartSkipping" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="attemptsToStartSkipping" type="int"/> |
| <doc> |
| <![CDATA[Set the number of Task attempts AFTER which skip mode |
| will be kicked off. When skip mode is kicked off, the |
| tasks reports the range of records which it will process |
| next to the TaskTracker. So that on failures, TT knows which |
| ones are possibly the bad records. On further executions, |
| those are skipped. |
| Default value is 2. |
| |
| @param conf the configuration |
| @param attemptsToStartSkipping no of task attempts]]> |
| </doc> |
| </method> |
| <method name="getAutoIncrMapperProcCount" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the flag which if set to true, |
| {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented |
| by MapRunner after invoking the map function. This value must be set to |
| false for applications which process the records asynchronously |
| or buffer the input records. For example streaming. |
| In such cases applications should increment this counter on their own. |
| Default value is true. |
| |
| @param conf the configuration |
| @return <code>true</code> if auto increment |
| {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}. |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setAutoIncrMapperProcCount" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="autoIncr" type="boolean"/> |
| <doc> |
| <![CDATA[Set the flag which if set to true, |
| {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented |
| by MapRunner after invoking the map function. This value must be set to |
| false for applications which process the records asynchronously |
| or buffer the input records. For example streaming. |
| In such cases applications should increment this counter on their own. |
| Default value is true. |
| |
| @param conf the configuration |
| @param autoIncr whether to auto increment |
| {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.]]> |
| </doc> |
| </method> |
| <method name="getAutoIncrReducerProcCount" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the flag which if set to true, |
| {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented |
| by framework after invoking the reduce function. This value must be set to |
| false for applications which process the records asynchronously |
| or buffer the input records. For example streaming. |
| In such cases applications should increment this counter on their own. |
| Default value is true. |
| |
| @param conf the configuration |
| @return <code>true</code> if auto increment |
| {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. |
| <code>false</code> otherwise.]]> |
| </doc> |
| </method> |
| <method name="setAutoIncrReducerProcCount" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="autoIncr" type="boolean"/> |
| <doc> |
| <![CDATA[Set the flag which if set to true, |
| {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented |
| by framework after invoking the reduce function. This value must be set to |
| false for applications which process the records asynchronously |
| or buffer the input records. For example streaming. |
| In such cases applications should increment this counter on their own. |
| Default value is true. |
| |
| @param conf the configuration |
| @param autoIncr whether to auto increment |
| {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.]]> |
| </doc> |
| </method> |
| <method name="getSkipOutputPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the directory to which skipped records are written. By default it is |
| the sub directory of the output _logs directory. |
| User can stop writing skipped records by setting the value null. |
| |
| @param conf the configuration. |
| @return path skip output directory. Null is returned if this is not set |
| and output directory is also not set.]]> |
| </doc> |
| </method> |
| <method name="setSkipOutputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="path" type="org.apache.hadoop.fs.Path"/> |
| <doc> |
| <![CDATA[Set the directory to which skipped records are written. By default it is |
| the sub directory of the output _logs directory. |
| User can stop writing skipped records by setting the value null. |
| |
| @param conf the configuration. |
| @param path skip output directory path]]> |
| </doc> |
| </method> |
| <method name="getMapperMaxSkipRecords" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the number of acceptable skip records surrounding the bad record PER |
| bad record in mapper. The number includes the bad record as well. |
| To turn the feature of detection/skipping of bad records off, set the |
| value to 0. |
| The framework tries to narrow down the skipped range by retrying |
| until this threshold is met OR all attempts get exhausted for this task. |
| Set the value to Long.MAX_VALUE to indicate that framework need not try to |
| narrow down. Whatever records(depends on application) get skipped are |
| acceptable. |
| Default value is 0. |
| |
| @param conf the configuration |
| @return maxSkipRecs acceptable skip records.]]> |
| </doc> |
| </method> |
| <method name="setMapperMaxSkipRecords" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="maxSkipRecs" type="long"/> |
| <doc> |
| <![CDATA[Set the number of acceptable skip records surrounding the bad record PER |
| bad record in mapper. The number includes the bad record as well. |
| To turn the feature of detection/skipping of bad records off, set the |
| value to 0. |
| The framework tries to narrow down the skipped range by retrying |
| until this threshold is met OR all attempts get exhausted for this task. |
| Set the value to Long.MAX_VALUE to indicate that framework need not try to |
| narrow down. Whatever records(depends on application) get skipped are |
| acceptable. |
| Default value is 0. |
| |
| @param conf the configuration |
| @param maxSkipRecs acceptable skip records.]]> |
| </doc> |
| </method> |
| <method name="getReducerMaxSkipGroups" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[Get the number of acceptable skip groups surrounding the bad group PER |
| bad group in reducer. The number includes the bad group as well. |
| To turn the feature of detection/skipping of bad groups off, set the |
| value to 0. |
| The framework tries to narrow down the skipped range by retrying |
| until this threshold is met OR all attempts get exhausted for this task. |
| Set the value to Long.MAX_VALUE to indicate that framework need not try to |
| narrow down. Whatever groups(depends on application) get skipped are |
| acceptable. |
| Default value is 0. |
| |
| @param conf the configuration |
| @return maxSkipGrps acceptable skip groups.]]> |
| </doc> |
| </method> |
| <method name="setReducerMaxSkipGroups" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="maxSkipGrps" type="long"/> |
| <doc> |
| <![CDATA[Set the number of acceptable skip groups surrounding the bad group PER |
| bad group in reducer. The number includes the bad group as well. |
| To turn the feature of detection/skipping of bad groups off, set the |
| value to 0. |
| The framework tries to narrow down the skipped range by retrying |
| until this threshold is met OR all attempts get exhausted for this task. |
| Set the value to Long.MAX_VALUE to indicate that framework need not try to |
| narrow down. Whatever groups(depends on application) get skipped are |
| acceptable. |
| Default value is 0. |
| |
| @param conf the configuration |
| @param maxSkipGrps acceptable skip groups.]]> |
| </doc> |
| </method> |
| <field name="COUNTER_GROUP" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Special counters which are written by the application and are |
| used by the framework for detecting bad records. For detecting bad records |
| these counters must be incremented by the application.]]> |
| </doc> |
| </field> |
| <field name="COUNTER_MAP_PROCESSED_RECORDS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Number of processed map records. |
| @see SkipBadRecords#getAutoIncrMapperProcCount(Configuration)]]> |
| </doc> |
| </field> |
| <field name="COUNTER_REDUCE_PROCESSED_GROUPS" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Number of processed reduce groups. |
| @see SkipBadRecords#getAutoIncrReducerProcCount(Configuration)]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[Utility class for skip bad records functionality. It contains various |
| settings related to skipping of bad records. |
| |
| <p>Hadoop provides an optional mode of execution in which the bad records |
| are detected and skipped in further attempts. |
| |
 <p>This feature can be used when map/reduce tasks crash deterministically on |
| certain input. This happens due to bugs in the map/reduce function. The usual |
| course would be to fix these bugs. But sometimes this is not possible; |
| perhaps the bug is in third party libraries for which the source code is |
 not available. Due to this, the task never reaches completion even with |
| multiple attempts and complete data for that task is lost.</p> |
| |
| <p>With this feature, only a small portion of data is lost surrounding |
| the bad record, which may be acceptable for some user applications. |
| see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}</p> |
| |
| <p>The skipping mode gets kicked off after certain no of failures |
| see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}</p> |
| |
| <p>In the skipping mode, the map/reduce task maintains the record range which |
| is getting processed at all times. Before giving the input to the |
| map/reduce function, it sends this record range to the Task tracker. |
| If task crashes, the Task tracker knows which one was the last reported |
| range. On further attempts that range get skipped.</p>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.SkipBadRecords --> |
| <!-- start class org.apache.hadoop.mapred.TaskAttemptContext --> |
| <class name="TaskAttemptContext" extends="org.apache.hadoop.mapreduce.TaskAttemptContext" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.TaskAttemptContext} |
| instead."> |
| <method name="getTaskAttemptID" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the taskAttemptID. |
| |
| @return TaskAttemptID]]> |
| </doc> |
| </method> |
| <method name="getProgressible" return="org.apache.hadoop.util.Progressable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="progress" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.TaskAttemptContext} |
| instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskAttemptContext --> |
| <!-- start class org.apache.hadoop.mapred.TaskAttemptID --> |
| <class name="TaskAttemptID" extends="org.apache.hadoop.mapreduce.TaskAttemptID" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskAttemptID" type="org.apache.hadoop.mapred.TaskID, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}. |
| @param taskId TaskID that this task belongs to |
| @param id the task attempt number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskId object from given parts. |
| @param jtIdentifier jobTracker identifier |
| @param jobId job number |
| @param isMap whether the tip is a map |
| @param taskId taskId number |
| @param id the task attempt number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskAttemptID" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="downgrade" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="old" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> |
| <doc> |
| <![CDATA[Downgrade a new TaskAttemptID to an old one |
| @param old the new id |
| @return either old or a new TaskAttemptID constructed to match old]]> |
| </doc> |
| </method> |
| <method name="getTaskID" return="org.apache.hadoop.mapred.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getJobID" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="read" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="forName" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="str" type="java.lang.String"/> |
| <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> |
| <doc> |
| <![CDATA[Construct a TaskAttemptID object from given string |
| @return constructed TaskAttemptID object or null if the given String is null |
| @throws IllegalArgumentException if the given string is malformed]]> |
| </doc> |
| </method> |
| <method name="getTaskAttemptIDsPattern" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jtIdentifier" type="java.lang.String"/> |
| <param name="jobId" type="java.lang.Integer"/> |
| <param name="isMap" type="java.lang.Boolean"/> |
| <param name="taskId" type="java.lang.Integer"/> |
| <param name="attemptId" type="java.lang.Integer"/> |
| <doc> |
| <![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can |
| be given null, in which case that part of the regex will be generic. |
| For example to obtain a regex matching <i>all task attempt IDs</i> |
| of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first |
| map task</i>, we would use : |
| <pre> |
| TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null); |
| </pre> |
| which will return : |
| <pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre> |
| @param jtIdentifier jobTracker identifier, or null |
| @param jobId job number, or null |
| @param isMap whether the tip is a map, or null |
| @param taskId taskId number, or null |
| @param attemptId the task attempt number, or null |
| @return a regex pattern matching TaskAttemptIDs]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[TaskAttemptID represents the immutable and unique identifier for |
| a task attempt. Each task attempt is one particular instance of a Map or |
| Reduce Task identified by its TaskID. |
| |
| TaskAttemptID consists of 2 parts. First part is the |
| {@link TaskID}, that this TaskAttemptID belongs to. |
| Second part is the task attempt number. <br> |
| An example TaskAttemptID is : |
| <code>attempt_200707121733_0003_m_000005_0</code> , which represents the |
| zeroth task attempt for the fifth map task in the third job |
| running at the jobtracker started at <code>200707121733</code>. |
| <p> |
| Applications should never construct or parse TaskAttemptID strings |
| , but rather use appropriate constructors or {@link #forName(String)} |
| method. |
| |
| @see JobID |
| @see TaskID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskAttemptID --> |
| <!-- start class org.apache.hadoop.mapred.TaskCompletionEvent --> |
| <class name="TaskCompletionEvent" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="TaskCompletionEvent" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Default constructor for Writable.]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapred.TaskAttemptID, int, boolean, org.apache.hadoop.mapred.TaskCompletionEvent.Status, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructor. eventId should be created externally and incremented |
| per event for each job. |
@param eventId event id; event ids should be unique and assigned
incrementally, starting from 0.
| @param taskId task id |
| @param status task's status |
| @param taskTrackerHttp task tracker's host:port for http.]]> |
| </doc> |
| </constructor> |
| <method name="getEventId" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns event Id. |
| @return event id]]> |
| </doc> |
| </method> |
| <method name="getTaskId" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #getTaskAttemptId()} instead."> |
| <doc> |
| <![CDATA[Returns task id. |
| @return task id |
| @deprecated use {@link #getTaskAttemptId()} instead.]]> |
| </doc> |
| </method> |
| <method name="getTaskAttemptId" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns task id. |
| @return task id]]> |
| </doc> |
| </method> |
| <method name="getTaskStatus" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[Returns enum Status.SUCCEEDED or Status.FAILED.
| @return task tracker status]]> |
| </doc> |
| </method> |
| <method name="getTaskTrackerHttp" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[http location of the tasktracker where this task ran. |
| @return http location of tasktracker user logs]]> |
| </doc> |
| </method> |
| <method name="getTaskRunTime" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns time (in millisec) the task took to complete.]]> |
| </doc> |
| </method> |
| <method name="setTaskRunTime" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskCompletionTime" type="int"/> |
| <doc> |
| <![CDATA[Set the task completion time |
| @param taskCompletionTime time (in millisec) the task took to complete]]> |
| </doc> |
| </method> |
| <method name="setEventId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="eventId" type="int"/> |
| <doc> |
| <![CDATA[set event Id. should be assigned incrementally starting from 0. |
| @param eventId]]> |
| </doc> |
| </method> |
| <method name="setTaskId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #setTaskID(TaskAttemptID)} instead."> |
| <param name="taskId" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Sets task id. |
| @param taskId |
| @deprecated use {@link #setTaskID(TaskAttemptID)} instead.]]> |
| </doc> |
| </method> |
| <method name="setTaskID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <doc> |
| <![CDATA[Sets task id. |
| @param taskId]]> |
| </doc> |
| </method> |
| <method name="setTaskStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="status" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"/> |
| <doc> |
| <![CDATA[Set task status. |
| @param status]]> |
| </doc> |
| </method> |
| <method name="setTaskTrackerHttp" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskTrackerHttp" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set task tracker http location. |
| @param taskTrackerHttp]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="o" type="java.lang.Object"/> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="isMapTask" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="idWithinJob" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="EMPTY_ARRAY" type="org.apache.hadoop.mapred.TaskCompletionEvent[]" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This is used to track task completion events on |
| job tracker.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskCompletionEvent --> |
| <!-- start class org.apache.hadoop.mapred.TaskCompletionEvent.Status --> |
| <class name="TaskCompletionEvent.Status" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <field name="FAILED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="KILLED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="SUCCEEDED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="OBSOLETE" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="TIPFAILED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskCompletionEvent.Status --> |
| <!-- start class org.apache.hadoop.mapred.TaskGraphServlet --> |
| <class name="TaskGraphServlet" extends="javax.servlet.http.HttpServlet" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskGraphServlet" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="doGet" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="request" type="javax.servlet.http.HttpServletRequest"/> |
| <param name="response" type="javax.servlet.http.HttpServletResponse"/> |
| <exception name="ServletException" type="javax.servlet.ServletException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="width" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[width of the graph w/o margins]]>
| </doc> |
| </field> |
| <field name="height" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[height of the graph w/o margins]]> |
| </doc> |
| </field> |
| <field name="ymargin" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[margin space on y axis]]> |
| </doc> |
| </field> |
| <field name="xmargin" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[margin space on x axis]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[The servlet that outputs svg graphics for map / reduce task |
| statuses]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskGraphServlet --> |
| <!-- start class org.apache.hadoop.mapred.TaskID --> |
| <class name="TaskID" extends="org.apache.hadoop.mapreduce.TaskID" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructs a TaskID object from given {@link JobID}. |
| @param jobId JobID that this tip belongs to |
| @param isMap whether the tip is a map |
| @param id the tip number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskID" type="java.lang.String, int, boolean, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[Constructs a TaskID object from given parts.
| @param jtIdentifier jobTracker identifier |
| @param jobId job number |
| @param isMap whether the tip is a map |
| @param id the tip number]]> |
| </doc> |
| </constructor> |
| <constructor name="TaskID" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="downgrade" return="org.apache.hadoop.mapred.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="old" type="org.apache.hadoop.mapreduce.TaskID"/> |
| <doc> |
| <![CDATA[Downgrade a new TaskID to an old one |
| @param old a new or old TaskID |
| @return either old or a new TaskID build to match old]]> |
| </doc> |
| </method> |
| <method name="read" return="org.apache.hadoop.mapred.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getJobID" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getTaskIDsPattern" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jtIdentifier" type="java.lang.String"/> |
| <param name="jobId" type="java.lang.Integer"/> |
| <param name="isMap" type="java.lang.Boolean"/> |
| <param name="taskId" type="java.lang.Integer"/> |
| <doc> |
| <![CDATA[Returns a regex pattern which matches task IDs. Arguments can |
| be given null, in which case that part of the regex will be generic. |
| For example to obtain a regex matching <i>the first map task</i> |
| of <i>any jobtracker</i>, of <i>any job</i>, we would use : |
| <pre> |
| TaskID.getTaskIDsPattern(null, null, true, 1); |
| </pre> |
| which will return : |
| <pre> "task_[^_]*_[0-9]*_m_000001*" </pre> |
| @param jtIdentifier jobTracker identifier, or null |
| @param jobId job number, or null |
| @param isMap whether the tip is a map, or null |
| @param taskId taskId number, or null |
| @return a regex pattern matching TaskIDs]]> |
| </doc> |
| </method> |
| <method name="forName" return="org.apache.hadoop.mapred.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="str" type="java.lang.String"/> |
| <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> |
| </method> |
| <doc> |
| <![CDATA[TaskID represents the immutable and unique identifier for |
| a Map or Reduce Task. Each TaskID encompasses multiple attempts made to |
execute the Map or Reduce Task, each of which is uniquely identified by
| their TaskAttemptID. |
| |
| TaskID consists of 3 parts. First part is the {@link JobID}, that this |
| TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r' |
| representing whether the task is a map task or a reduce task. |
| And the third part is the task number. <br> |
| An example TaskID is : |
| <code>task_200707121733_0003_m_000005</code> , which represents the |
| fifth map task in the third job running at the jobtracker |
| started at <code>200707121733</code>. |
| <p> |
| Applications should never construct or parse TaskID strings |
| , but rather use appropriate constructors or {@link #forName(String)} |
| method. |
| |
| @see JobID |
| @see TaskAttemptID]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskID --> |
| <!-- start class org.apache.hadoop.mapred.TaskLog --> |
| <class name="TaskLog" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskLog" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getTaskLogFile" return="java.io.File" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="filter" type="org.apache.hadoop.mapred.TaskLog.LogName"/> |
| </method> |
| <method name="getRealTaskLogFileLocation" return="java.io.File" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="filter" type="org.apache.hadoop.mapred.TaskLog.LogName"/> |
| </method> |
| <method name="getIndexFile" return="java.io.File" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="java.lang.String"/> |
| </method> |
| <method name="getIndexFile" return="java.io.File" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="java.lang.String"/> |
| <param name="isCleanup" type="boolean"/> |
| </method> |
| <method name="syncLogs" |
| abstract="false" native="false" synchronized="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="firstTaskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="syncLogs" |
| abstract="false" native="false" synchronized="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="firstTaskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="isCleanup" type="boolean"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="cleanup" |
| abstract="false" native="false" synchronized="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="logsRetainHours" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Purge old user logs. |
| |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="getTaskLogLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Get the desired maximum length of task's logs. |
| @param conf the job to look in |
| @return the number of bytes to cap the log files at]]> |
| </doc> |
| </method> |
| <method name="captureOutAndError" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cmd" type="java.util.List"/> |
| <param name="stdoutFilename" type="java.io.File"/> |
| <param name="stderrFilename" type="java.io.File"/> |
| <param name="tailLength" type="long"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Wrap a command in a shell to capture stdout and stderr to files. |
| If the tailLength is 0, the entire output will be saved. |
| @param cmd The command and the arguments that should be run |
| @param stdoutFilename The filename that stdout should be saved to |
| @param stderrFilename The filename that stderr should be saved to |
| @param tailLength The length of the tail to be saved. |
| @return the modified command that should be run]]> |
| </doc> |
| </method> |
| <method name="captureOutAndError" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="setup" type="java.util.List"/> |
| <param name="cmd" type="java.util.List"/> |
| <param name="stdoutFilename" type="java.io.File"/> |
| <param name="stderrFilename" type="java.io.File"/> |
| <param name="tailLength" type="long"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Wrap a command in a shell to capture stdout and stderr to files. |
| Setup commands such as setting memory limit can be passed which |
| will be executed before exec. |
| If the tailLength is 0, the entire output will be saved. |
| @param setup The setup commands for the execed process. |
| @param cmd The command and the arguments that should be run |
| @param stdoutFilename The filename that stdout should be saved to |
| @param stderrFilename The filename that stderr should be saved to |
| @param tailLength The length of the tail to be saved. |
| @return the modified command that should be run]]> |
| </doc> |
| </method> |
| <method name="captureOutAndError" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="setup" type="java.util.List"/> |
| <param name="cmd" type="java.util.List"/> |
| <param name="stdoutFilename" type="java.io.File"/> |
| <param name="stderrFilename" type="java.io.File"/> |
| <param name="tailLength" type="long"/> |
| <param name="pidFileName" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Wrap a command in a shell to capture stdout and stderr to files. |
| Setup commands such as setting memory limit can be passed which |
| will be executed before exec. |
| If the tailLength is 0, the entire output will be saved. |
| @param setup The setup commands for the execed process. |
| @param cmd The command and the arguments that should be run |
| @param stdoutFilename The filename that stdout should be saved to |
| @param stderrFilename The filename that stderr should be saved to |
| @param tailLength The length of the tail to be saved. |
| @param pidFileName The name of the pid-file |
| @return the modified command that should be run]]> |
| </doc> |
| </method> |
| <method name="addCommand" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cmd" type="java.util.List"/> |
| <param name="isExecutable" type="boolean"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add quotes to each of the command strings and |
| return as a single string |
| @param cmd The command to be quoted |
| @param isExecutable makes shell path if the first |
| argument is executable |
| @return returns The quoted string. |
| @throws IOException]]> |
| </doc> |
| </method> |
| <method name="captureDebugOut" return="java.util.List" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="cmd" type="java.util.List"/> |
| <param name="debugoutFilename" type="java.io.File"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Wrap a command in a shell to capture debug script's |
| stdout and stderr to debugout. |
| @param cmd The command and the arguments that should be run |
| @param debugoutFilename The filename that stdout and stderr |
| should be saved to. |
| @return the modified command that should be run |
| @throws IOException]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A simple logger to handle the task-specific user logs. |
| This class uses the system property <code>hadoop.log.dir</code>.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskLog --> |
| <!-- start class org.apache.hadoop.mapred.TaskLog.LogName --> |
| <class name="TaskLog.LogName" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.TaskLog.LogName[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.TaskLog.LogName" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <field name="STDOUT" type="org.apache.hadoop.mapred.TaskLog.LogName" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Log on the stdout of the task.]]> |
| </doc> |
| </field> |
| <field name="STDERR" type="org.apache.hadoop.mapred.TaskLog.LogName" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Log on the stderr of the task.]]> |
| </doc> |
| </field> |
| <field name="SYSLOG" type="org.apache.hadoop.mapred.TaskLog.LogName" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Log on the map-reduce system logs of the task.]]> |
| </doc> |
| </field> |
| <field name="PROFILE" type="org.apache.hadoop.mapred.TaskLog.LogName" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The java profiler information.]]> |
| </doc> |
| </field> |
| <field name="DEBUGOUT" type="org.apache.hadoop.mapred.TaskLog.LogName" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Log the debug script's stdout]]> |
| </doc> |
| </field> |
| <doc> |
| <![CDATA[The filter for userlogs.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskLog.LogName --> |
| <!-- start class org.apache.hadoop.mapred.TaskLogAppender --> |
| <class name="TaskLogAppender" extends="org.apache.log4j.FileAppender" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskLogAppender" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="activateOptions" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="append" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="event" type="org.apache.log4j.spi.LoggingEvent"/> |
| </method> |
| <method name="flush" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getTaskId" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Getter/Setter methods for log4j.]]> |
| </doc> |
| </method> |
| <method name="setTaskId" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="java.lang.String"/> |
| </method> |
| <method name="getTotalLogFileSize" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setTotalLogFileSize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="logSize" type="long"/> |
| </method> |
| <doc> |
| <![CDATA[A simple log4j-appender for the task child's |
| map-reduce system logs.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskLogAppender --> |
| <!-- start class org.apache.hadoop.mapred.TaskLogServlet --> |
| <class name="TaskLogServlet" extends="javax.servlet.http.HttpServlet" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskLogServlet" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getTaskLogUrl" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskTrackerHostName" type="java.lang.String"/> |
| <param name="httpPort" type="java.lang.String"/> |
| <param name="taskAttemptID" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Construct the taskLogUrl |
| @param taskTrackerHostName |
| @param httpPort |
| @param taskAttemptID |
| @return the taskLogUrl]]> |
| </doc> |
| </method> |
| <method name="doGet" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="request" type="javax.servlet.http.HttpServletRequest"/> |
| <param name="response" type="javax.servlet.http.HttpServletResponse"/> |
| <exception name="ServletException" type="javax.servlet.ServletException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the logs via http.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A servlet that is run by the TaskTrackers to provide the task logs via http.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskLogServlet --> |
| <!-- start class org.apache.hadoop.mapred.TaskReport --> |
| <class name="TaskReport" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <constructor name="TaskReport" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getTaskId" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #getTaskID()} instead"> |
| <doc> |
| <![CDATA[@deprecated use {@link #getTaskID()} instead]]> |
| </doc> |
| </method> |
| <method name="getTaskID" return="org.apache.hadoop.mapred.TaskID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The id of the task.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The amount completed, between zero and one.]]> |
| </doc> |
| </method> |
| <method name="getState" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The most recent state, reported by a {@link Reporter}.]]> |
| </doc> |
| </method> |
| <method name="getDiagnostics" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[A list of error messages.]]> |
| </doc> |
| </method> |
| <method name="getCounters" return="org.apache.hadoop.mapred.Counters" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[A table of counters.]]> |
| </doc> |
| </method> |
| <method name="getCurrentStatus" return="org.apache.hadoop.mapred.TIPStatus" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The current status]]> |
| </doc> |
| </method> |
| <method name="getFinishTime" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get finish time of task. |
| @return 0, if finish time was not set else returns finish time.]]> |
| </doc> |
| </method> |
| <method name="getStartTime" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get start time of task. |
| @return 0 if start time was not set, else start time.]]> |
| </doc> |
| </method> |
| <method name="setSuccessfulAttempt" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="t" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <doc> |
| <![CDATA[set successful attempt ID of the task.]]> |
| </doc> |
| </method> |
| <method name="getSuccessfulTaskAttempt" return="org.apache.hadoop.mapred.TaskAttemptID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the attempt ID that took this task to completion]]> |
| </doc> |
| </method> |
| <method name="setRunningTaskAttempts" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="runningAttempts" type="java.util.Collection"/> |
| <doc> |
| <![CDATA[set running attempt(s) of the task.]]> |
| </doc> |
| </method> |
| <method name="getRunningTaskAttempts" return="java.util.Collection" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Get the running task attempt IDs for this task]]> |
| </doc> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="o" type="java.lang.Object"/> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A report on the state of a task.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskReport --> |
| <!-- start class org.apache.hadoop.mapred.TaskTracker --> |
| <class name="TaskTracker" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.MRConstants"/> |
| <implements name="org.apache.hadoop.mapred.TaskUmbilicalProtocol"/> |
| <implements name="java.lang.Runnable"/> |
| <constructor name="TaskTracker" type="org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Start with the local machine name, and the default JobTracker]]> |
| </doc> |
| </constructor> |
| <method name="getTaskTrackerInstrumentation" return="org.apache.hadoop.mapred.TaskTrackerInstrumentation" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getProtocolVersion" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="protocol" type="java.lang.String"/> |
| <param name="clientVersion" type="long"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getInstrumentationClass" return="java.lang.Class" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="setInstrumentationClass" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <param name="t" type="java.lang.Class"/> |
| </method> |
| <method name="cleanupStorage" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Removes all contents of temporary storage. Called upon |
| startup, to remove any leftovers from previous run.]]> |
| </doc> |
| </method> |
| <method name="shutdown" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close down the TaskTracker and all its components. We must also shutdown |
| any running tasks or threads, and cleanup disk space. A new TaskTracker |
| within the same process space might be restarted, so everything must be |
| clean.]]> |
| </doc> |
| </method> |
| <method name="getJobClient" return="org.apache.hadoop.mapred.InterTrackerProtocol" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The connection to the JobTracker, used by the TaskRunner |
| for locating remote files.]]> |
| </doc> |
| </method> |
| <method name="getTaskTrackerReportAddress" return="java.net.InetSocketAddress" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
<![CDATA[Return the address to which the tasktracker is bound]]>
| </doc> |
| </method> |
| <method name="getJvmManagerInstance" return="org.apache.hadoop.mapred.JvmManager" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The server retry loop. |
| This while-loop attempts to connect to the JobTracker. It only |
| loops when the old TaskTracker has gone bad (its state is |
| stale somehow) and we need to reinitialize everything.]]> |
| </doc> |
| </method> |
| <method name="getTask" return="org.apache.hadoop.mapred.JvmTask" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jvmId" type="org.apache.hadoop.mapred.JVMId"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Called upon startup by the child process, to fetch Task data.]]> |
| </doc> |
| </method> |
| <method name="statusUpdate" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="taskStatus" type="org.apache.hadoop.mapred.TaskStatus"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Called periodically to report Task progress, from 0.0 to 1.0.]]> |
| </doc> |
| </method> |
| <method name="reportDiagnosticInfo" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="info" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Called when the task dies before completion, and we want to report back |
| diagnostic info]]> |
| </doc> |
| </method> |
| <method name="reportNextRecordRange" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="range" type="org.apache.hadoop.mapred.SortedRanges.Range"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="ping" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Child checking to see if we're alive. Normally does nothing.]]> |
| </doc> |
| </method> |
| <method name="commitPending" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="taskStatus" type="org.apache.hadoop.mapred.TaskStatus"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
<![CDATA[Task is reporting that it is in commit_pending
and it is waiting for the commit response]]>
| </doc> |
| </method> |
| <method name="canCommit" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <doc> |
| <![CDATA[Child checking whether it can commit]]> |
| </doc> |
| </method> |
| <method name="done" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[The task is done.]]> |
| </doc> |
| </method> |
| <method name="shuffleError" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="message" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[A reduce-task failed to shuffle the map-outputs. Kill the task.]]> |
| </doc> |
| </method> |
| <method name="fsError" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="message" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[A child task had a local filesystem error. Kill the task.]]> |
| </doc> |
| </method> |
| <method name="getMapCompletionEvents" return="org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> |
| <param name="fromEventId" type="int"/> |
| <param name="maxLocs" type="int"/> |
| <param name="id" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="mapOutputLost" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> |
| <param name="errorMsg" type="java.lang.String"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[A completed map task's output has been lost.]]> |
| </doc> |
| </method> |
| <method name="isIdle" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Is this task tracker idle? |
| @return has this task tracker finished and cleaned up all of its tasks?]]> |
| </doc> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="argv" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| <doc> |
| <![CDATA[Start the TaskTracker, point toward the indicated JobTracker]]> |
| </doc> |
| </method> |
| <method name="isTaskMemoryManagerEnabled" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Is the TaskMemoryManager Enabled on this system? |
| @return true if enabled, false otherwise.]]> |
| </doc> |
| </method> |
| <method name="getTaskMemoryManager" return="org.apache.hadoop.mapred.TaskMemoryManagerThread" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <field name="LOG" type="org.apache.commons.logging.Log" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="MR_CLIENTTRACE_FORMAT" type="java.lang.String" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="ClientTraceLog" type="org.apache.commons.logging.Log" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[TaskTracker is a process that starts and tracks MR Tasks |
| in a networked environment. It contacts the JobTracker |
| for Task assignments and reporting results.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskTracker --> |
| <!-- start class org.apache.hadoop.mapred.TaskTracker.MapOutputServlet --> |
| <class name="TaskTracker.MapOutputServlet" extends="javax.servlet.http.HttpServlet" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="TaskTracker.MapOutputServlet" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="doGet" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="request" type="javax.servlet.http.HttpServletRequest"/> |
| <param name="response" type="javax.servlet.http.HttpServletResponse"/> |
| <exception name="ServletException" type="javax.servlet.ServletException"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[This class is used in TaskTracker's Jetty to serve the map outputs |
| to other nodes.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TaskTracker.MapOutputServlet --> |
| <!-- start class org.apache.hadoop.mapred.TextInputFormat --> |
| <class name="TextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat} |
| instead."> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="TextInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="isSplitable" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="file" type="org.apache.hadoop.fs.Path"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines. |
| Either linefeed or carriage-return are used to signal end of line. Keys are |
the position in the file, and values are the line of text.
| @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat} |
| instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TextInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.TextOutputFormat --> |
| <class name="TextOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use |
| {@link org.apache.hadoop.mapreduce.lib.output.TextOutputFormat} instead."> |
| <constructor name="TextOutputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="name" type="java.lang.String"/> |
| <param name="progress" type="org.apache.hadoop.util.Progressable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link OutputFormat} that writes plain text files. |
| @deprecated Use |
| {@link org.apache.hadoop.mapreduce.lib.output.TextOutputFormat} instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TextOutputFormat --> |
| <!-- start class org.apache.hadoop.mapred.TextOutputFormat.LineRecordWriter --> |
| <class name="TextOutputFormat.LineRecordWriter" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordWriter"/> |
| <constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="write" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="out" type="java.io.DataOutputStream" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TextOutputFormat.LineRecordWriter --> |
| <!-- start class org.apache.hadoop.mapred.TIPStatus --> |
| <class name="TIPStatus" extends="java.lang.Enum" |
| abstract="false" |
| static="false" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.TIPStatus[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.TIPStatus" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <field name="PENDING" type="org.apache.hadoop.mapred.TIPStatus" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="RUNNING" type="org.apache.hadoop.mapred.TIPStatus" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="COMPLETE" type="org.apache.hadoop.mapred.TIPStatus" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="KILLED" type="org.apache.hadoop.mapred.TIPStatus" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FAILED" type="org.apache.hadoop.mapred.TIPStatus" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[The states of a {@link TaskInProgress} as seen by the JobTracker.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.TIPStatus --> |
| </package> |
| <package name="org.apache.hadoop.mapred.jobcontrol"> |
| <!-- start class org.apache.hadoop.mapred.jobcontrol.Job --> |
| <class name="Job" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Job" type="org.apache.hadoop.mapred.JobConf, java.util.ArrayList" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Construct a job. |
| @param jobConf a mapred job configuration representing a job to be executed. |
| @param dependingJobs an array of jobs the current job depends on]]> |
| </doc> |
| </constructor> |
| <constructor name="Job" type="org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Construct a job. |
| |
| @param jobConf mapred job configuration representing a job to be executed. |
| @throws IOException]]> |
| </doc> |
| </constructor> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getJobName" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the job name of this job]]> |
| </doc> |
| </method> |
| <method name="setJobName" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobName" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the job name for this job. |
| @param jobName the job name]]> |
| </doc> |
| </method> |
| <method name="getJobID" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the job ID of this job assigned by JobControl]]> |
| </doc> |
| </method> |
| <method name="setJobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="id" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the job ID for this job. |
| @param id the job ID]]> |
| </doc> |
| </method> |
| <method name="getMapredJobID" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #getAssignedJobID()} instead"> |
| <doc> |
| <![CDATA[@return the mapred ID of this job |
| @deprecated use {@link #getAssignedJobID()} instead]]> |
| </doc> |
| </method> |
| <method name="setMapredJobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="use {@link #setAssignedJobID(JobID)} instead"> |
| <param name="mapredJobID" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the mapred ID for this job. |
| @param mapredJobID the mapred job ID for this job. |
| @deprecated use {@link #setAssignedJobID(JobID)} instead]]> |
| </doc> |
| </method> |
| <method name="getAssignedJobID" return="org.apache.hadoop.mapred.JobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the mapred ID of this job as assigned by the |
| mapred framework.]]> |
| </doc> |
| </method> |
| <method name="setAssignedJobID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="mapredJobID" type="org.apache.hadoop.mapred.JobID"/> |
| <doc> |
| <![CDATA[Set the mapred ID for this job as assigned by the |
| mapred framework. |
| @param mapredJobID the mapred job ID for this job.]]> |
| </doc> |
| </method> |
| <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the mapred job conf of this job]]> |
| </doc> |
| </method> |
| <method name="setJobConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Set the mapred job conf for this job. |
| @param jobConf the mapred job conf for this job.]]> |
| </doc> |
| </method> |
| <method name="getState" return="int" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the state of this job]]> |
| </doc> |
| </method> |
| <method name="setState" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="state" type="int"/> |
| <doc> |
| <![CDATA[Set the state for this job. |
| @param state the new state for this job.]]> |
| </doc> |
| </method> |
| <method name="getMessage" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the message of this job]]> |
| </doc> |
| </method> |
| <method name="setMessage" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="message" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Set the message for this job. |
| @param message the message for this job.]]> |
| </doc> |
| </method> |
| <method name="getJobClient" return="org.apache.hadoop.mapred.JobClient" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the job client of this job]]> |
| </doc> |
| </method> |
| <method name="getDependingJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the depending jobs of this job]]> |
| </doc> |
| </method> |
| <method name="addDependingJob" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="dependingJob" type="org.apache.hadoop.mapred.jobcontrol.Job"/> |
| <doc> |
| <![CDATA[Add a job to this job's dependency list. Dependent jobs can only be added while a Job |
| is waiting to run, not during or afterwards. |
| |
| @param dependingJob Job that this Job depends on. |
| @return <tt>true</tt> if the Job was added.]]> |
| </doc> |
| </method> |
| <method name="isCompleted" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return true if this job is in a complete state]]> |
| </doc> |
| </method> |
| <method name="isReady" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return true if this job is in READY state]]> |
| </doc> |
| </method> |
| <method name="submit" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Submit this job to mapred. The state becomes RUNNING if submission |
| is successful, FAILED otherwise.]]> |
| </doc> |
| </method> |
| <field name="SUCCESS" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="WAITING" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="RUNNING" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="READY" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="FAILED" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="DEPENDENT_FAILED" type="int" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class encapsulates a MapReduce job and its dependency. It monitors |
| the states of the depending jobs and updates the state of this job. |
| A job starts in the WAITING state. If it does not have any depending jobs, or |
| all of the depending jobs are in SUCCESS state, then the job state will become |
| READY. If any depending jobs fail, the job will fail too. |
| When in READY state, the job can be submitted to Hadoop for execution, with |
| the state changing into RUNNING state. From RUNNING state, the job can get into |
| SUCCESS or FAILED state, depending on the status of the job execution.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.jobcontrol.Job --> |
| <!-- start class org.apache.hadoop.mapred.jobcontrol.JobControl --> |
| <class name="JobControl" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="java.lang.Runnable"/> |
| <constructor name="JobControl" type="java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Construct a job control for a group of jobs. |
| @param groupName a name identifying this group]]> |
| </doc> |
| </constructor> |
| <method name="getWaitingJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the waiting state]]> |
| </doc> |
| </method> |
| <method name="getRunningJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the running state]]> |
| </doc> |
| </method> |
| <method name="getReadyJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the ready state]]> |
| </doc> |
| </method> |
| <method name="getSuccessfulJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the jobs in the success state]]> |
| </doc> |
| </method> |
| <method name="getFailedJobs" return="java.util.ArrayList" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="addJob" return="java.lang.String" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="aJob" type="org.apache.hadoop.mapred.jobcontrol.Job"/> |
| <doc> |
| <![CDATA[Add a new job. |
| @param aJob the new job]]> |
| </doc> |
| </method> |
| <method name="addJobs" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jobs" type="java.util.Collection"/> |
| <doc> |
| <![CDATA[Add a collection of jobs |
|  |
| @param jobs the collection of jobs to add]]> |
| </doc> |
| </method> |
| <method name="getState" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[@return the thread state]]> |
| </doc> |
| </method> |
| <method name="stop" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[set the thread state to STOPPING so that the |
| thread will stop when it wakes up.]]> |
| </doc> |
| </method> |
| <method name="suspend" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[suspend the running thread]]> |
| </doc> |
| </method> |
| <method name="resume" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[resume the suspended thread]]> |
| </doc> |
| </method> |
| <method name="allFinished" return="boolean" |
| abstract="false" native="false" synchronized="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="run" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The main loop for the thread. |
| The loop does the following: |
| Check the states of the running jobs |
| Update the states of waiting jobs |
| Submit the jobs in ready state]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This class encapsulates a set of MapReduce jobs and its dependency. It tracks |
| the states of the jobs by placing them into different tables according to their |
| states. |
| |
| This class provides APIs for the client app to add a job to the group and to get |
| the jobs in the group in different states. When a |
| job is added, an ID unique to the group is assigned to the job. |
| |
| This class has a thread that submits jobs when they become ready, monitors the |
| states of the running jobs, and updates the states of jobs based on the state changes |
| of their depending jobs' states. The class provides APIs for suspending/resuming |
| the thread, and for stopping the thread.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.jobcontrol.JobControl --> |
| </package> |
| <package name="org.apache.hadoop.mapred.join"> |
| <!-- start class org.apache.hadoop.mapred.join.ArrayListBackedIterator --> |
| <class name="ArrayListBackedIterator" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> |
| <constructor name="ArrayListBackedIterator" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="ArrayListBackedIterator" type="java.util.ArrayList" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="replay" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="item" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="clear" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[This class provides an implementation of ResetableIterator. The |
| implementation uses an {@link java.util.ArrayList} to store elements |
| added to it, replaying them as requested. |
| Prefer {@link StreamBackedIterator}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.ArrayListBackedIterator --> |
| <!-- start interface org.apache.hadoop.mapred.join.ComposableInputFormat --> |
| <interface name="ComposableInputFormat" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputFormat"/> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Refinement of InputFormat requiring implementors to provide |
| ComposableRecordReader instead of RecordReader.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.join.ComposableInputFormat --> |
| <!-- start interface org.apache.hadoop.mapred.join.ComposableRecordReader --> |
| <interface name="ComposableRecordReader" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <implements name="java.lang.Comparable"/> |
| <method name="id" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the position in the collector this class occupies.]]> |
| </doc> |
| </method> |
| <method name="key" return="org.apache.hadoop.io.WritableComparable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the key this RecordReader would supply on a call to next(K,V)]]> |
| </doc> |
| </method> |
| <method name="key" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Clone the key at the head of this RecordReader into the object provided.]]> |
| </doc> |
| </method> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns true if the stream is not empty, but provides no guarantee that |
| a call to next(K,V) will succeed.]]> |
| </doc> |
| </method> |
| <method name="skip" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]> |
| </doc> |
| </method> |
| <method name="accept" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[While key-value pairs from this RecordReader match the given key, register |
| them with the JoinCollector provided.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Additional operations required of a RecordReader to participate in a join.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.join.ComposableRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.CompositeInputFormat --> |
| <class name="CompositeInputFormat" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/> |
| <constructor name="CompositeInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="setFormat" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Interpret a given string as a composite expression. |
| {@code |
| func ::= <ident>([<func>,]*<func>) |
| func ::= tbl(<class>,"<path>") |
| class ::= @see java.lang.Class#forName(java.lang.String) |
| path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) |
| } |
| Reads expression from the <tt>mapred.join.expr</tt> property and |
| user-supplied join types from <tt>mapred.join.define.<ident></tt> |
| types. Paths supplied to <tt>tbl</tt> are given as input paths to the |
| InputFormat class listed. |
| @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> |
| </doc> |
| </method> |
| <method name="addDefaults" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Adds the default set of identifiers to the parser.]]> |
| </doc> |
| </method> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the |
| ith split from each child to the ith composite split.]]> |
| </doc> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Construct a CompositeRecordReader for the children of this InputFormat |
| as defined in the init expression. |
| The outermost join need only be composable, not necessarily a composite. |
| Mandating TupleWritable isn't strictly correct.]]> |
| </doc> |
| </method> |
| <method name="compose" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="inf" type="java.lang.Class"/> |
| <param name="path" type="java.lang.String"/> |
| <doc> |
| <![CDATA[Convenience method for constructing composite formats. |
| Given InputFormat class (inf), path (p) return: |
| {@code tbl(<inf>, <p>) }]]> |
| </doc> |
| </method> |
| <method name="compose" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="op" type="java.lang.String"/> |
| <param name="inf" type="java.lang.Class"/> |
| <param name="path" type="java.lang.String[]"/> |
| <doc> |
| <![CDATA[Convenience method for constructing composite formats. |
| Given operation (op), Object class (inf), set of paths (p) return: |
| {@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]> |
| </doc> |
| </method> |
| <method name="compose" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="op" type="java.lang.String"/> |
| <param name="inf" type="java.lang.Class"/> |
| <param name="path" type="org.apache.hadoop.fs.Path[]"/> |
| <doc> |
| <![CDATA[Convenience method for constructing composite formats. |
| Given operation (op), Object class (inf), set of paths (p) return: |
| {@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[An InputFormat capable of performing joins over a set of data sources sorted |
| and partitioned the same way. |
| @see #setFormat |
| |
| A user may define new join types by setting the property |
| <tt>mapred.join.define.<ident></tt> to a classname. In the expression |
| <tt>mapred.join.expr</tt>, the identifier will be assumed to be a |
| ComposableRecordReader. |
| <tt>mapred.join.keycomparator</tt> can be a classname used to compare keys |
| in the join. |
| @see JoinRecordReader |
| @see MultiFilterRecordReader]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.CompositeInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.join.CompositeInputSplit --> |
| <class name="CompositeInputSplit" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputSplit"/> |
| <constructor name="CompositeInputSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CompositeInputSplit" type="int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="s" type="org.apache.hadoop.mapred.InputSplit"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add an InputSplit to this collection. |
| @throws IOException If capacity was not specified during construction |
| or if capacity has been reached.]]> |
| </doc> |
| </method> |
| <method name="get" return="org.apache.hadoop.mapred.InputSplit" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Get ith child InputSplit.]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Return the aggregate length of all child InputSplits currently added.]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the length of ith child InputSplit.]]> |
| </doc> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Collect a set of hosts from all child InputSplits.]]> |
| </doc> |
| </method> |
| <method name="getLocation" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[getLocations from ith InputSplit.]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Write splits in the following format. |
| {@code |
| <count><class1><class2>...<classn><split1><split2>...<splitn> |
| }]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc} |
| @throws IOException If the child InputSplit cannot be read, typically |
| for failing access checks.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted |
| into this collection must have a public default constructor.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.CompositeInputSplit --> |
| <!-- start class org.apache.hadoop.mapred.join.CompositeRecordReader --> |
| <class name="CompositeRecordReader" extends="java.lang.Object" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.conf.Configurable"/> |
| <constructor name="CompositeRecordReader" type="int, int, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Create a RecordReader with <tt>capacity</tt> children to position |
| <tt>id</tt> in the parent reader. |
| The id of a root CompositeRecordReader is -1 by convention, but relying |
| on this is not recommended.]]> |
| </doc> |
| </constructor> |
| <method name="combine" return="boolean" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| </method> |
| <method name="id" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the position in the collector this class occupies.]]> |
| </doc> |
| </method> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getRecordReaderQueue" return="java.util.PriorityQueue" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return sorted list of RecordReaders for this composite.]]> |
| </doc> |
| </method> |
| <method name="getComparator" return="org.apache.hadoop.io.WritableComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return comparator defining the ordering for RecordReaders in this |
| composite.]]> |
| </doc> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="rr" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add a RecordReader to this collection. |
| The id() of a RecordReader determines where in the Tuple its |
| entry will appear. Adding RecordReaders with the same id has |
| undefined behavior.]]> |
| </doc> |
| </method> |
| <method name="key" return="org.apache.hadoop.io.WritableComparable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the key for the current join or the value at the top of the |
| RecordReader heap.]]> |
| </doc> |
| </method> |
| <method name="key" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Clone the key at the top of this RR into the given object.]]> |
| </doc> |
| </method> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return true if it is possible that this could emit more values.]]> |
| </doc> |
| </method> |
| <method name="skip" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Pass skip key to child RRs.]]> |
| </doc> |
| </method> |
| <method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Obtain an iterator over the child RRs apropos of the value type |
| ultimately emitted from this join.]]> |
| </doc> |
| </method> |
| <method name="accept" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[If key provided matches that of this Composite, give JoinCollector |
| iterator over values it may emit.]]> |
| </doc> |
| </method> |
| <method name="fillJoinCollector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="iterkey" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For all child RRs offering the key provided, obtain an iterator |
| at that position in the JoinCollector.]]> |
| </doc> |
| </method> |
| <method name="compareTo" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <doc> |
| <![CDATA[Implement Comparable contract (compare key of join or head of heap |
| with that of another).]]> |
| </doc> |
| </method> |
| <method name="createKey" return="org.apache.hadoop.io.WritableComparable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a new key value common to all child RRs. |
| @throws ClassCastException if key classes differ.]]> |
| </doc> |
| </method> |
| <method name="createInternalValue" return="org.apache.hadoop.mapred.join.TupleWritable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a value to be used internally for joins.]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Unsupported (returns zero in all cases).]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close all child RRs.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Report progress as the minimum of all child RR progress.]]> |
| </doc> |
| </method> |
| <field name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector" |
| transient="false" volatile="false" |
| static="false" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="kids" type="org.apache.hadoop.mapred.join.ComposableRecordReader[]" |
| transient="false" volatile="false" |
| static="false" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key |
| type and partitioning.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.CompositeRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.InnerJoinRecordReader --> |
| <class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="combine" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Return true iff the tuple is full (all data sources contain this key).]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Full inner join.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.InnerJoinRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.JoinRecordReader --> |
| <class name="JoinRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <constructor name="JoinRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Emit the next set of key, value pairs as defined by the child |
| RecordReaders and operation associated with this composite RR.]]> |
| </doc> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.mapred.join.TupleWritable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return an iterator wrapping the JoinCollector.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.JoinRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.JoinRecordReader.JoinDelegationIterator --> |
| <class name="JoinRecordReader.JoinDelegationIterator" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> |
| <constructor name="JoinRecordReader.JoinDelegationIterator" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="replay" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="item" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="clear" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[Since the JoinCollector is effecting our operation, we need only |
| provide an iterator proxy wrapping its operation.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.JoinRecordReader.JoinDelegationIterator --> |
| <!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader --> |
| <class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <constructor name="MultiFilterRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </constructor> |
| <method name="emit" return="org.apache.hadoop.io.Writable" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For each tuple emitted, return a value (typically one of the values |
| in the tuple). |
| Modifying the Writables in the tuple is permitted and unlikely to affect |
| join behavior in most cases, but it is not recommended. It's safer to |
| clone first.]]> |
| </doc> |
| </method> |
| <method name="combine" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Default implementation offers {@link #emit} every Tuple from the |
| collector (the outer join of child RRs).]]> |
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <param name="value" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.io.Writable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return an iterator returning a single value from the tuple. |
| @see MultiFilterDelegationIterator]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Base class for Composite join returning values derived from multiple |
| sources, but generally not tuples.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader.MultiFilterDelegationIterator --> |
| <class name="MultiFilterRecordReader.MultiFilterDelegationIterator" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> |
| <constructor name="MultiFilterRecordReader.MultiFilterDelegationIterator" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="replay" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="item" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="clear" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[Proxy the JoinCollector, but include callback to emit.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader.MultiFilterDelegationIterator --> |
| <!-- start class org.apache.hadoop.mapred.join.OuterJoinRecordReader --> |
| <class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="combine" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="srcs" type="java.lang.Object[]"/> |
| <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Emit everything from the collector.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Full outer join.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.OuterJoinRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.OverrideRecordReader --> |
| <class name="OverrideRecordReader" extends="org.apache.hadoop.mapred.join.MultiFilterRecordReader" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="emit" return="org.apache.hadoop.io.Writable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> |
| <doc> |
| <![CDATA[Emit the value with the highest position in the tuple.]]> |
| </doc> |
| </method> |
| <method name="fillJoinCollector" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="iterkey" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Instead of filling the JoinCollector with iterators from all |
| data sources, fill only the rightmost for this key. |
| This not only saves space by discarding the other sources, but |
| it also emits the number of key-value pairs in the preferred |
| RecordReader instead of repeating that stream n times, where |
| n is the cardinality of the cross product of the discarded |
| streams for the given key.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Prefer the "rightmost" data source for this key. |
| For example, <tt>override(S1,S2,S3)</tt> will prefer values |
| from S3 over S2, and values from S2 over S1 for all keys |
| emitted from all sources.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.OverrideRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser --> |
| <class name="Parser" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Parser" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <doc> |
| <![CDATA[Very simple shift-reduce parser for join expressions. |
| |
| This should be sufficient for the user extension permitted now, but ought to |
| be replaced with a parser generator if more complex grammars are supported. |
| In particular, this "shift-reduce" parser has no states. Each set |
| of formals requires a different internal node type, which is responsible for |
| interpreting the list of tokens it receives. This is sufficient for the |
| current grammar, but it has several annoying properties that might inhibit |
 extension. In particular, parentheses are always function calls; an
| algebraic or filter grammar would not only require a node type, but must |
| also work around the internals of this parser. |
| |
| For most other cases, adding classes to the hierarchy- particularly by |
| extending JoinRecordReader and MultiFilterRecordReader- is fairly |
| straightforward. One need only override the relevant method(s) (usually only |
| {@link CompositeRecordReader#combine}) and include a property to map its |
| value to an identifier in the parser.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.Node --> |
| <class name="Parser.Node" extends="java.lang.Object" |
| abstract="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/> |
| <constructor name="Parser.Node" type="java.lang.String" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="addIdentifier" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="ident" type="java.lang.String"/> |
| <param name="mcstrSig" type="java.lang.Class[]"/> |
| <param name="nodetype" type="java.lang.Class"/> |
| <param name="cl" type="java.lang.Class"/> |
| <exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/> |
| <doc> |
| <![CDATA[For a given identifier, add a mapping to the nodetype for the parse |
| tree and to the ComposableRecordReader to be created, including the |
| formals required to invoke the constructor. |
| The nodetype and constructor signature should be filled in from the |
| child node.]]> |
| </doc> |
| </method> |
| <method name="setID" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="id" type="int"/> |
| </method> |
| <method name="setKeyComparator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="cmpcl" type="java.lang.Class"/> |
| </method> |
| <field name="rrCstrMap" type="java.util.Map" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="id" type="int" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="ident" type="java.lang.String" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="cmpcl" type="java.lang.Class" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.Node --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.NodeToken --> |
| <class name="Parser.NodeToken" extends="org.apache.hadoop.mapred.join.Parser.Token" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.NodeToken --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.NumToken --> |
| <class name="Parser.NumToken" extends="org.apache.hadoop.mapred.join.Parser.Token" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Parser.NumToken" type="double" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getNum" return="double" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.NumToken --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.StrToken --> |
| <class name="Parser.StrToken" extends="org.apache.hadoop.mapred.join.Parser.Token" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="Parser.StrToken" type="org.apache.hadoop.mapred.join.Parser.TType, java.lang.String" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getStr" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.StrToken --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.Token --> |
| <class name="Parser.Token" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getType" return="org.apache.hadoop.mapred.join.Parser.TType" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getNum" return="double" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getStr" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[Tagged-union type for tokens from the join expression. |
| @see Parser.TType]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.Token --> |
| <!-- start class org.apache.hadoop.mapred.join.Parser.TType --> |
| <class name="Parser.TType" extends="java.lang.Enum" |
| abstract="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| <method name="values" return="org.apache.hadoop.mapred.join.Parser.TType[]" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="valueOf" return="org.apache.hadoop.mapred.join.Parser.TType" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="name" type="java.lang.String"/> |
| </method> |
| <field name="CIF" type="org.apache.hadoop.mapred.join.Parser.TType" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="IDENT" type="org.apache.hadoop.mapred.join.Parser.TType" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="COMMA" type="org.apache.hadoop.mapred.join.Parser.TType" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="LPAREN" type="org.apache.hadoop.mapred.join.Parser.TType" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="RPAREN" type="org.apache.hadoop.mapred.join.Parser.TType" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="QUOT" type="org.apache.hadoop.mapred.join.Parser.TType" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <field name="NUM" type="org.apache.hadoop.mapred.join.Parser.TType" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.Parser.TType --> |
| <!-- start interface org.apache.hadoop.mapred.join.ResetableIterator --> |
| <interface name="ResetableIterator" abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
      <![CDATA[True if a call to next may return a value. This permits false
 positives, but not false negatives.]]>
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Assign next value to actual. |
| It is required that elements added to a ResetableIterator be returned in |
| the same order after a call to {@link #reset} (FIFO). |
| |
| Note that a call to this may fail for nested joins (i.e. more elements |
| available, but none satisfying the constraints of the join)]]> |
| </doc> |
| </method> |
| <method name="replay" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Assign last value returned to actual.]]> |
| </doc> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Set iterator to return to the start of its range. Must be called after |
| calling {@link #add} to avoid a ConcurrentModificationException.]]> |
| </doc> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="item" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add an element to the collection of elements to iterate over.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Close datasources and release resources. Calling methods on the iterator |
| after calling close has undefined behavior.]]> |
| </doc> |
| </method> |
| <method name="clear" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Close datasources, but do not release internal resources. Calling this |
| method should permit the object to be reused with a different datasource.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[This defines an interface to a stateful Iterator that can replay elements |
| added to it directly. |
| Note that this does not extend {@link java.util.Iterator}.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.join.ResetableIterator --> |
| <!-- start class org.apache.hadoop.mapred.join.ResetableIterator.EMPTY --> |
| <class name="ResetableIterator.EMPTY" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> |
| <constructor name="ResetableIterator.EMPTY" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="clear" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="replay" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="item" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.ResetableIterator.EMPTY --> |
| <!-- start class org.apache.hadoop.mapred.join.StreamBackedIterator --> |
| <class name="StreamBackedIterator" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> |
| <constructor name="StreamBackedIterator" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="replay" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="val" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="reset" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="add" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="item" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="clear" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[This class provides an implementation of ResetableIterator. This |
| implementation uses a byte array to store elements added to it.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.StreamBackedIterator --> |
| <!-- start class org.apache.hadoop.mapred.join.TupleWritable --> |
| <class name="TupleWritable" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.io.Writable"/> |
| <implements name="java.lang.Iterable"/> |
| <constructor name="TupleWritable" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create an empty tuple with no allocated storage for writables.]]> |
| </doc> |
| </constructor> |
| <constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Initialize tuple with storage; unknown whether any of them contain |
| "written" values.]]> |
| </doc> |
| </constructor> |
| <method name="has" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Return true if tuple has an element at the position provided.]]> |
| </doc> |
| </method> |
| <method name="get" return="org.apache.hadoop.io.Writable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Get ith Writable from Tuple.]]> |
| </doc> |
| </method> |
| <method name="size" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[The number of children in this Tuple.]]> |
| </doc> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="iterator" return="java.util.Iterator" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return an iterator over the elements in this tuple. |
| Note that this doesn't flatten the tuple; one may receive tuples |
| from this iterator.]]> |
| </doc> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Convert Tuple to String as in the following. |
| <tt>[<child1>,<child2>,...,<childn>]</tt>]]> |
| </doc> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Writes each Writable to <code>out</code>. |
| TupleWritable format: |
| {@code |
| <count><type1><type2>...<typen><obj1><obj2>...<objn> |
| }]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s. |
| |
| This is *not* a general-purpose tuple type. In almost all cases, users are |
| encouraged to implement their own serializable types, which can perform |
| better validation and provide more efficient encodings than this class is |
| capable of. TupleWritable relies on the join framework for type safety and |
| assumes its instances will rarely be persisted, assumptions not only |
| incompatible with, but contrary to the general case. |
| |
| @see org.apache.hadoop.io.Writable]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.TupleWritable --> |
| <!-- start class org.apache.hadoop.mapred.join.WrappedRecordReader --> |
| <class name="WrappedRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <method name="id" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[{@inheritDoc}]]> |
| </doc> |
| </method> |
| <method name="key" return="org.apache.hadoop.io.WritableComparable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return the key at the head of this RR.]]> |
| </doc> |
| </method> |
| <method name="key" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="qkey" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Clone the key at the head of this RR into the object supplied.]]> |
| </doc> |
| </method> |
| <method name="hasNext" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Return true if the RR- including the k,v pair stored in this object- |
| is exhausted.]]> |
| </doc> |
| </method> |
| <method name="skip" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]> |
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Read the next k,v pair into the head of this object; return true iff |
| the RR and this are exhausted.]]> |
| </doc> |
| </method> |
| <method name="accept" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Add an iterator to the collector at the position occupied by this |
| RecordReader over the values in this stream paired with the key |
| provided (ie register a stream of values from this source matching K |
| with a collector).]]> |
| </doc> |
| </method> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.WritableComparable"/> |
| <param name="value" type="org.apache.hadoop.io.Writable"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Write key-value pair at the head of this stream to the objects provided; |
| get next key-value pair from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="createKey" return="org.apache.hadoop.io.WritableComparable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Request new key from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="createValue" return="org.apache.hadoop.io.Writable" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Request new value from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Request progress from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Request position from proxied RR.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Forward close request to proxied RR.]]> |
| </doc> |
| </method> |
| <method name="compareTo" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/> |
| <doc> |
| <![CDATA[Implement Comparable contract (compare key at head of proxied RR |
| with that of another).]]> |
| </doc> |
| </method> |
| <method name="equals" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="other" type="java.lang.Object"/> |
| <doc> |
| <![CDATA[Return true iff compareTo(other) returns 0.]]> |
| </doc> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[Proxy class for a RecordReader participating in the join framework. |
| This class keeps track of the "head" key-value pair for the |
| provided RecordReader and keeps a store of values matching a key when |
| this source is participating in a join.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.join.WrappedRecordReader --> |
| </package> |
| <package name="org.apache.hadoop.mapred.lib"> |
| <!-- start class org.apache.hadoop.mapred.lib.ChainMapper --> |
| <class name="ChainMapper" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <constructor name="ChainMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructor.]]> |
| </doc> |
| </constructor> |
| <method name="addMapper" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="klass" type="java.lang.Class"/> |
| <param name="inputKeyClass" type="java.lang.Class"/> |
| <param name="inputValueClass" type="java.lang.Class"/> |
| <param name="outputKeyClass" type="java.lang.Class"/> |
| <param name="outputValueClass" type="java.lang.Class"/> |
| <param name="byValue" type="boolean"/> |
| <param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Adds a Mapper class to the chain job's JobConf. |
| <p/> |
| It has to be specified how key and values are passed from one element of |
| the chain to the next, by value or by reference. If a Mapper leverages the |
| assumed semantics that the key and values are not modified by the collector |
| 'by value' must be used. If the Mapper does not expect this semantics, as |
| an optimization to avoid serialization and deserialization 'by reference' |
| can be used. |
| <p/> |
| For the added Mapper the configuration given for it, |
| <code>mapperConf</code>, has precedence over the job's JobConf. This |
| precedence is in effect when the task is running. |
| <p/> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainMapper, this is done by the addMapper for the last mapper in the chain |
| <p/> |
| |
| @param job job's JobConf to add the Mapper class. |
| @param klass the Mapper class to add. |
| @param inputKeyClass mapper input key class. |
| @param inputValueClass mapper input value class. |
| @param outputKeyClass mapper output key class. |
| @param outputValueClass mapper output value class. |
| @param byValue indicates if key/values should be passed by value |
| to the next Mapper in the chain, if any. |
| @param mapperConf a JobConf with the configuration for the Mapper |
| class. It is recommended to use a JobConf without default values using the |
| <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Configures the ChainMapper and all the Mappers in the chain. |
| <p/> |
| If this method is overridden <code>super.configure(...)</code> should be |
| invoked at the beginning of the overriding method.]]> |
| </doc> |
| </method> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Chains the <code>map(...)</code> methods of the Mappers in the chain.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Closes the ChainMapper and all the Mappers in the chain. |
| <p/> |
| If this method is overridden <code>super.close()</code> should be |
| invoked at the end of the overriding method.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The ChainMapper class allows the use of multiple Mapper classes within a single |
| Map task. |
| <p/> |
| The Mapper classes are invoked in a chained (or piped) fashion, the output of |
| the first becomes the input of the second, and so on until the last Mapper, |
| the output of the last Mapper will be written to the task's output. |
| <p/> |
| The key functionality of this feature is that the Mappers in the chain do not |
| need to be aware that they are executed in a chain. This enables having |
| reusable specialized Mappers that can be combined to perform composite |
| operations within a single task. |
| <p/> |
| Special care has to be taken when creating chains that the key/values output |
| by a Mapper are valid for the following Mapper in the chain. It is assumed |
| all Mappers and the Reducer in the chain use matching output and input key and |
| value classes as no conversion is done by the chaining code. |
| <p/> |
| Using the ChainMapper and the ChainReducer classes it is possible to compose |
| Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An |
| immediate benefit of this pattern is a dramatic reduction in disk IO. |
| <p/> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainMapper, this is done by the addMapper for the last mapper in the chain. |
| <p/> |
| ChainMapper usage pattern: |
| <p/> |
| <pre> |
| ... |
| conf.setJobName("chain"); |
| conf.setInputFormat(TextInputFormat.class); |
| conf.setOutputFormat(TextOutputFormat.class); |
| <p/> |
| JobConf mapAConf = new JobConf(false); |
| ... |
| ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, |
| Text.class, Text.class, true, mapAConf); |
| <p/> |
| JobConf mapBConf = new JobConf(false); |
| ... |
| ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, |
| LongWritable.class, Text.class, false, mapBConf); |
| <p/> |
| JobConf reduceConf = new JobConf(false); |
| ... |
| ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, |
| Text.class, Text.class, true, reduceConf); |
| <p/> |
| ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, |
| LongWritable.class, Text.class, false, null); |
| <p/> |
| ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, |
| LongWritable.class, LongWritable.class, true, null); |
| <p/> |
| FileInputFormat.setInputPaths(conf, inDir); |
| FileOutputFormat.setOutputPath(conf, outDir); |
| ... |
| <p/> |
| JobClient jc = new JobClient(conf); |
| RunningJob job = jc.submitJob(conf); |
| ... |
| </pre>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.ChainMapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.ChainReducer --> |
| <class name="ChainReducer" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Reducer"/> |
| <constructor name="ChainReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Constructor.]]> |
| </doc> |
| </constructor> |
| <method name="setReducer" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="klass" type="java.lang.Class"/> |
| <param name="inputKeyClass" type="java.lang.Class"/> |
| <param name="inputValueClass" type="java.lang.Class"/> |
| <param name="outputKeyClass" type="java.lang.Class"/> |
| <param name="outputValueClass" type="java.lang.Class"/> |
| <param name="byValue" type="boolean"/> |
| <param name="reducerConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Sets the Reducer class to the chain job's JobConf. |
| <p/> |
| It has to be specified how key and values are passed from one element of |
| the chain to the next, by value or by reference. If a Reducer leverages the |
| assumed semantics that the key and values are not modified by the collector |
| 'by value' must be used. If the Reducer does not expect this semantics, as |
| an optimization to avoid serialization and deserialization 'by reference' |
| can be used. |
| <p/> |
| For the added Reducer the configuration given for it, |
| <code>reducerConf</code>, has precedence over the job's JobConf. This |
| precedence is in effect when the task is running. |
| <p/> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainReducer, this is done by the setReducer or the addMapper for the last |
| element in the chain. |
| |
| @param job job's JobConf to add the Reducer class. |
| @param klass the Reducer class to add. |
| @param inputKeyClass reducer input key class. |
| @param inputValueClass reducer input value class. |
| @param outputKeyClass reducer output key class. |
| @param outputValueClass reducer output value class. |
| @param byValue indicates if key/values should be passed by value |
| to the next Mapper in the chain, if any. |
| @param reducerConf a JobConf with the configuration for the Reducer |
| class. It is recommended to use a JobConf without default values using the |
| <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]> |
| </doc> |
| </method> |
| <method name="addMapper" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="klass" type="java.lang.Class"/> |
| <param name="inputKeyClass" type="java.lang.Class"/> |
| <param name="inputValueClass" type="java.lang.Class"/> |
| <param name="outputKeyClass" type="java.lang.Class"/> |
| <param name="outputValueClass" type="java.lang.Class"/> |
| <param name="byValue" type="boolean"/> |
| <param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Adds a Mapper class to the chain job's JobConf. |
| <p/> |
| It has to be specified how key and values are passed from one element of |
| the chain to the next, by value or by reference. If a Mapper leverages the |
| assumed semantics that the key and values are not modified by the collector |
| 'by value' must be used. If the Mapper does not expect this semantics, as |
| an optimization to avoid serialization and deserialization 'by reference' |
| can be used. |
| <p/> |
| For the added Mapper the configuration given for it, |
| <code>mapperConf</code>, has precedence over the job's JobConf. This |
| precedence is in effect when the task is running. |
| <p/> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainMapper, this is done by the addMapper for the last mapper in the chain. |
| |
| @param job chain job's JobConf to add the Mapper class. |
| @param klass the Mapper class to add. |
| @param inputKeyClass mapper input key class. |
| @param inputValueClass mapper input value class. |
| @param outputKeyClass mapper output key class. |
| @param outputValueClass mapper output value class. |
| @param byValue indicates if key/values should be passed by value |
| to the next Mapper in the chain, if any. |
| @param mapperConf a JobConf with the configuration for the Mapper |
| class. It is recommended to use a JobConf without default values using the |
| <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <doc> |
| <![CDATA[Configures the ChainReducer, the Reducer and all the Mappers in the chain. |
| <p/> |
| If this method is overridden <code>super.configure(...)</code> should be |
| invoked at the beginning of the overriding method.]]> |
| </doc> |
| </method> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Chains the <code>reduce(...)</code> method of the Reducer with the |
| <code>map(...) </code> methods of the Mappers in the chain.]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Closes the ChainReducer, the Reducer and all the Mappers in the chain. |
| <p/> |
| If this method is overridden <code>super.close()</code> should be |
| invoked at the end of the overriding method.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[The ChainReducer class allows chaining multiple Mapper classes after a |
| Reducer within the Reducer task. |
| <p/> |
| For each record output by the Reducer, the Mapper classes are invoked in a |
| chained (or piped) fashion, the output of the first becomes the input of the |
| second, and so on until the last Mapper, the output of the last Mapper will |
| be written to the task's output. |
| <p/> |
| The key functionality of this feature is that the Mappers in the chain do not |
| need to be aware that they are executed after the Reducer or in a chain. |
| This enables having reusable specialized Mappers that can be combined to |
| perform composite operations within a single task. |
| <p/> |
| Special care has to be taken when creating chains that the key/values output |
| by a Mapper are valid for the following Mapper in the chain. It is assumed |
| all Mappers and the Reducer in the chain use matching output and input key and |
| value classes as no conversion is done by the chaining code. |
| <p/> |
| Using the ChainMapper and the ChainReducer classes it is possible to compose |
| Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An |
| immediate benefit of this pattern is a dramatic reduction in disk IO. |
| <p/> |
| IMPORTANT: There is no need to specify the output key/value classes for the |
| ChainReducer, this is done by the setReducer or the addMapper for the last |
| element in the chain. |
| <p/> |
| ChainReducer usage pattern: |
| <p/> |
| <pre> |
| ... |
| conf.setJobName("chain"); |
| conf.setInputFormat(TextInputFormat.class); |
| conf.setOutputFormat(TextOutputFormat.class); |
| <p/> |
| JobConf mapAConf = new JobConf(false); |
| ... |
| ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, |
| Text.class, Text.class, true, mapAConf); |
| <p/> |
| JobConf mapBConf = new JobConf(false); |
| ... |
| ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, |
| LongWritable.class, Text.class, false, mapBConf); |
| <p/> |
| JobConf reduceConf = new JobConf(false); |
| ... |
| ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, |
| Text.class, Text.class, true, reduceConf); |
| <p/> |
| ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, |
| LongWritable.class, Text.class, false, null); |
| <p/> |
| ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, |
| LongWritable.class, LongWritable.class, true, null); |
| <p/> |
| FileInputFormat.setInputPaths(conf, inDir); |
| FileOutputFormat.setOutputPath(conf, outDir); |
| ... |
| <p/> |
| JobClient jc = new JobClient(conf); |
| RunningJob job = jc.submitJob(conf); |
| ... |
| </pre>]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.ChainReducer --> |
| <!-- start class org.apache.hadoop.mapred.lib.CombineFileInputFormat --> |
| <class name="CombineFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" |
| abstract="true" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="CombineFileInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[default constructor]]> |
| </doc> |
| </constructor> |
| <method name="setMaxSplitSize" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="maxSplitSize" type="long"/> |
| <doc> |
| <![CDATA[Specify the maximum size (in bytes) of each split. Each split is |
| approximately equal to the specified size.]]> |
| </doc> |
| </method> |
| <method name="setMinSplitSizeNode" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="minSplitSizeNode" type="long"/> |
| <doc> |
| <![CDATA[Specify the minimum size (in bytes) of each split per node. |
| This applies to data that is left over after combining data on a single |
| node into splits that are of maximum size specified by maxSplitSize. |
| This leftover data will be combined into its own split if its size |
| exceeds minSplitSizeNode.]]> |
| </doc> |
| </method> |
| <method name="setMinSplitSizeRack" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="minSplitSizeRack" type="long"/> |
| <doc> |
| <![CDATA[Specify the minimum size (in bytes) of each split per rack. |
| This applies to data that is left over after combining data on a single |
| rack into splits that are of maximum size specified by maxSplitSize. |
| This leftover data will be combined into its own split if its size |
| exceeds minSplitSizeRack.]]> |
| </doc> |
| </method> |
| <method name="createPool" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="filters" type="java.util.List"/> |
| <doc> |
| <![CDATA[Create a new pool and add the filters to it. |
| A split cannot have files from different pools.]]> |
| </doc> |
| </method> |
| <method name="createPool" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/> |
| <doc> |
| <![CDATA[Create a new pool and add the filters to it. |
| A pathname can satisfy any one of the specified filters. |
| A split cannot have files from different pools.]]> |
| </doc> |
| </method> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="true" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[This is not implemented yet.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[An abstract {@link org.apache.hadoop.mapred.InputFormat} that returns {@link CombineFileSplit}'s |
| in {@link org.apache.hadoop.mapred.InputFormat#getSplits(JobConf, int)} method. |
| Splits are constructed from the files under the input paths. |
| A split cannot have files from different pools. |
| Each split returned may contain blocks from different files. |
| If a maxSplitSize is specified, then blocks on the same node are |
| combined to form a single split. Blocks that are left over are |
| then combined with other blocks in the same rack. |
| If maxSplitSize is not specified, then blocks from the same rack |
| are combined in a single split; no attempt is made to create |
| node-local splits. |
| If the maxSplitSize is equal to the block size, then this class |
| is similar to the default splitting behaviour in Hadoop: each |
| block is a locally processed split. |
| Subclasses implement {@link org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit, JobConf, Reporter)} |
| to construct <code>RecordReader</code>'s for <code>CombineFileSplit</code>'s. |
| @see CombineFileSplit]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.CombineFileInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.CombineFileRecordReader --> |
| <class name="CombineFileRecordReader" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.RecordReader"/> |
| <constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.lib.CombineFileSplit, org.apache.hadoop.mapred.Reporter, java.lang.Class" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[A generic RecordReader that can hand out different recordReaders |
| for each chunk in the CombineFileSplit.]]> |
| </doc> |
| </constructor> |
| <method name="next" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="createKey" return="java.lang.Object" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="createValue" return="java.lang.Object" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getPos" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[return the amount of data processed]]> |
| </doc> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getProgress" return="float" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[return progress based on the amount of data processed so far.]]> |
| </doc> |
| </method> |
| <method name="initNextRecordReader" return="boolean" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]> |
| </doc> |
| </method> |
| <field name="split" type="org.apache.hadoop.mapred.lib.CombineFileSplit" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="jc" type="org.apache.hadoop.mapred.JobConf" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="reporter" type="org.apache.hadoop.mapred.Reporter" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="rrClass" type="java.lang.Class" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="rrConstructor" type="java.lang.reflect.Constructor" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="fs" type="org.apache.hadoop.fs.FileSystem" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="idx" type="int" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="progress" type="long" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <field name="curReader" type="org.apache.hadoop.mapred.RecordReader" |
| transient="false" volatile="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[A generic RecordReader that can hand out different recordReaders |
| for each chunk in a {@link CombineFileSplit}. |
| A CombineFileSplit can combine data chunks from multiple files. |
| This class allows using different RecordReaders for processing |
| these data chunks from different files. |
| @see CombineFileSplit]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.CombineFileRecordReader --> |
| <!-- start class org.apache.hadoop.mapred.lib.CombineFileSplit --> |
| <class name="CombineFileSplit" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputSplit"/> |
| <constructor name="CombineFileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[default constructor]]> |
| </doc> |
| </constructor> |
| <constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.lib.CombineFileSplit" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Copy constructor]]> |
| </doc> |
| </constructor> |
| <method name="getJob" return="org.apache.hadoop.mapred.JobConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="getStartOffsets" return="long[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns an array containing the start offsets of the files in the split]]> |
| </doc> |
| </method> |
| <method name="getLengths" return="long[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns an array containing the lengths of the files in the split]]> |
| </doc> |
| </method> |
| <method name="getOffset" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Returns the start offset of the i<sup>th</sup> Path]]> |
| </doc> |
| </method> |
| <method name="getLength" return="long" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Returns the length of the i<sup>th</sup> Path]]> |
| </doc> |
| </method> |
| <method name="getNumPaths" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns the number of Paths in the split]]> |
| </doc> |
| </method> |
| <method name="getPath" return="org.apache.hadoop.fs.Path" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="i" type="int"/> |
| <doc> |
| <![CDATA[Returns the i<sup>th</sup> Path]]> |
| </doc> |
| </method> |
| <method name="getPaths" return="org.apache.hadoop.fs.Path[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Returns all the Paths in the split]]> |
| </doc> |
| </method> |
| <method name="getLocations" return="java.lang.String[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Returns all the Paths where this input-split resides]]> |
| </doc> |
| </method> |
| <method name="readFields" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="in" type="java.io.DataInput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="write" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="out" type="java.io.DataOutput"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="toString" return="java.lang.String" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <doc> |
| <![CDATA[A sub-collection of input files. Unlike {@link org.apache.hadoop.mapred.FileSplit}, |
| CombineFileSplit class does not represent a split of a file, but a split of input files |
| into smaller sets. A split may contain blocks from different files but all |
| the blocks in the same split are probably local to some rack <br> |
| CombineFileSplit can be used to implement {@link org.apache.hadoop.mapred.RecordReader}'s, |
| with reading one record per file. |
| @see org.apache.hadoop.mapred.FileSplit |
| @see CombineFileInputFormat]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.CombineFileSplit --> |
| <!-- start class org.apache.hadoop.mapred.lib.DelegatingInputFormat --> |
| <class name="DelegatingInputFormat" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.InputFormat"/> |
| <constructor name="DelegatingInputFormat" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="numSplits" type="int"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link InputFormat} that delegates behaviour of paths to multiple other |
| InputFormats. |
| |
| @see MultipleInputs#addInputPath(JobConf, Path, Class, Class)]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.DelegatingInputFormat --> |
| <!-- start class org.apache.hadoop.mapred.lib.DelegatingMapper --> |
| <class name="DelegatingMapper" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <constructor name="DelegatingMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <param name="outputCollector" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[An {@link Mapper} that delegates behaviour of paths to multiple other |
| mappers. |
| |
| @see MultipleInputs#addInputPath(JobConf, Path, Class, Class)]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.DelegatingMapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce --> |
| <class name="FieldSelectionMapReduce" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <implements name="org.apache.hadoop.mapred.Reducer"/> |
| <constructor name="FieldSelectionMapReduce" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="val" type="java.lang.Object"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[The identity function. Input key/value pair is written directly to output.]]> |
| </doc> |
| </method> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="close" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="org.apache.hadoop.io.Text"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <field name="LOG" type="org.apache.commons.logging.Log" |
| transient="false" volatile="false" |
| static="true" final="true" visibility="public" |
| deprecated="not deprecated"> |
| </field> |
| <doc> |
| <![CDATA[This class implements a mapper/reducer class that can be used to perform |
| field selections in a manner similar to unix cut. The input data is treated |
| as fields separated by a user specified separator (the default value is |
| "\t"). The user can specify a list of fields that form the map output keys, |
| and a list of fields that form the map output values. If the inputformat is |
| TextInputFormat, the mapper will ignore the key to the map function. and the |
| fields are from the value only. Otherwise, the fields are the union of those |
| from the key and those from the value. |
| |
| The field separator is under attribute "mapred.data.field.separator" |
| |
| The map output field list spec is under attribute "map.output.key.value.fields.spec". |
| The value is expected to be like "keyFieldsSpec:valueFieldsSpec" |
| key/valueFieldsSpec are comma (,) separated field spec: fieldSpec,fieldSpec,fieldSpec ... |
| Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range |
| (like 2-5) to specify a range of fields, or an open range (like 3-) specifying all |
| the fields starting from field 3. The open range field spec applies to value fields only. |
| They have no effect on the key fields. |
| |
| Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys, |
| and use fields 6,5,1,2,3,7 and above for values. |
| |
| The reduce output field list spec is under attribute "reduce.output.key.value.fields.spec". |
| |
| The reducer extracts output key/value pairs in a similar manner, except that |
| the key is never ignored.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce --> |
| <!-- start class org.apache.hadoop.mapred.lib.HashPartitioner --> |
| <class name="HashPartitioner" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use |
| {@link org.apache.hadoop.mapreduce.lib.partition.HashPartitioner} instead."> |
| <implements name="org.apache.hadoop.mapred.Partitioner"/> |
| <constructor name="HashPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="getPartition" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <param name="numReduceTasks" type="int"/> |
| <doc> |
| <![CDATA[Use {@link Object#hashCode()} to partition.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Partition keys by their {@link Object#hashCode()}. |
| @deprecated Use |
| {@link org.apache.hadoop.mapreduce.lib.partition.HashPartitioner} instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.HashPartitioner --> |
| <!-- start class org.apache.hadoop.mapred.lib.IdentityMapper --> |
| <class name="IdentityMapper" extends="org.apache.hadoop.mapred.MapReduceBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead."> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <constructor name="IdentityMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="val" type="java.lang.Object"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[The identity function. Input key/value pair is written directly to |
| output.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Implements the identity function, mapping inputs directly to outputs. |
| @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.IdentityMapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.IdentityReducer --> |
| <class name="IdentityReducer" extends="org.apache.hadoop.mapred.MapReduceBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.Reducer} instead."> |
| <implements name="org.apache.hadoop.mapred.Reducer"/> |
| <constructor name="IdentityReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Writes all keys and values directly to output.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Performs no reduction, writing all input values directly to the output. |
| @deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.IdentityReducer --> |
| <!-- start class org.apache.hadoop.mapred.lib.InputSampler --> |
| <class name="InputSampler" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.util.Tool"/> |
| <constructor name="InputSampler" type="org.apache.hadoop.mapred.JobConf" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="getConf" return="org.apache.hadoop.conf.Configuration" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </method> |
| <method name="setConf" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="conf" type="org.apache.hadoop.conf.Configuration"/> |
| </method> |
| <method name="writePartitionFile" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <param name="sampler" type="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Write a partition file for the given job, using the Sampler provided. |
| Queries the sampler for a sample keyset, sorts by the output key |
| comparator, selects the keys for each rank, and writes to the destination |
| returned from {@link |
| org.apache.hadoop.mapred.lib.TotalOrderPartitioner#getPartitionFile}.]]> |
| </doc> |
| </method> |
| <method name="run" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| <doc> |
| <![CDATA[Driver for InputSampler from the command line. |
| Configures a JobConf instance and calls {@link #writePartitionFile}.]]> |
| </doc> |
| </method> |
| <method name="main" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="args" type="java.lang.String[]"/> |
| <exception name="Exception" type="java.lang.Exception"/> |
| </method> |
| <doc> |
| <![CDATA[Utility for collecting samples and writing a partition file for |
| {@link org.apache.hadoop.mapred.lib.TotalOrderPartitioner}.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.InputSampler --> |
| <!-- start class org.apache.hadoop.mapred.lib.InputSampler.IntervalSampler --> |
| <class name="InputSampler.IntervalSampler" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/> |
| <constructor name="InputSampler.IntervalSampler" type="double" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a new IntervalSampler sampling <em>all</em> splits. |
| @param freq The frequency with which records will be emitted.]]> |
| </doc> |
| </constructor> |
| <constructor name="InputSampler.IntervalSampler" type="double, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a new IntervalSampler. |
| @param freq The frequency with which records will be emitted. |
| @param maxSplitsSampled The maximum number of splits to examine. |
| @see #getSample]]> |
| </doc> |
| </constructor> |
| <method name="getSample" return="java.lang.Object[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="inf" type="org.apache.hadoop.mapred.InputFormat"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For each split sampled, emit when the ratio of the number of records |
| retained to the total record count is less than the specified |
| frequency.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Sample from splits at regular intervals. |
| Useful for sorted data.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.InputSampler.IntervalSampler --> |
| <!-- start class org.apache.hadoop.mapred.lib.InputSampler.RandomSampler --> |
| <class name="InputSampler.RandomSampler" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/> |
| <constructor name="InputSampler.RandomSampler" type="double, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a new RandomSampler sampling <em>all</em> splits. |
| This will read every split at the client, which is very expensive. |
| @param freq Probability with which a key will be chosen. |
| @param numSamples Total number of samples to obtain from all selected |
| splits.]]> |
| </doc> |
| </constructor> |
| <constructor name="InputSampler.RandomSampler" type="double, int, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a new RandomSampler. |
| @param freq Probability with which a key will be chosen. |
| @param numSamples Total number of samples to obtain from all selected |
| splits. |
| @param maxSplitsSampled The maximum number of splits to examine.]]> |
| </doc> |
| </constructor> |
| <method name="getSample" return="java.lang.Object[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="inf" type="org.apache.hadoop.mapred.InputFormat"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[Randomize the split order, then take the specified number of keys from |
| each split sampled, where each key is selected with the specified |
| probability and possibly replaced by a subsequently selected key when |
| the quota of keys from that split is satisfied.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Sample from random points in the input. |
| General-purpose sampler. Takes numSamples / maxSplitsSampled inputs from |
| each split.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.InputSampler.RandomSampler --> |
| <!-- start interface org.apache.hadoop.mapred.lib.InputSampler.Sampler --> |
| <interface name="InputSampler.Sampler" abstract="true" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <method name="getSample" return="java.lang.Object[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="inf" type="org.apache.hadoop.mapred.InputFormat"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[For a given job, collect and return a subset of the keys from the |
| input data.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Interface to sample using an {@link org.apache.hadoop.mapred.InputFormat}.]]> |
| </doc> |
| </interface> |
| <!-- end interface org.apache.hadoop.mapred.lib.InputSampler.Sampler --> |
| <!-- start class org.apache.hadoop.mapred.lib.InputSampler.SplitSampler --> |
| <class name="InputSampler.SplitSampler" extends="java.lang.Object" |
| abstract="false" |
| static="true" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/> |
| <constructor name="InputSampler.SplitSampler" type="int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a SplitSampler sampling <em>all</em> splits. |
| Takes the first numSamples / numSplits records from each split. |
| @param numSamples Total number of samples to obtain from all selected |
| splits.]]> |
| </doc> |
| </constructor> |
| <constructor name="InputSampler.SplitSampler" type="int, int" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <doc> |
| <![CDATA[Create a new SplitSampler. |
| @param numSamples Total number of samples to obtain from all selected |
| splits. |
| @param maxSplitsSampled The maximum number of splits to examine.]]> |
| </doc> |
| </constructor> |
| <method name="getSample" return="java.lang.Object[]" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="inf" type="org.apache.hadoop.mapred.InputFormat"/> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[From each split sampled, take the first numSamples / numSplits records.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[Samples the first n records from s splits. |
| Inexpensive way to sample random data.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.InputSampler.SplitSampler --> |
| <!-- start class org.apache.hadoop.mapred.lib.InverseMapper --> |
| <class name="InverseMapper" extends="org.apache.hadoop.mapred.MapReduceBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.lib.map.InverseMapper} |
| instead."> |
| <implements name="org.apache.hadoop.mapred.Mapper"/> |
| <constructor name="InverseMapper" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="map" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| <doc> |
| <![CDATA[The inverse function. Input keys and values are swapped.]]> |
| </doc> |
| </method> |
| <doc> |
| <![CDATA[A {@link Mapper} that swaps keys and values. |
| @deprecated Use {@link org.apache.hadoop.mapreduce.lib.map.InverseMapper} |
| instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.InverseMapper --> |
| <!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator --> |
| <class name="KeyFieldBasedComparator" extends="org.apache.hadoop.io.WritableComparator" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.JobConfigurable"/> |
| <constructor name="KeyFieldBasedComparator" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="compare" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="b1" type="byte[]"/> |
| <param name="s1" type="int"/> |
| <param name="l1" type="int"/> |
| <param name="b2" type="byte[]"/> |
| <param name="s2" type="int"/> |
| <param name="l2" type="int"/> |
| </method> |
| <doc> |
| <![CDATA[This comparator implementation provides a subset of the features provided |
| by the Unix/GNU Sort. In particular, the supported features are: |
| -n, (Sort numerically) |
| -r, (Reverse the result of comparison) |
| -k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number |
| of the field to use, and c is the number of the first character from the |
beginning of the field. Fields and character positions are numbered starting
| with 1; a character position of zero in pos2 indicates the field's last |
| character. If '.c' is omitted from pos1, it defaults to 1 (the beginning |
| of the field); if omitted from pos2, it defaults to 0 (the end of the |
| field). opts are ordering options (any of 'nr' as described above). |
| We assume that the fields in the key are separated by |
| map.output.key.field.separator.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator --> |
| <!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner --> |
| <class name="KeyFieldBasedPartitioner" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <implements name="org.apache.hadoop.mapred.Partitioner"/> |
| <constructor name="KeyFieldBasedPartitioner" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="configure" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="job" type="org.apache.hadoop.mapred.JobConf"/> |
| </method> |
| <method name="getPartition" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="value" type="java.lang.Object"/> |
| <param name="numReduceTasks" type="int"/> |
| </method> |
| <method name="hashCode" return="int" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="protected" |
| deprecated="not deprecated"> |
| <param name="b" type="byte[]"/> |
| <param name="start" type="int"/> |
| <param name="end" type="int"/> |
| <param name="currentHash" type="int"/> |
| </method> |
| <doc> |
| <![CDATA[Defines a way to partition keys based on certain key fields (also see |
{@link KeyFieldBasedComparator}).
| The key specification supported is of the form -k pos1[,pos2], where, |
| pos is of the form f[.c][opts], where f is the number |
| of the key field to use, and c is the number of the first character from |
the beginning of the field. Fields and character positions are numbered
| starting with 1; a character position of zero in pos2 indicates the |
| field's last character. If '.c' is omitted from pos1, it defaults to 1 |
| (the beginning of the field); if omitted from pos2, it defaults to 0 |
| (the end of the field).]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner --> |
| <!-- start class org.apache.hadoop.mapred.lib.LongSumReducer --> |
| <class name="LongSumReducer" extends="org.apache.hadoop.mapred.MapReduceBase" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="Use {@link org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer} |
| instead."> |
| <implements name="org.apache.hadoop.mapred.Reducer"/> |
| <constructor name="LongSumReducer" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="reduce" |
| abstract="false" native="false" synchronized="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <param name="key" type="java.lang.Object"/> |
| <param name="values" type="java.util.Iterator"/> |
| <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> |
| <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> |
| <exception name="IOException" type="java.io.IOException"/> |
| </method> |
| <doc> |
| <![CDATA[A {@link Reducer} that sums long values. |
| @deprecated Use {@link org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer} |
| instead.]]> |
| </doc> |
| </class> |
| <!-- end class org.apache.hadoop.mapred.lib.LongSumReducer --> |
| <!-- start class org.apache.hadoop.mapred.lib.MultipleInputs --> |
| <class name="MultipleInputs" extends="java.lang.Object" |
| abstract="false" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| <constructor name="MultipleInputs" |
| static="false" final="false" visibility="public" |
| deprecated="not deprecated"> |
| </constructor> |
| <method name="addInputPath" |
| abstract="false" native="false" synchronized="false" |
| static="true" final="false" visibility="public" |
| |