| <?xml version="1.0"?> |
| <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
| |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <!-- Put site-specific property overrides in this file. --> |
| |
| <configuration xmlns:xi="http://www.w3.org/2001/XInclude"> |
| |
| <!-- i/o properties --> |
| |
| <property> |
| <name>io.sort.mb</name> |
| <value></value> |
| <description>No description</description> |
| </property> |
| |
| <property> |
| <name>io.sort.record.percent</name> |
| <value>.2</value> |
| <description>No description</description> |
| </property> |
| |
| <property> |
| <name>io.sort.spill.percent</name> |
| <value></value> |
| <description>No description</description> |
| </property> |
| |
| <property> |
| <name>io.sort.factor</name> |
| <value>100</value> |
| <description>No description</description> |
| </property> |
| |
| <!-- map/reduce properties --> |
| |
| <property> |
| <name>mapred.tasktracker.tasks.sleeptime-before-sigkill</name> |
| <value>250</value> |
| <description>Normally, this is the amount of time before killing |
| processes, and the recommended default is 5 seconds - a value of |
| 5000 here. In this case, we are using it solely to blast tasks before |
| killing them, and killing them very quickly (1/4 second) to guarantee |
| that we do not leave VMs around for later jobs. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.job.tracker.handler.count</name> |
| <value>50</value> |
| <description> |
| The number of server threads for the JobTracker. This should be roughly |
| 4% of the number of tasktracker nodes. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.system.dir</name> |
| <value>/mapred/system</value> |
| <description>No description</description> |
| <final>true</final> |
| </property> |
| |
| <property> |
| <name>mapred.job.tracker</name> |
| <!-- cluster variant --> |
| <value></value> |
| <description>No description</description> |
| <final>true</final> |
| </property> |
| |
| <property> |
| <name>mapred.job.tracker.http.address</name> |
| <!-- cluster variant --> |
| <value></value> |
| <description>No description</description> |
| <final>true</final> |
| </property> |
| |
| <property> |
| <!-- cluster specific --> |
| <name>mapred.local.dir</name> |
| <value></value> |
| <description>No description</description> |
| <final>true</final> |
| </property> |
| |
| <property> |
| <!-- NOTE(review): the leading space in the value below looks intentional. |
| Hadoop ACL strings are presumably of the form "users groups", so " hadoop" |
| would mean no individual users plus the group hadoop. Confirm before trimming. --> |
| <name>mapreduce.cluster.administrators</name> |
| <value> hadoop</value> |
| </property> |
| |
| <property> |
| <name>mapred.reduce.parallel.copies</name> |
| <value>30</value> |
| <description>No description</description> |
| </property> |
| |
| <property> |
| <name>mapred.tasktracker.map.tasks.maximum</name> |
| <value></value> |
| <description>No description</description> |
| </property> |
| |
| <property> |
| <name>mapred.tasktracker.reduce.tasks.maximum</name> |
| <value></value> |
| <description>No description</description> |
| </property> |
| |
| <property> |
| <name>tasktracker.http.threads</name> |
| <value>50</value> |
| </property> |
| |
| <property> |
| <name>mapred.map.tasks.speculative.execution</name> |
| <value>false</value> |
| <description>If true, then multiple instances of some map tasks |
| may be executed in parallel.</description> |
| </property> |
| |
| <property> |
| <name>mapred.reduce.tasks.speculative.execution</name> |
| <value>false</value> |
| <description>If true, then multiple instances of some reduce tasks |
| may be executed in parallel.</description> |
| </property> |
| |
| <property> |
| <name>mapred.reduce.slowstart.completed.maps</name> |
| <value>0.05</value> |
| </property> |
| |
| <property> |
| <name>mapred.inmem.merge.threshold</name> |
| <value>1000</value> |
| <description>The threshold, in terms of the number of files |
| for the in-memory merge process. When we accumulate threshold number of files |
| we initiate the in-memory merge and spill to disk. A value of 0 or less |
| indicates we DON'T want any threshold and instead depend only on |
| the ramfs's memory consumption to trigger the merge. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.job.shuffle.merge.percent</name> |
| <value>0.66</value> |
| <description>The usage threshold at which an in-memory merge will be |
| initiated, expressed as a percentage of the total memory allocated to |
| storing in-memory map outputs, as defined by |
| mapred.job.shuffle.input.buffer.percent. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.job.shuffle.input.buffer.percent</name> |
| <value>0.7</value> |
| <description>The percentage of memory to be allocated from the maximum heap |
| size to storing map outputs during the shuffle. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.map.output.compression.codec</name> |
| <value></value> |
| <description>If the map outputs are compressed, how should they be |
| compressed? |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.output.compression.type</name> |
| <value>BLOCK</value> |
| <description>If the job outputs are to be compressed as SequenceFiles, how should |
| they be compressed? Should be one of NONE, RECORD or BLOCK. |
| </description> |
| </property> |
| |
| |
| <property> |
| <name>mapred.jobtracker.completeuserjobs.maximum</name> |
| <value>0</value> |
| </property> |
| |
| <property> |
| <name>mapred.jobtracker.taskScheduler</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.jobtracker.restart.recover</name> |
| <value>false</value> |
| <description>"true" to enable (job) recovery upon restart, |
| "false" to start afresh |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.job.reduce.input.buffer.percent</name> |
| <value>0.0</value> |
| <description>The percentage of memory - relative to the maximum heap size - to |
| retain map outputs during the reduce. When the shuffle is concluded, any |
| remaining map outputs in memory must consume less than this threshold before |
| the reduce can begin. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapreduce.reduce.input.limit</name> |
| <value>10737418240</value> |
| <description>The limit on the input size of the reduce. (This value |
| is 10 GB.) If the estimated input size of the reduce is greater than |
| this value, the job is failed. A value of -1 means that there is no limit |
| set. </description> |
| </property> |
| |
| |
| <!-- copied from kryptonite configuration --> |
| <property> |
| <name>mapred.compress.map.output</name> |
| <value></value> |
| </property> |
| |
| |
| <property> |
| <name>mapred.task.timeout</name> |
| <value>600000</value> |
| <description>The number of milliseconds before a task will be |
| terminated if it neither reads an input, writes an output, nor |
| updates its status string. |
| </description> |
| </property> |
| |
| <property> |
| <name>jetty.connector</name> |
| <value>org.mortbay.jetty.nio.SelectChannelConnector</value> |
| <description>No description</description> |
| </property> |
| |
| <property> |
| <name>mapred.task.tracker.task-controller</name> |
| <value></value> |
| <description> |
| TaskController which is used to launch and manage task execution. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.child.root.logger</name> |
| <value>INFO,TLA</value> |
| </property> |
| |
| <property> |
| <name>mapred.child.java.opts</name> |
| <value></value> |
| |
| <description>No description</description> |
| </property> |
| |
| <property> |
| <name>mapred.cluster.map.memory.mb</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.cluster.reduce.memory.mb</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.job.map.memory.mb</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.job.reduce.memory.mb</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.cluster.max.map.memory.mb</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.cluster.max.reduce.memory.mb</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.hosts</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.hosts.exclude</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.max.tracker.blacklists</name> |
| <value>16</value> |
| <description> |
| If a node is reported blacklisted by 16 successful jobs within the timeout window, it will be graylisted. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.healthChecker.script.path</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.healthChecker.interval</name> |
| <value>135000</value> |
| </property> |
| |
| <property> |
| <name>mapred.healthChecker.script.timeout</name> |
| <value>60000</value> |
| </property> |
| |
| <property> |
| <name>mapred.job.tracker.persist.jobstatus.active</name> |
| <value>false</value> |
| <description>Indicates if persistency of job status information is |
| active or not. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.job.tracker.persist.jobstatus.hours</name> |
| <value>1</value> |
| <description>The number of hours job status information is persisted in DFS. |
| The job status information will be available after it drops off the memory |
| queue and between jobtracker restarts. With a zero value the job status |
| information is not persisted at all in DFS. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.job.tracker.persist.jobstatus.dir</name> |
| <value></value> |
| <description>The directory where the job status information is persisted |
| in a file system to be available after it drops off the memory queue and |
| between jobtracker restarts. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.jobtracker.retirejob.check</name> |
| <value>10000</value> |
| </property> |
| |
| <property> |
| <name>mapred.jobtracker.retirejob.interval</name> |
| <value>0</value> |
| </property> |
| |
| <property> |
| <name>mapred.job.tracker.history.completed.location</name> |
| <value>/mapred/history/done</value> |
| <description>No description</description> |
| </property> |
| |
| <property> |
| <name>mapred.task.maxvmem</name> |
| <value></value> |
| <final>true</final> |
| <description>No description</description> |
| </property> |
| |
| <property> |
| <name>mapred.jobtracker.maxtasks.per.job</name> |
| <value></value> |
| <final>true</final> |
| <description>The maximum number of tasks for a single job. |
| A value of -1 indicates that there is no maximum. </description> |
| </property> |
| |
| <property> |
| <name>mapreduce.fileoutputcommitter.marksuccessfuljobs</name> |
| <value>false</value> |
| </property> |
| |
| <property> |
| <name>mapred.userlog.retain.hours</name> |
| <value></value> |
| </property> |
| |
| <property> |
| <name>mapred.job.reuse.jvm.num.tasks</name> |
| <value>1</value> |
| <description> |
| How many tasks to run per jvm. If set to -1, there is no limit |
| </description> |
| <final>true</final> |
| </property> |
| |
| <property> |
| <name>mapreduce.jobtracker.kerberos.principal</name> |
| <value></value> |
| <description> |
| JT user name key. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapreduce.tasktracker.kerberos.principal</name> |
| <value></value> |
| <description> |
| TT user name key. "_HOST" is replaced by the host name of the task tracker. |
| </description> |
| </property> |
| |
| |
| <property> |
| <name>hadoop.job.history.user.location</name> |
| <value>none</value> |
| <final>true</final> |
| </property> |
| |
| |
| <property> |
| <name>mapreduce.jobtracker.keytab.file</name> |
| <value></value> |
| <description> |
| The keytab for the jobtracker principal. |
| </description> |
| |
| </property> |
| |
| <property> |
| <name>mapreduce.tasktracker.keytab.file</name> |
| <value></value> |
| <description>The filename of the keytab for the task tracker</description> |
| </property> |
| |
| <property> |
| <name>mapreduce.jobtracker.staging.root.dir</name> |
| <value>/user</value> |
| <description>The Path prefix for where the staging directories should be placed. The next level is always the user's |
| name. It is a path in the default file system.</description> |
| </property> |
| |
| <property> |
| <name>mapreduce.tasktracker.group</name> |
| <value>hadoop</value> |
| <description>The group that the task controller uses for accessing the task controller. The mapred user must be a member and users should *not* be members.</description> |
| |
| </property> |
| |
| <property> |
| <name>mapreduce.jobtracker.split.metainfo.maxsize</name> |
| <value>50000000</value> |
| <final>true</final> |
| <description>If the size of the split metainfo file is larger than this, the JobTracker will fail the job during |
| initialization. |
| </description> |
| </property> |
| <property> |
| <name>mapreduce.history.server.embedded</name> |
| <value>false</value> |
| <description>Whether the job history server should be embedded within the JobTracker |
| process.</description> |
| <final>true</final> |
| </property> |
| |
| <property> |
| <name>mapreduce.history.server.http.address</name> |
| <!-- cluster variant --> |
| <value></value> |
| <description>Http address of the history server</description> |
| <final>true</final> |
| </property> |
| |
| <property> |
| <name>mapreduce.jobhistory.kerberos.principal</name> |
| <!-- cluster variant --> |
| <value></value> |
| <description>Job history user name key. (must map to same user as JT |
| user)</description> |
| </property> |
| |
| <property> |
| <name>mapreduce.jobhistory.keytab.file</name> |
| <!-- cluster variant --> |
| <value></value> |
| <description>The keytab for the job history server principal.</description> |
| </property> |
| |
| <property> |
| <name>mapred.jobtracker.blacklist.fault-timeout-window</name> |
| <value>180</value> |
| <description> |
| 3-hour sliding window (value is in minutes) |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.jobtracker.blacklist.fault-bucket-width</name> |
| <value>15</value> |
| <description> |
| 15-minute bucket size (value is in minutes) |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.queue.names</name> |
| <value>default</value> |
| <description> Comma separated list of queues configured for this jobtracker.</description> |
| </property> |
| |
| </configuration> |