<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
<!-- i/o properties -->
<!--
  Map-side sort tuning. io.sort.mb and io.sort.spill.percent are left empty in
  this file, while io.sort.record.percent (.2) and io.sort.factor (100) carry
  literal values. Presumably the empty ones are filled in per cluster by
  deployment tooling (TODO confirm).
  NOTE(review): per Hadoop's mapred-default.xml, io.sort.mb is the sort buffer
  size in MB and io.sort.factor is the number of streams merged at once; verify
  against the Hadoop release this file targets.
-->
<property>
<name>io.sort.mb</name>
<value></value>
<description>No description</description>
</property>
<property>
<name>io.sort.record.percent</name>
<value>.2</value>
<description>No description</description>
</property>
<property>
<name>io.sort.spill.percent</name>
<value></value>
<description>No description</description>
</property>
<property>
<name>io.sort.factor</name>
<value>100</value>
<description>No description</description>
</property>
<!-- map/reduce properties -->
<!--
  Kill delay is 250 ms here, far below the 5000 ms that the description cites
  as the recommended default; the description explains why.
-->
<property>
<name>mapred.tasktracker.tasks.sleeptime-before-sigkill</name>
<value>250</value>
<description>Normally, this is the amount of time before killing
processes, and the recommended-default is 5.000 seconds - a value of
5000 here. In this case, we are using it solely to blast tasks before
killing them, and killing them very quickly (1/4 second) to guarantee
that we do not leave VMs around for later jobs.
</description>
</property>
<!--
  By the 4% rule of thumb in the description, 50 handler threads corresponds
  to a cluster of roughly 1250 tasktracker nodes.
-->
<property>
<name>mapred.job.tracker.handler.count</name>
<value>50</value>
<description>
The number of server threads for the JobTracker. This should be roughly
4% of the number of tasktracker nodes.
</description>
</property>
<!--
  JobTracker endpoints and storage locations. All four properties are marked
  final. Only mapred.system.dir has a literal value; the other three are empty
  and tagged as cluster variant/specific, so they are presumably supplied per
  cluster at deploy time (TODO confirm).
  NOTE(review): in Hadoop, a final property cannot be overridden by
  configuration resources loaded later (for example a job's own settings);
  confirm against the Configuration class documentation.
-->
<property>
<name>mapred.system.dir</name>
<value>/mapred/system</value>
<description>No description</description>
<final>true</final>
</property>
<property>
<name>mapred.job.tracker</name>
<!-- cluster variant -->
<value></value>
<description>No description</description>
<final>true</final>
</property>
<property>
<name>mapred.job.tracker.http.address</name>
<!-- cluster variant -->
<value></value>
<description>No description</description>
<final>true</final>
</property>
<property>
<!-- cluster specific -->
<name>mapred.local.dir</name>
<value></value>
<description>No description</description>
<final>true</final>
</property>
<!--
  NOTE(review): the leading space in the value looks deliberate. Hadoop ACL
  strings take the form "user1,user2 group1,group2" (users, one space, groups),
  so " hadoop" would mean no individual users and the single group "hadoop".
  Do not trim it without confirming against the Hadoop ACL documentation.
-->
<property>
<name>mapreduce.cluster.administrators</name>
<value> hadoop</value>
</property>
<!--
  Shuffle parallelism and per-tasktracker slot settings. The two
  *.tasks.maximum values are empty here; presumably they are set per node type
  at deploy time (TODO confirm).
-->
<property>
<name>mapred.reduce.parallel.copies</name>
<value>30</value>
<description>No description</description>
</property>
<property>
<name>mapred.tasktracker.map.tasks.maximum</name>
<value></value>
<description>No description</description>
</property>
<property>
<name>mapred.tasktracker.reduce.tasks.maximum</name>
<value></value>
<description>No description</description>
</property>
<property>
<name>tasktracker.http.threads</name>
<value>50</value>
</property>
<!-- Speculative execution is switched off for both map and reduce tasks. -->
<property>
<name>mapred.map.tasks.speculative.execution</name>
<value>false</value>
<description>If true, then multiple instances of some map tasks
may be executed in parallel.</description>
</property>
<property>
<name>mapred.reduce.tasks.speculative.execution</name>
<value>false</value>
<description>If true, then multiple instances of some reduce tasks
may be executed in parallel.</description>
</property>
<!--
  NOTE(review): no description is given. Per Hadoop's mapred-default.xml this
  is the fraction of a job's maps that must complete before its reduces are
  scheduled, so 0.05 would mean 5 percent; confirm.
-->
<property>
<name>mapred.reduce.slowstart.completed.maps</name>
<value>0.05</value>
</property>
<!-- File-count trigger for the reduce-side in-memory merge. -->
<property>
<name>mapred.inmem.merge.threshold</name>
<value>1000</value>
<description>The threshold, in terms of the number of files,
for the in-memory merge process. When we accumulate the threshold number of
files, we initiate the in-memory merge and spill to disk. A value of 0 or less
indicates that we DON'T want any threshold and instead depend only on
the ramfs's memory consumption to trigger the merge.
</description>
</property>
<!--
  Both values are written as fractions (0.66, 0.7) although the descriptions
  say "percentage". Read together, the descriptions imply the in-memory merge
  starts once map outputs occupy about 0.66 * 0.7 = 0.46 of the maximum heap.
-->
<property>
<name>mapred.job.shuffle.merge.percent</name>
<value>0.66</value>
<description>The usage threshold at which an in-memory merge will be
initiated, expressed as a percentage of the total memory allocated to
storing in-memory map outputs, as defined by
mapred.job.shuffle.input.buffer.percent.
</description>
</property>
<property>
<name>mapred.job.shuffle.input.buffer.percent</name>
<value>0.7</value>
<description>The percentage of memory to be allocated from the maximum heap
size to storing map outputs during the shuffle.
</description>
</property>
<!--
  Compression settings. The codec value is empty in this file; presumably it
  is supplied at deploy time (TODO confirm).
-->
<property>
<name>mapred.map.output.compression.codec</name>
<value></value>
<description>If the map outputs are compressed, how should they be
compressed?
</description>
</property>
<property>
<name>mapred.output.compression.type</name>
<value>BLOCK</value>
<description>If the job outputs are to be compressed as SequenceFiles, how
should they be compressed? Should be one of NONE, RECORD or BLOCK.
</description>
</property>
<!--
  NOTE(review): no description is given. Per Hadoop's mapred-default.xml this
  caps the completed jobs per user kept in JobTracker memory, so 0 presumably
  means none are retained; confirm.
-->
<property>
<name>mapred.jobtracker.completeuserjobs.maximum</name>
<value>0</value>
</property>
<!-- Scheduler setting is left empty here; presumably injected at deploy time (TODO confirm). -->
<property>
<name>mapred.jobtracker.taskScheduler</name>
<value></value>
</property>
<property>
<name>mapred.jobtracker.restart.recover</name>
<value>false</value>
<description>"true" to enable (job) recovery upon restart,
"false" to start afresh
</description>
</property>
<!--
  With 0.0, the description implies that no map output may remain in memory
  when the reduce phase starts.
-->
<property>
<name>mapred.job.reduce.input.buffer.percent</name>
<value>0.0</value>
<description>The percentage of memory- relative to the maximum heap size- to
retain map outputs during the reduce. When the shuffle is concluded, any
remaining map outputs in memory must consume less than this threshold before
the reduce can begin.
</description>
</property>
<!-- 10737418240 = 10 * 1024^3, so the limit is expressed in bytes. -->
<property>
<name>mapreduce.reduce.input.limit</name>
<value>10737418240</value>
<description>The limit on the input size of the reduce. (This value
is 10 GB.) If the estimated input size of the reduce is greater than
this value, the job is failed. A value of -1 means that there is no limit
set. </description>
</property>
<!-- copied from kryptonite configuration -->
<!-- Left empty here; presumably supplied at deploy time (TODO confirm). -->
<property>
<name>mapred.compress.map.output</name>
<value></value>
</property>
<!-- 600000 ms is 10 minutes. -->
<property>
<name>mapred.task.timeout</name>
<value>600000</value>
<description>The number of milliseconds before a task will be
terminated if it neither reads an input, writes an output, nor
updates its status string.
</description>
</property>
<!--
  NOTE(review): the value is a Jetty connector class name; which component
  reads this key is not visible from this file.
-->
<property>
<name>jetty.connector</name>
<value>org.mortbay.jetty.nio.SelectChannelConnector</value>
<description>No description</description>
</property>
<property>
<name>mapred.task.tracker.task-controller</name>
<value></value>
<description>
TaskController which is used to launch and manage task execution.
</description>
</property>
<!--
  NOTE(review): "INFO,TLA" looks like a log4j "level,appender" pair; TLA
  presumably names a task log appender defined in log4j.properties. Confirm.
-->
<property>
<name>mapred.child.root.logger</name>
<value>INFO,TLA</value>
</property>
<!-- Child JVM options are left empty here; presumably supplied at deploy time (TODO confirm). -->
<property>
<name>mapred.child.java.opts</name>
<value></value>
<description>No description</description>
</property>
<!--
  Memory sizing keys; all six values are empty in this file and carry no
  description. Presumably they are filled in per cluster (TODO confirm).
  NOTE(review): the names suggest a per-slot size (mapred.cluster.*), a
  per-job request (mapred.job.*) and an upper bound (mapred.cluster.max.*),
  each in MB; verify in Hadoop's mapred-default.xml.
-->
<property>
<name>mapred.cluster.map.memory.mb</name>
<value></value>
</property>
<property>
<name>mapred.cluster.reduce.memory.mb</name>
<value></value>
</property>
<property>
<name>mapred.job.map.memory.mb</name>
<value></value>
</property>
<property>
<name>mapred.job.reduce.memory.mb</name>
<value></value>
</property>
<property>
<name>mapred.cluster.max.map.memory.mb</name>
<value></value>
</property>
<property>
<name>mapred.cluster.max.reduce.memory.mb</name>
<value></value>
</property>
<!--
  Node membership and health. mapred.hosts, mapred.hosts.exclude and the
  health checker script path are empty here; presumably supplied per cluster
  (TODO confirm).
  NOTE(review): per Hadoop docs, mapred.hosts and mapred.hosts.exclude name
  files listing permitted and excluded tasktracker hosts; confirm.
-->
<property>
<name>mapred.hosts</name>
<value></value>
</property>
<property>
<name>mapred.hosts.exclude</name>
<value></value>
</property>
<property>
<name>mapred.max.tracker.blacklists</name>
<value>16</value>
<description>
if node is reported blacklisted by 16 successful jobs within timeout-window, it will be graylisted
</description>
</property>
<property>
<name>mapred.healthChecker.script.path</name>
<value></value>
</property>
<!--
  NOTE(review): units are not stated in this file. If these are milliseconds,
  the script would run every 135 s with a 60 s timeout; confirm.
-->
<property>
<name>mapred.healthChecker.interval</name>
<value>135000</value>
</property>
<property>
<name>mapred.healthChecker.script.timeout</name>
<value>60000</value>
</property>
<!--
  Job status persistence. It is switched off here (active = false); the
  directory value is empty in this file.
-->
<property>
<name>mapred.job.tracker.persist.jobstatus.active</name>
<value>false</value>
<description>Indicates if persistency of job status information is
active or not.
</description>
</property>
<property>
<name>mapred.job.tracker.persist.jobstatus.hours</name>
<value>1</value>
<description>The number of hours job status information is persisted in DFS.
The job status information will be available after it drops off the memory
queue and between jobtracker restarts. With a zero value the job status
information is not persisted at all in DFS.
</description>
</property>
<property>
<name>mapred.job.tracker.persist.jobstatus.dir</name>
<value></value>
<description>The directory where the job status information is persisted
in a file system to be available after it drops off the memory queue and
between jobtracker restarts.
</description>
</property>
<!--
  NOTE(review): neither retirejob key has a description. Per Hadoop docs they
  are likely millisecond values: how often completed jobs are checked for
  retirement, and how long a completed job is kept before retiring. An
  interval of 0 presumably retires jobs immediately. Confirm.
-->
<property>
<name>mapred.jobtracker.retirejob.check</name>
<value>10000</value>
</property>
<property>
<name>mapred.jobtracker.retirejob.interval</name>
<value>0</value>
</property>
<property>
<name>mapred.job.tracker.history.completed.location</name>
<value>/mapred/history/done</value>
<description>No description</description>
</property>
<!--
  The next two limits are marked final but have empty values here; presumably
  they are supplied per cluster (TODO confirm).
-->
<property>
<name>mapred.task.maxvmem</name>
<value></value>
<final>true</final>
<description>No description</description>
</property>
<property>
<name>mapred.jobtracker.maxtasks.per.job</name>
<value></value>
<final>true</final>
<description>The maximum number of tasks for a single job.
A value of -1 indicates that there is no maximum. </description>
</property>
<!--
  NOTE(review): no description is given. Per Hadoop docs, false suppresses the
  _SUCCESS marker file in job output directories; confirm.
-->
<property>
<name>mapreduce.fileoutputcommitter.marksuccessfuljobs</name>
<value>false</value>
</property>
<property>
<name>mapred.userlog.retain.hours</name>
<value></value>
</property>
<!-- One task per JVM (no reuse); the property is also marked final. -->
<property>
<name>mapred.job.reuse.jvm.num.tasks</name>
<value>1</value>
<description>
How many tasks to run per jvm. If set to -1, there is no limit
</description>
<final>true</final>
</property>
<!--
  Kerberos principals and keytab paths below are all empty in this file;
  presumably they are supplied per cluster when security is enabled
  (TODO confirm).
-->
<property>
<name>mapreduce.jobtracker.kerberos.principal</name>
<value></value>
<description>
JT user name key.
</description>
</property>
<property>
<name>mapreduce.tasktracker.kerberos.principal</name>
<value></value>
<description>
tt user name key. "_HOST" is replaced by the host name of the task tracker.
</description>
</property>
<!--
  NOTE(review): no description is given. Per Hadoop docs, the value "none"
  disables writing per-job history files into a user-specified location;
  confirm.
-->
<property>
<name>hadoop.job.history.user.location</name>
<value>none</value>
<final>true</final>
</property>
<property>
<name>mapreduce.jobtracker.keytab.file</name>
<value></value>
<description>
The keytab for the jobtracker principal.
</description>
</property>
<property>
<name>mapreduce.tasktracker.keytab.file</name>
<value></value>
<description>The filename of the keytab for the task tracker</description>
</property>
<!--
  Going by the description, with this value each user's staging area sits
  under /user/ followed by the user's name.
-->
<property>
<name>mapreduce.jobtracker.staging.root.dir</name>
<value>/user</value>
<description>The Path prefix for where the staging directories should be placed. The next level is always the user's
name. It is a path in the default file system.</description>
</property>
<!--
  NOTE(review): the description is circular as written. It presumably refers
  to the group that owns the task-controller executable; confirm against the
  Hadoop security documentation before rewording.
-->
<property>
<name>mapreduce.tasktracker.group</name>
<value>hadoop</value>
<description>The group that the task controller uses for accessing the task controller. The mapred user must be a member and users should *not* be members.</description>
</property>
<!-- NOTE(review): the unit is not stated; if bytes, 50000000 is about 50 MB. Confirm. -->
<property>
<name>mapreduce.jobtracker.split.metainfo.maxsize</name>
<value>50000000</value>
<final>true</final>
<description>If the size of the split metainfo file is larger than this, the JobTracker will fail the job during
initialization.
</description>
</property>
<!--
  Job history server settings. Embedding is set to false and marked final.
  The address, principal and keytab are empty and tagged as cluster variant,
  so they are presumably supplied per cluster (TODO confirm).
-->
<property>
<name>mapreduce.history.server.embedded</name>
<value>false</value>
<description>Should job history server be embedded within Job tracker
process</description>
<final>true</final>
</property>
<property>
<name>mapreduce.history.server.http.address</name>
<!-- cluster variant -->
<value></value>
<description>Http address of the history server</description>
<final>true</final>
</property>
<property>
<name>mapreduce.jobhistory.kerberos.principal</name>
<!-- cluster variant -->
<value></value>
<description>Job history user name key. (must map to same user as JT
user)</description>
</property>
<property>
<name>mapreduce.jobhistory.keytab.file</name>
<!-- cluster variant -->
<value></value>
<description>The keytab for the job history server principal.</description>
</property>
<!--
  Blacklist fault tracking: a 180-minute window divided into 15-minute
  buckets, which works out to 12 buckets per window.
-->
<property>
<name>mapred.jobtracker.blacklist.fault-timeout-window</name>
<value>180</value>
<description>
3-hour sliding window (value is in minutes)
</description>
</property>
<property>
<name>mapred.jobtracker.blacklist.fault-bucket-width</name>
<value>15</value>
<description>
15-minute bucket size (value is in minutes)
</description>
</property>
<!-- Only the single queue named "default" is configured here. -->
<property>
<name>mapred.queue.names</name>
<value>default</value>
<description> Comma separated list of queues configured for this jobtracker.</description>
</property>
</configuration>