| <?xml version="1.0"?> |
| |
| <!-- This is the configuration file for the resource manager in Hadoop. --> |
| <!-- You can configure various scheduling parameters related to queues. --> |
| <!-- The properties for a queue follow a naming convention, such as --> |
| <!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. --> |
| |
| <configuration> |
| |
| <property> |
| <name>mapred.capacity-scheduler.maximum-system-jobs</name> |
| <value>3000</value> |
| <description>Maximum number of jobs in the system which can be initialized, |
| concurrently, by the CapacityScheduler. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.queue.default.capacity</name> |
| <value>100</value> |
| <description>Percentage of the number of slots in the cluster that are |
| to be available for jobs in this queue. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name> |
| <value>-1</value> |
| <description> |
| maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster. |
| This provides a means to limit how much excess capacity a queue can use. By default, there is no limit. |
| The maximum-capacity of a queue can only be greater than or equal to its minimum capacity. |
| Default value of -1 implies a queue can use complete capacity of the cluster. |
| |
| This property could be used to curtail certain jobs which are long running in nature from occupying more than a |
| certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of |
| other queues being affected. |
| |
| One important thing to note is that maximum-capacity is a percentage, so based on the cluster's capacity |
| the max capacity would change. So if a large number of nodes or racks get added to the cluster, the max capacity in |
| absolute terms would increase accordingly. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.queue.default.supports-priority</name> |
| <value>false</value> |
| <description>If true, priorities of jobs will be taken into |
| account in scheduling decisions. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name> |
| <value>100</value> |
| <description> Each queue enforces a limit on the percentage of resources |
| allocated to a user at any given time, if there is competition for them. |
| This user limit can vary between a minimum and maximum value. The former |
| is set to this property value, and the latter depends on the number of |
| users who have submitted jobs. For example, suppose the value of this |
| property is 25. If two users have submitted jobs to a queue, no single |
| user can use more than 50% of the queue resources. If a third user submits |
| a job, no single user can use more than 33% of the queue resources. With 4 |
| or more users, no user can use more than 25% of the queue's resources. A |
| value of 100 implies no user limits are imposed. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.queue.default.user-limit-factor</name> |
| <value>1</value> |
| <description>The multiple of the queue capacity which can be configured to |
| allow a single user to acquire more slots. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks</name> |
| <value>200000</value> |
| <description>The maximum number of tasks, across all jobs in the queue, |
| which can be initialized concurrently. Once the queue's jobs exceed this |
| limit they will be queued on disk. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks-per-user</name> |
| <value>100000</value> |
| <description>The maximum number of tasks per-user, across all of the |
| user's jobs in the queue, which can be initialized concurrently. Once the |
| user's jobs exceed this limit they will be queued on disk. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.queue.default.init-accept-jobs-factor</name> |
| <value>10</value> |
| <description>The multiple of (maximum-system-jobs * queue-capacity) used to |
| determine the number of jobs which are accepted by the scheduler. |
| </description> |
| </property> |
| |
| <!-- The default configuration settings for the capacity task scheduler --> |
| <!-- The default values would be applied to all the queues which don't have --> |
| <!-- the appropriate property for the particular queue --> |
| <property> |
| <name>mapred.capacity-scheduler.default-supports-priority</name> |
| <value>false</value> |
| <description>If true, priorities of jobs will be taken into |
| account in scheduling decisions by default in a job queue. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name> |
| <value>100</value> |
| <description>The percentage of the resources limited to a particular user |
| for the job queue at any given point of time by default. |
| </description> |
| </property> |
| |
| |
| <property> |
| <name>mapred.capacity-scheduler.default-user-limit-factor</name> |
| <value>1</value> |
| <description>The default multiple of queue-capacity which is used to |
| determine the number of slots a single user can consume concurrently. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-queue</name> |
| <value>200000</value> |
| <description>The default maximum number of tasks, across all jobs in the |
| queue, which can be initialized concurrently. Once the queue's jobs exceed |
| this limit they will be queued on disk. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-user</name> |
| <value>100000</value> |
| <description>The default maximum number of tasks per-user, across all of |
| the user's jobs in the queue, which can be initialized concurrently. Once |
| the user's jobs exceed this limit they will be queued on disk. |
| </description> |
| </property> |
| |
| <property> |
| <name>mapred.capacity-scheduler.default-init-accept-jobs-factor</name> |
| <value>10</value> |
| <description>The default multiple of (maximum-system-jobs * queue-capacity) |
| used to determine the number of jobs which are accepted by the scheduler. |
| </description> |
| </property> |
| |
| <!-- Capacity scheduler Job Initialization configuration parameters --> |
| <property> |
| <name>mapred.capacity-scheduler.init-poll-interval</name> |
| <value>5000</value> |
| <description>The amount of time in milliseconds which is used to poll |
| the job queues for jobs to initialize. |
| </description> |
| </property> |
| <property> |
| <name>mapred.capacity-scheduler.init-worker-threads</name> |
| <value>5</value> |
| <description>Number of worker threads used by the initialization |
| poller to initialize jobs in a set of queues. |
| If this number is equal to the number of job queues, each thread |
| initializes the jobs of a single queue. If it is smaller, each |
| thread is assigned a set of queues. If it is greater, the number |
| of threads would be equal to the number of job queues. |
| </description> |
| </property> |
| |
| </configuration> |