Skip to content

Instantly share code, notes, and snippets.

@samidalouche
Created October 19, 2011 19:48
Show Gist options
  • Save samidalouche/1299463 to your computer and use it in GitHub Desktop.
Save samidalouche/1299463 to your computer and use it in GitHub Desktop.
Queues configuration
<?xml version="1.0"?>
<!-- This is the configuration file for the resource manager in Hadoop. -->
<!-- You can configure various scheduling parameters related to queues. -->
<!-- The properties for a queue follow a naming convention, such as -->
<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->
<configuration>
<!-- Upper bound on the number of jobs the CapacityScheduler may have initialized at once. -->
<property>
  <name>mapred.capacity-scheduler.maximum-system-jobs</name>
  <value>3000</value>
  <description>
    Maximum number of jobs in the system which can be initialized,
    concurrently, by the CapacityScheduler.
  </description>
</property>
<!--
Let's KISS (keep it simple) for now. We have 6 queues + 1 'default' queue, each having a minimum capacity guarantee of 14% (100/7, rounded down) of the total cluster capacity.
The 7 guarantees add up to 98%, so the remaining 2% of the cluster is not guaranteed to any queue.
All IO-bound queues are limited to 50% of the total cluster capacity. This should reduce Cassandra timeouts.
CPU-bound queues may grow up to 100% of the total cluster capacity.
The default queue is limited to 50% of the total cluster capacity, to make sure that jobs that don't use the queue mechanism
(e.g. the not-yet-updated Factory chain) do not degrade Cassandra's performance.
Queues :
- development-io-bound
- development-cpu-bound
- integration-io-bound
- integration-cpu-bound
- factory-io-bound
- factory-cpu-bound
-->
<!-- queue: default (14% guaranteed, at most 50%, job priorities on) -->
<property>
  <name>mapred.capacity-scheduler.queue.default.capacity</name>
  <value>14</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name>
  <value>50</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
  <value>true</value>
</property>
<!-- queue: development-io-bound (14% guaranteed, at most 50%, job priorities on) -->
<property>
  <name>mapred.capacity-scheduler.queue.development-io-bound.capacity</name>
  <value>14</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.development-io-bound.maximum-capacity</name>
  <value>50</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.development-io-bound.supports-priority</name>
  <value>true</value>
</property>
<!-- queue: development-cpu-bound (14% guaranteed, at most 100%, job priorities on) -->
<property>
  <name>mapred.capacity-scheduler.queue.development-cpu-bound.capacity</name>
  <value>14</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.development-cpu-bound.maximum-capacity</name>
  <value>100</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.development-cpu-bound.supports-priority</name>
  <value>true</value>
</property>
<!-- queue: integration-io-bound (14% guaranteed, at most 50%, job priorities on) -->
<property>
  <name>mapred.capacity-scheduler.queue.integration-io-bound.capacity</name>
  <value>14</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.integration-io-bound.maximum-capacity</name>
  <value>50</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.integration-io-bound.supports-priority</name>
  <value>true</value>
</property>
<!-- queue: integration-cpu-bound (14% guaranteed, at most 100%, job priorities on) -->
<property>
  <name>mapred.capacity-scheduler.queue.integration-cpu-bound.capacity</name>
  <value>14</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.integration-cpu-bound.maximum-capacity</name>
  <value>100</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.integration-cpu-bound.supports-priority</name>
  <value>true</value>
</property>
<!-- queue: factory-io-bound (14% guaranteed, at most 50%, job priorities on) -->
<property>
  <name>mapred.capacity-scheduler.queue.factory-io-bound.capacity</name>
  <value>14</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.factory-io-bound.maximum-capacity</name>
  <value>50</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.factory-io-bound.supports-priority</name>
  <value>true</value>
</property>
<!-- queue: factory-cpu-bound (14% guaranteed, at most 100%, job priorities on) -->
<property>
  <name>mapred.capacity-scheduler.queue.factory-cpu-bound.capacity</name>
  <value>14</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.factory-cpu-bound.maximum-capacity</name>
  <value>100</value>
</property>
<property>
  <name>mapred.capacity-scheduler.queue.factory-cpu-bound.supports-priority</name>
  <value>true</value>
</property>
<!-- The default configuration settings for the capacity task scheduler -->
<!-- These defaults apply to every queue that does not set the -->
<!-- corresponding queue-specific property itself. -->
<!-- Fallback for queues without their own supports-priority setting: priorities are off. -->
<property>
  <name>mapred.capacity-scheduler.default-supports-priority</name>
  <value>false</value>
  <description>
    If true, priorities of jobs will be taken into
    account in scheduling decisions by default in a job queue.
  </description>
</property>
<!-- Fallback minimum-user-limit-percent for queues that do not define their own. -->
<property>
  <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>
  <value>100</value>
  <description>
    The percentage of the resources limited to a particular user
    for the job queue at any given point of time by default.
  </description>
</property>
<!-- Fallback user-limit-factor: a single user may use up to 1x the queue capacity. -->
<property>
  <name>mapred.capacity-scheduler.default-user-limit-factor</name>
  <value>1</value>
  <description>
    The default multiple of queue-capacity which is used to
    determine the amount of slots a single user can consume concurrently.
  </description>
</property>
<!-- Fallback per-queue ceiling on concurrently initialized tasks. -->
<property>
  <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-queue</name>
  <value>200000</value>
  <description>
    The default maximum number of tasks, across all jobs in the
    queue, which can be initialized concurrently. Once the queue's jobs exceed
    this limit they will be queued on disk.
  </description>
</property>
<!-- Fallback per-user ceiling on concurrently initialized tasks within a queue. -->
<property>
  <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-user</name>
  <value>100000</value>
  <description>
    The default maximum number of tasks per-user, across all of
    the user's jobs in the queue, which can be initialized concurrently. Once
    the user's jobs exceed this limit they will be queued on disk.
  </description>
</property>
<!-- Fallback factor that bounds how many jobs the scheduler accepts per queue. -->
<property>
  <name>mapred.capacity-scheduler.default-init-accept-jobs-factor</name>
  <value>10</value>
  <description>
    The default multiple of (maximum-system-jobs * queue-capacity)
    used to determine the number of jobs which are accepted by the scheduler.
  </description>
</property>
<!-- Capacity scheduler Job Initialization configuration parameters -->
<!-- Polling period, in milliseconds, for finding jobs that need initialization. -->
<property>
  <name>mapred.capacity-scheduler.init-poll-interval</name>
  <value>5000</value>
  <description>
    The amount of time in milliseconds which is used to poll
    the job queues for jobs to initialize.
  </description>
</property>
<!-- Number of threads the initialization poller uses to initialize jobs. -->
<property>
  <name>mapred.capacity-scheduler.init-worker-threads</name>
  <value>5</value>
  <description>
    Number of worker threads used by the initialization poller to
    initialize jobs in a set of queues. If this number equals the number
    of job queues, a single thread initializes the jobs of each queue.
    If it is lower, each thread is assigned a set of queues. If it is
    higher, the number of threads is capped at the number of job queues.
  </description>
</property>
</configuration>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment