MAPREDUCE-2543. [Gridmix] High-Ram feature emulation in Gridmix. (amarrk)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/mapreduce/trunk@1130550 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index 798e298..2fb9e7c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -9,6 +9,8 @@
NEW FEATURES
+ MAPREDUCE-2543. [Gridmix] High-Ram feature emulation in Gridmix. (amarrk)
+
MAPREDUCE-2408. [Gridmix] Compression emulation in Gridmix. (amarrk)
MAPREDUCE-2473. Add "mapred groups" command to query the server-side groups
diff --git a/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java b/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
index 81b7508..4d9866d 100644
--- a/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
+++ b/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
@@ -37,10 +37,12 @@
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.tools.rumen.JobStory;
@@ -81,6 +83,9 @@
"gridmix.job-submission.use-queue-in-trace";
protected static final String GRIDMIX_DEFAULT_QUEUE =
"gridmix.job-submission.default-queue";
+ // configuration key to enable/disable High-Ram emulation (default: enabled)
+ static final String GRIDMIX_HIGHRAM_EMULATION_ENABLE =
+ "gridmix.highram-emulation.enable";
private static void setJobQueue(Job job, String queue) {
if (queue != null) {
@@ -126,6 +131,12 @@
}
}
+ // configure high ram properties if enabled
+ if (conf.getBoolean(GRIDMIX_HIGHRAM_EMULATION_ENABLE, true)) {
+ configureHighRamProperties(jobdesc.getJobConf(),
+ ret.getConfiguration());
+ }
+
return ret;
}
});
@@ -138,6 +149,108 @@
outdir = new Path(outRoot, "" + seq);
}
+ // Scales the job-level parameter 'jobValueKey' (used for memory parameters)
+ // so that its ratio to the cluster-level parameter 'clusterValueKey' in
+ // sourceConf (the original run) is kept against that same cluster-level
+ // parameter in destConf (the simulated run); the result is set in destConf.
+ // 'defaultValue' stands in for any unset key. NOTE(review): a source cluster
+ // value of 0 yields an infinite/NaN ratio -- confirm whether a guard is needed.
+ private static void scaleConfigParameter(Configuration sourceConf,
+ Configuration destConf, String clusterValueKey,
+ String jobValueKey, long defaultValue) {
+ long simulatedClusterDefaultValue =
+ destConf.getLong(clusterValueKey, defaultValue);
+
+ long originalClusterDefaultValue =
+ sourceConf.getLong(clusterValueKey, defaultValue);
+
+ long originalJobValue =
+ sourceConf.getLong(jobValueKey, defaultValue);
+
+ double scaleFactor = (double)originalJobValue/originalClusterDefaultValue;
+
+ long simulatedJobValue = (long)(scaleFactor * simulatedClusterDefaultValue);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("For the job configuration parameter '" + jobValueKey
+ + "' and the cluster configuration parameter '"
+ + clusterValueKey + "', the original job's configuration value"
+ + " is scaled from '" + originalJobValue + "' to '"
+ + simulatedJobValue + "' using the default (unit) value of "
+ + "'" + originalClusterDefaultValue + "' for the original "
+ + " cluster and '" + simulatedClusterDefaultValue + "' for the"
+ + " simulated cluster.");
+ }
+
+ destConf.setLong(jobValueKey, simulatedJobValue);
+ }
+
+ // Throws RuntimeException if conf's 'jobKey' value exceeds the limit set under
+ // 'limitKey'; returns true only if that limit is set and non-negative.
+ @SuppressWarnings("deprecation")
+ private static boolean checkMemoryUpperLimits(String jobKey, String limitKey,
+ Configuration conf,
+ boolean convertLimitToMB) {
+ if (conf.get(limitKey) != null) {
+ long limit = conf.getLong(limitKey, JobConf.DISABLED_MEMORY_LIMIT);
+ // only a non-negative limit is enforced; otherwise fall through to false.
+ if (limit >= 0) {
+ if (convertLimitToMB) {
+ limit /= (1024 * 1024); // to MB; presumably the limit is in bytes -- confirm
+ }
+
+ long scaledConfigValue =
+ conf.getLong(jobKey, JobConf.DISABLED_MEMORY_LIMIT);
+
+ // fail if the job's configured value exceeds the limit
+ if (scaledConfigValue > limit) {
+ throw new RuntimeException("Simulated job's configuration"
+ + " parameter '" + jobKey + "' got scaled to a value '"
+ + scaledConfigValue + "' which exceeds the upper limit of '"
+ + limit + "' defined for the simulated cluster by the key '"
+ + limitKey + "'. To disable High-Ram feature emulation, set '"
+ + GRIDMIX_HIGHRAM_EMULATION_ENABLE + "' to 'false'.");
+ }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // Checks jobKey against the task vmem upper limit, else against clusterMaxKey.
+ @SuppressWarnings("deprecation")
+ private static void validateTaskMemoryLimits(Configuration conf,
+ String jobKey, String clusterMaxKey) {
+ if (!checkMemoryUpperLimits(jobKey,
+ JobConf.UPPER_LIMIT_ON_TASK_VMEM_PROPERTY, conf, true)) {
+ checkMemoryUpperLimits(jobKey, clusterMaxKey, conf, false);
+ }
+ }
+
+ /**
+ * Scales per-task map/reduce memory from sourceConf into destConf and validates it.
+ */
+ @SuppressWarnings("deprecation")
+ static void configureHighRamProperties(Configuration sourceConf,
+ Configuration destConf) {
+ // scale the memory per map task into destConf
+ scaleConfigParameter(sourceConf, destConf,
+ MRConfig.MAPMEMORY_MB, MRJobConfig.MAP_MEMORY_MB,
+ JobConf.DISABLED_MEMORY_LIMIT);
+
+ // throws if the scaled map memory exceeds a configured upper limit
+ validateTaskMemoryLimits(destConf, MRJobConfig.MAP_MEMORY_MB,
+ JTConfig.JT_MAX_MAPMEMORY_MB);
+
+ // scale the memory per reduce task into destConf
+ scaleConfigParameter(sourceConf, destConf,
+ MRConfig.REDUCEMEMORY_MB, MRJobConfig.REDUCE_MEMORY_MB,
+ JobConf.DISABLED_MEMORY_LIMIT);
+ // throws if the scaled reduce memory exceeds a configured upper limit
+ validateTaskMemoryLimits(destConf, MRJobConfig.REDUCE_MEMORY_MB,
+ JTConfig.JT_MAX_REDUCEMEMORY_MB);
+ }
+
/**
* Indicates whether this {@link GridmixJob} supports compression emulation.
*/
diff --git a/src/docs/src/documentation/content/xdocs/gridmix.xml b/src/docs/src/documentation/content/xdocs/gridmix.xml
index fac3526..a4dc357 100644
--- a/src/docs/src/documentation/content/xdocs/gridmix.xml
+++ b/src/docs/src/documentation/content/xdocs/gridmix.xml
@@ -639,6 +639,35 @@
</p>
</section>
+ <section id="highram-emulation">
+ <title>Emulating High-Ram jobs</title>
+ <p>MapReduce allows users to define a job as a High-Ram job. Tasks from a
+ High-Ram job can occupy multiple slots on the task-trackers.
+ A task-tracker assigns a fixed amount of virtual memory to each slot. Tasks from
+ High-Ram jobs can occupy multiple slots and thus can use up more
+ virtual memory as compared to a default task.
+ </p>
+ <p>Emulating this behavior is important for the following reasons:
+ </p>
+ <ul>
+ <li>Impact on scheduler: Scheduling of tasks from High-Ram jobs
+ impacts the scheduling behavior as it might result in slot
+ reservation and slot/resource utilization.
+ </li>
+ <li>Impact on the node: Since High-Ram tasks occupy multiple slots,
+ trackers do some bookkeeping for allocating extra resources for
+ these tasks. Thus this becomes a precursor for memory emulation
+ where tasks with high memory requirements need to be considered
+ as High-Ram tasks.
+ </li>
+ </ul>
+ <p>High-Ram feature emulation can be disabled by setting
+ <code>gridmix.highram-emulation.enable</code> to
+ <code>false</code>. By default High-Ram feature emulation is enabled.
+ Note that this feature works only for jobs of type <em>LOADJOB</em>.
+ </p>
+ </section>
+
<section id="assumptions">
<title>Simplifying Assumptions</title>
<p>GridMix will be developed in stages, incorporating feedback and