MAPREDUCE-2137. Provide mapping between jobs of trace file and the corresponding simulated cluster's jobs in Gridmix.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/mapreduce/trunk@1128147 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java b/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java
index 48ae52c..b64a036 100644
--- a/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java
+++ b/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java
@@ -92,6 +92,20 @@
*/
public static final String GRIDMIX_USR_RSV = "gridmix.user.resolve.class";
+ /**
+ * Configuration property set in simulated job's configuration whose value is
+ * set to the corresponding original job's name. This property is set by
+ * Gridmix itself and is not configurable by the Gridmix user.
+ */
+ public static final String ORIGINAL_JOB_NAME =
+ "gridmix.job.original-job-name";
+ /**
+ * Configuration property set in simulated job's configuration whose value is
+ * set to the corresponding original job's id. This property is set by
+ * Gridmix itself and is not configurable by the Gridmix user.
+ */
+ public static final String ORIGINAL_JOB_ID = "gridmix.job.original-job-id";
+
private DistributedCacheEmulator distCacheEmulator;
// Submit data structures
diff --git a/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java b/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
index d94e8fa..0ce5e76 100644
--- a/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
+++ b/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
@@ -54,16 +54,17 @@
*/
abstract class GridmixJob implements Callable<Job>, Delayed {
- public static final String JOBNAME = "GRIDMIX";
- public static final String ORIGNAME = "gridmix.job.name.original";
+ // Gridmix job name format is GRIDMIX<6 digit sequence number>
+ public static final String JOB_NAME_PREFIX = "GRIDMIX";
public static final Log LOG = LogFactory.getLog(GridmixJob.class);
private static final ThreadLocal<Formatter> nameFormat =
new ThreadLocal<Formatter>() {
@Override
protected Formatter initialValue() {
- final StringBuilder sb = new StringBuilder(JOBNAME.length() + 5);
- sb.append(JOBNAME);
+ final StringBuilder sb =
+ new StringBuilder(JOB_NAME_PREFIX.length() + 6);
+ sb.append(JOB_NAME_PREFIX);
return new Formatter(sb);
}
};
@@ -95,18 +96,21 @@
this.jobdesc = jobdesc;
this.seq = seq;
- ((StringBuilder)nameFormat.get().out()).setLength(JOBNAME.length());
+ ((StringBuilder)nameFormat.get().out()).setLength(JOB_NAME_PREFIX.length());
try {
job = this.ugi.doAs(new PrivilegedExceptionAction<Job>() {
public Job run() throws IOException {
- Job ret =
- new Job(conf,
- nameFormat.get().format("%05d", seq).toString());
- ret.getConfiguration().setInt(GRIDMIX_JOB_SEQ, seq);
+
String jobId = null == jobdesc.getJobID()
? "<unknown>"
: jobdesc.getJobID().toString();
- ret.getConfiguration().set(ORIGNAME, jobId);
+ Job ret = new Job(conf,
+ nameFormat.get().format("%06d", seq).toString());
+ ret.getConfiguration().setInt(GRIDMIX_JOB_SEQ, seq);
+
+ ret.getConfiguration().set(Gridmix.ORIGINAL_JOB_ID, jobId);
+ ret.getConfiguration().set(Gridmix.ORIGINAL_JOB_NAME,
+ jobdesc.getName());
if (conf.getBoolean(GRIDMIX_USE_QUEUE_IN_TRACE, false)) {
setJobQueue(ret, jobdesc.getQueueName());
} else {
diff --git a/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java b/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java
index c1e433c..fca29af 100644
--- a/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java
+++ b/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java
@@ -146,7 +146,7 @@
final long seed = r.nextLong();
r.setSeed(seed);
id = seq.getAndIncrement();
- name = String.format("MOCKJOB%05d", id);
+ name = String.format("MOCKJOB%06d", id);
this.conf = conf;
LOG.info(name + " (" + seed + ")");
diff --git a/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java b/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java
index 17c6738..2815f24 100644
--- a/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java
+++ b/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java
@@ -17,9 +17,9 @@
*/
package org.apache.hadoop.mapred.gridmix;
-import java.io.FileInputStream;
import java.io.InputStream;
import java.io.IOException;
+import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -41,6 +41,7 @@
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskReport;
import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.tools.rumen.JobStory;
import org.apache.hadoop.tools.rumen.JobStoryProducer;
@@ -110,18 +111,10 @@
final JobClient client = new JobClient(
GridmixTestUtils.mrCluster.createJobConf());
for (Job job : succeeded) {
- final String jobname = job.getJobName();
- if (GenerateData.JOB_NAME.equals(jobname)) {
- if (!job.getConfiguration().getBoolean(
- GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE, true)) {
- assertEquals(" Improper queue for " + job.getJobName(),
- job.getConfiguration().get("mapred.job.queue.name"),
- "q1");
- } else {
- assertEquals(" Improper queue for " + job.getJobName(),
- job.getConfiguration().get("mapred.job.queue.name"),
- "default");
- }
+ final String jobName = job.getJobName();
+ Configuration conf = job.getConfiguration();
+ if (GenerateData.JOB_NAME.equals(jobName)) {
+ verifyQueue(conf, jobName);
final Path in = new Path("foo").makeQualified(GridmixTestUtils.dfs);
final Path out = new Path("/gridmix").makeQualified(GridmixTestUtils.dfs);
final ContentSummary generated = GridmixTestUtils.dfs.getContentSummary(in);
@@ -131,29 +124,48 @@
FileStatus[] outstat = GridmixTestUtils.dfs.listStatus(out);
assertEquals("Mismatched job count", NJOBS, outstat.length);
continue;
+ } else if (GenerateDistCacheData.JOB_NAME.equals(jobName)) {
+ verifyQueue(conf, jobName);
+ continue;
}
- if (!job.getConfiguration().getBoolean(
+ if (!conf.getBoolean(
GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE, true)) {
- assertEquals(" Improper queue for " + job.getJobName() + " " ,
- job.getConfiguration().get("mapred.job.queue.name"),"q1" );
+ assertEquals(" Improper queue for " + jobName + " " ,
+ conf.get(MRJobConfig.QUEUE_NAME), "q1" );
} else {
- assertEquals(" Improper queue for " + job.getJobName() + " ",
- job.getConfiguration().get("mapred.job.queue.name"),
- sub.get(job.getConfiguration().get(GridmixJob.ORIGNAME))
- .getQueueName());
+ assertEquals(" Improper queue for " + jobName + " ",
+ conf.get(MRJobConfig.QUEUE_NAME),
+ sub.get(conf.get(Gridmix.ORIGINAL_JOB_ID)).getQueueName());
}
- final JobStory spec =
- sub.get(job.getConfiguration().get(GridmixJob.ORIGNAME));
- assertNotNull("No spec for " + job.getJobName(), spec);
- assertNotNull("No counters for " + job.getJobName(), job.getCounters());
- final String specname = spec.getName();
+ final String originalJobId = conf.get(Gridmix.ORIGINAL_JOB_ID);
+ final JobStory spec = sub.get(originalJobId);
+ assertNotNull("No spec for " + jobName, spec);
+ assertNotNull("No counters for " + jobName, job.getCounters());
+ final String originalJobName = spec.getName();
+ System.out.println("originalJobName=" + originalJobName
+ + ";GridmixJobName=" + jobName + ";originalJobID=" + originalJobId);
+ assertTrue("Original job name is wrong.", originalJobName.equals(
+ conf.get(Gridmix.ORIGINAL_JOB_NAME)));
+
+ // Gridmix job seqNum contains 6 digits
+ int seqNumLength = 6;
+ String jobSeqNum = new DecimalFormat("000000").format(
+ conf.getInt(GridmixJob.GRIDMIX_JOB_SEQ, -1));
+ // The original job name must end with the same 6-digit sequence number:
+ // DebugJobProducer names its mock jobs MOCKJOB<6 digit sequence number>.
+ assertTrue(originalJobName.substring(
+ originalJobName.length() - seqNumLength).equals(jobSeqNum));
+
+ assertTrue("Gridmix job name is not in the expected format.",
+ jobName.equals(
+ GridmixJob.JOB_NAME_PREFIX + jobSeqNum));
+
final FileStatus stat =
GridmixTestUtils.dfs.getFileStatus(
- new Path(GridmixTestUtils.DEST,
- "" + Integer.valueOf(specname.substring(specname.length() - 5))));
- assertEquals("Wrong owner for " + job.getJobName(), spec.getUser(),
+ new Path(GridmixTestUtils.DEST, "" + Integer.valueOf(jobSeqNum)));
+ assertEquals("Wrong owner for " + jobName, spec.getUser(),
stat.getOwner());
final int nMaps = spec.getNumberMaps();
@@ -162,7 +174,7 @@
// TODO Blocked by MAPREDUCE-118
if (true) return;
// TODO
- System.out.println(jobname + ": " + nMaps + "/" + nReds);
+ System.out.println(jobName + ": " + nMaps + "/" + nReds);
final TaskReport[] mReports =
client.getMapTaskReports(JobID.downgrade(job.getJobID()));
assertEquals("Mismatched map count", nMaps, mReports.length);
@@ -177,6 +189,18 @@
}
}
+  // Verify the queue name stored in the job's configuration: it must be
+  // "q1" if GRIDMIX_USE_QUEUE_IN_TRACE is false, and "default" if the
+  // property is true or unset (the lookup below defaults to true).
+  private void verifyQueue(Configuration conf, String jobName) {
+    final String expectedQueue =
+        conf.getBoolean(GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE, true)
+            ? "default" : "q1";
+    // JUnit's assertEquals takes (message, expected, actual) in that order.
+    assertEquals(" Improper queue for " + jobName,
+        expectedQueue, conf.get("mapred.job.queue.name"));
+  }
+
public void check(final TaskType type, Job job, JobStory spec,
final TaskReport[] runTasks,
long extraInputBytes, int extraInputRecords,
diff --git a/src/docs/src/documentation/content/xdocs/gridmix.xml b/src/docs/src/documentation/content/xdocs/gridmix.xml
index 2ce5b1e..3c72713 100644
--- a/src/docs/src/documentation/content/xdocs/gridmix.xml
+++ b/src/docs/src/documentation/content/xdocs/gridmix.xml
@@ -56,8 +56,8 @@
<li>Use GridMix with the job trace on the benchmark cluster.</li>
</ol>
<p>Jobs submitted by GridMix have names of the form
- "<code>GRIDMIXnnnnn</code>", where
- "<code>nnnnn</code>" is a sequence number padded with leading
+ "<code>GRIDMIXnnnnnn</code>", where
+ "<code>nnnnnn</code>" is a sequence number padded with leading
zeroes.</p>
</section>
<section id="usage">
@@ -540,6 +540,36 @@
</ul>
</section>
+ <section id="simulatedjobconf">
+ <title>Configuration of Simulated Jobs</title>
+ <p> GridMix sets the following configuration properties in each simulated
+ job it submits, so that the simulated job can be mapped back to the
+ corresponding job in the input job trace:
+ </p>
+ <table>
+ <tr>
+ <th>Parameter</th>
+ <th>Description</th>
+ </tr>
+ <tr>
+ <td>
+ <code>gridmix.job.original-job-id</code>
+ </td>
+ <td> The job id of the original cluster's job corresponding to this
+ simulated job.
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <code>gridmix.job.original-job-name</code>
+ </td>
+ <td> The job name of the original cluster's job corresponding to this
+ simulated job.
+ </td>
+ </tr>
+ </table>
+ </section>
+
<section id="assumptions">
<title>Simplifying Assumptions</title>
<p>GridMix will be developed in stages, incorporating feedback and