MAPREDUCE-2517. [Gridmix] Add system tests to Gridmix. (Vinay Kumar Thota via amarrk)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/mapreduce/trunk@1128244 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index 2a74014..5e04903 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -16,6 +16,9 @@
IMPROVEMENTS
+ MAPREDUCE-2517. [Gridmix] Add system tests to Gridmix.
+ (Vinay Kumar Thota via amarrk)
+
MAPREDUCE-2492. The new MapReduce API should make available task's
progress to the task. (amarrk)
diff --git a/src/contrib/gridmix/build.xml b/src/contrib/gridmix/build.xml
index 40763d8..72740e3 100644
--- a/src/contrib/gridmix/build.xml
+++ b/src/contrib/gridmix/build.xml
@@ -20,4 +20,13 @@
<import file="../build-contrib.xml"/>
+ <!-- Run all unit tests. superdottest -->
+ <target name="test">
+ <antcall target="hadoopbuildcontrib.test" />
+ </target>
+ <!--Run all system tests.-->
+ <target name="test-system">
+ <antcall target="hadoopbuildcontrib.test-system" />
+ </target>
+
</project>
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/GridmixSystemTestCase.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/GridmixSystemTestCase.java
new file mode 100644
index 0000000..10cd9aa
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/GridmixSystemTestCase.java
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.test.system.MRCluster;
+import org.apache.hadoop.mapreduce.test.system.JTProtocol;
+import org.apache.hadoop.mapreduce.test.system.JTClient;
+import org.apache.hadoop.mapred.gridmix.test.system.GridmixJobSubmission;
+import org.apache.hadoop.mapred.gridmix.test.system.GridmixJobVerification;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.mapreduce.JobID;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+import java.util.Iterator;
+import java.util.Map;
+import java.util.List;
+import java.io.IOException;
+
+/**
+ * Run and verify the Gridmix jobs for given a trace.
+ */
+public class GridmixSystemTestCase {
+ private static final Log LOG =
+ LogFactory.getLog(GridmixSystemTestCase.class);
+ protected static Configuration conf = new Configuration();
+ protected static MRCluster cluster;
+ protected static int cSize;
+ protected static JTClient jtClient;
+ protected static JTProtocol rtClient;
+ protected static Path gridmixDir;
+ protected static Map<String, String> map;
+ protected static GridmixJobSubmission gridmixJS;
+ protected static GridmixJobVerification gridmixJV;
+ protected static List<JobID> jobids;
+
+ @BeforeClass
+ public static void before() throws Exception {
+ String [] excludeExpList = {"java.net.ConnectException",
+ "java.io.IOException"};
+ cluster = MRCluster.createCluster(conf);
+ cluster.setExcludeExpList(excludeExpList);
+ cluster.setUp();
+ cSize = cluster.getTTClients().size();
+ jtClient = cluster.getJTClient();
+ rtClient = jtClient.getProxy();
+ gridmixDir = new Path("herriot-gridmix");
+ UtilsForGridmix.createDirs(gridmixDir, rtClient.getDaemonConf());
+ map = UtilsForGridmix.getMRTraces(rtClient.getDaemonConf());
+ }
+
+ @AfterClass
+ public static void after() throws Exception {
+ UtilsForGridmix.cleanup(gridmixDir, rtClient.getDaemonConf());
+ org.apache.hadoop.fs.FileUtil.fullyDelete(new java.io.File(System.
+ getProperty("java.io.tmpdir") + "/gridmix-st/"));
+ cluster.tearDown();
+
+ /* Clean up the proxy user directories if gridmix run with
+ RoundRobinUserResovler mode.*/
+ if (gridmixJV.getJobUserResolver().contains("RoundRobin")) {
+ List<String> proxyUsers =
+ UtilsForGridmix.listProxyUsers(gridmixJS.getJobConf(),
+ UserGroupInformation.getLoginUser().getShortUserName());
+ for(int index = 0; index < proxyUsers.size(); index++){
+ UtilsForGridmix.cleanup(new Path("hdfs:///user/" +
+ proxyUsers.get(index)),
+ rtClient.getDaemonConf());
+ }
+ }
+ }
+
+ /**
+ * Run the gridmix with specified runtime parameters and
+ * verify the jobs the after completion of execution.
+ * @param runtimeValues - common runtime arguments for gridmix.
+ * @param otherValues - test specific runtime arguments for gridmix.
+ * @param tracePath - path of a trace file.
+ * @throws Exception - if an exception occurs.
+ */
+ public static void runGridmixAndVerify(String[] runtimeValues,
+ String [] otherValues, String tracePath) throws Exception {
+ runGridmixAndVerify(runtimeValues, otherValues, tracePath ,
+ GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue());
+ }
+
+ /**
+ * Run the gridmix with specified runtime parameters and
+ * verify the jobs the after completion of execution.
+ * @param runtimeValues - common runtime arguments for gridmix.
+ * @param otherValues - test specific runtime arguments for gridmix.
+ * @param tracePath - path of a trace file.
+ * @param mode - 1 for data generation, 2 for run the gridmix and 3 for
+ * data generation and run the gridmix.
+ * @throws Exception - if an exception occurs.
+ */
+ public static void runGridmixAndVerify(String [] runtimeValues,
+ String [] otherValues, String tracePath, int mode) throws Exception {
+ List<JobID> jobids = runGridmix(runtimeValues, otherValues, mode);
+ gridmixJV = new GridmixJobVerification(new Path(tracePath),
+ gridmixJS.getJobConf(), jtClient);
+ gridmixJV.verifyGridmixJobsWithJobStories(jobids);
+ }
+
+ /**
+ * Run the gridmix with user specified mode.
+ * @param runtimeValues - common runtime parameters for gridmix.
+ * @param otherValues - test specifix runtime parameters for gridmix.
+ * @param mode - 1 for data generation, 2 for run the gridmix and 3 for
+ * data generation and run the gridmix.
+ * @return - list of gridmix job ids.
+ * @throws Exception - if an exception occurs.
+ */
+ public static List<JobID> runGridmix(String[] runtimeValues,
+ String[] otherValues, int mode) throws Exception {
+ gridmixJS = new GridmixJobSubmission(rtClient.getDaemonConf(),
+ jtClient, gridmixDir);
+ gridmixJS.submitJobs(runtimeValues, otherValues, mode);
+ List<JobID> jobids =
+ UtilsForGridmix.listGridmixJobIDs(jtClient.getClient(),
+ gridmixJS.getGridmixJobCount());
+ return jobids;
+ }
+
+ /**
+ * get the trace file based on given regular expression.
+ * @param regExp - trace file file pattern.
+ * @return - trace file as string.
+ * @throws IOException - if an I/O error occurs.
+ */
+ public static String getTraceFile(String regExp) throws IOException {
+ List<String> listTraces = UtilsForGridmix.listMRTraces(
+ rtClient.getDaemonConf());
+ Iterator<String> ite = listTraces.iterator();
+ while(ite.hasNext()) {
+ String traceFile = ite.next();
+ if (traceFile.indexOf(regExp)>=0) {
+ return traceFile;
+ }
+ }
+ return null;
+ }
+}
+
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixDataGeneration.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixDataGeneration.java
new file mode 100644
index 0000000..f1501bf
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixDataGeneration.java
@@ -0,0 +1,226 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapreduce.test.system.MRCluster;
+import org.apache.hadoop.mapreduce.test.system.JTProtocol;
+import org.apache.hadoop.mapreduce.test.system.JTClient;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobStatus;
+import org.apache.hadoop.mapred.gridmix.RoundRobinUserResolver;
+import org.apache.hadoop.mapred.gridmix.EchoUserResolver;
+import org.apache.hadoop.mapred.gridmix.SubmitterUserResolver;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.ContentSummary;
+import org.junit.BeforeClass;
+import org.junit.AfterClass;
+import org.junit.Test;
+import org.junit.Assert;
+import java.io.IOException;
+
+/**
+ * Verify the Gridmix data generation with various submission policies and
+ * user resolver modes.
+ */
+public class TestGridMixDataGeneration {
+ private static final Log LOG =
+ LogFactory.getLog(TestGridMixDataGeneration.class);
+ private static Configuration conf = new Configuration();
+ private static MRCluster cluster;
+ private static JTClient jtClient;
+ private static JTProtocol rtClient;
+ private static Path gridmixDir;
+ private static int cSize;
+
+ @BeforeClass
+ public static void before() throws Exception {
+ String [] excludeExpList = {"java.net.ConnectException",
+ "java.io.IOException"};
+ cluster = MRCluster.createCluster(conf);
+ cluster.setExcludeExpList(excludeExpList);
+ cluster.setUp();
+ cSize = cluster.getTTClients().size();
+ jtClient = cluster.getJTClient();
+ rtClient = jtClient.getProxy();
+ gridmixDir = new Path("herriot-gridmix");
+ UtilsForGridmix.createDirs(gridmixDir, rtClient.getDaemonConf());
+ }
+
+ @AfterClass
+ public static void after() throws Exception {
+ UtilsForGridmix.cleanup(gridmixDir,conf);
+ cluster.tearDown();
+ }
+
+ /**
+ * Generate the data in a STRESS submission policy with SubmitterUserResolver
+ * mode and verify whether the generated data matches with given
+ * input size or not.
+ * @throws IOException
+ */
+ @Test
+ public void testGenerateDataWithSTRESSSubmission() throws Exception {
+ conf = rtClient.getDaemonConf();
+ final long inputSizeInMB = cSize * 128;
+ String [] runtimeValues = {"LOADJOB",
+ SubmitterUserResolver.class.getName(),
+ "STRESS",
+ inputSizeInMB + "m",
+ "file:///dev/null"};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+ };
+ int exitCode =
+ UtilsForGridmix.runGridmixJob(gridmixDir, conf,
+ GridMixRunMode.DATA_GENERATION.getValue(),
+ runtimeValues, otherArgs);
+ Assert.assertEquals("Data generation has failed.", 0 , exitCode);
+ checkGeneratedDataAndJobStatus(inputSizeInMB);
+ }
+
+ /**
+ * Generate the data in a REPLAY submission policy with RoundRobinUserResolver
+ * mode and verify whether the generated data matches with the given
+ * input size or not.
+ * @throws Exception
+ */
+ @Test
+ public void testGenerateDataWithREPLAYSubmission() throws Exception {
+ conf = rtClient.getDaemonConf();
+ final long inputSizeInMB = cSize * 300;
+ String [] runtimeValues =
+ {"LOADJOB",
+ RoundRobinUserResolver.class.getName(),
+ "REPLAY",
+ inputSizeInMB +"m",
+ "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+ "file:///dev/null"};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+ };
+
+ int exitCode =
+ UtilsForGridmix.runGridmixJob(gridmixDir, conf,
+ GridMixRunMode.DATA_GENERATION.getValue(),
+ runtimeValues, otherArgs);
+ Assert.assertEquals("Data generation has failed.", 0 , exitCode);
+ checkGeneratedDataAndJobStatus(inputSizeInMB);
+ }
+
+ /**
+ * Generate the data in a SERIAL submission policy with EchoUserResolver
+ * mode and also set the no.of bytes per file in the data.Verify whether each
+ * file size matches with given per file size or not and also
+ * verify the overall size of generated data.
+ * @throws Exception
+ */
+ @Test
+ public void testGenerateDataWithSERIALSubmission() throws Exception {
+ conf = rtClient.getDaemonConf();
+ long perNodeSizeInMB = 500; // 500 mb per node data
+ final long inputSizeInMB = cSize * perNodeSizeInMB;
+ String [] runtimeValues ={"LOADJOB",
+ EchoUserResolver.class.getName(),
+ "SERIAL",
+ inputSizeInMB + "m",
+ "file:///dev/null"};
+ long bytesPerFile = 200 * 1024 * 1024; // 200 mb per file of data
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_BYTES_PER_FILE + "=" + bytesPerFile,
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+ };
+ int exitCode =
+ UtilsForGridmix.runGridmixJob(gridmixDir, conf,
+ GridMixRunMode.DATA_GENERATION.getValue(),
+ runtimeValues, otherArgs);
+ Assert.assertEquals("Data generation has failed.", 0 , exitCode);
+ LOG.info("Verify the eache file size in a generate data.");
+ verifyEachNodeSize(new Path(gridmixDir, "input"), perNodeSizeInMB);
+ verifyNumOfFilesGeneratedInEachNode(new Path(gridmixDir, "input"),
+ perNodeSizeInMB, bytesPerFile);
+ checkGeneratedDataAndJobStatus(inputSizeInMB);
+ }
+
+ private void checkGeneratedDataAndJobStatus(long inputSize)
+ throws IOException {
+ LOG.info("Verify the generated data size.");
+ long dataSizeInMB = getDataSizeInMB(new Path(gridmixDir,"input"));
+ Assert.assertTrue("Generate data has not matched with given size",
+ dataSizeInMB + 0.1 > inputSize || dataSizeInMB - 0.1 < inputSize);
+
+ JobClient jobClient = jtClient.getClient();
+ int len = jobClient.getAllJobs().length;
+ LOG.info("Verify the job status after completion of job.");
+ Assert.assertEquals("Job has not succeeded.", JobStatus.SUCCEEDED,
+ jobClient.getAllJobs()[len-1].getRunState());
+ }
+
+ private void verifyEachNodeSize(Path inputDir, long dataSizePerNode)
+ throws IOException {
+ FileSystem fs = inputDir.getFileSystem(conf);
+ FileStatus [] fstatus = fs.listStatus(inputDir);
+ for (FileStatus fstat : fstatus) {
+ if ( fstat.isDirectory()) {
+ long fileSize = getDataSizeInMB(fstat.getPath());
+ Assert.assertTrue("The Size has not matched with given "
+ + "per node file size(" + dataSizePerNode +"MB)",
+ fileSize + 0.1 > dataSizePerNode
+ || fileSize - 0.1 < dataSizePerNode);
+ }
+ }
+ }
+
+ private void verifyNumOfFilesGeneratedInEachNode(Path inputDir,
+ long nodeSize, long fileSize) throws IOException {
+ long fileCount = nodeSize/fileSize;
+ long expFileCount = Math.round(fileCount);
+ expFileCount = expFileCount + ((nodeSize%fileSize != 0)? 1:0);
+ FileSystem fs = inputDir.getFileSystem(conf);
+ FileStatus [] fstatus = fs.listStatus(inputDir);
+ for (FileStatus fstat : fstatus) {
+ if ( fstat.isDirectory()) {
+ FileSystem nodeFs = fstat.getPath().getFileSystem(conf);
+ long actFileCount = nodeFs.getContentSummary(
+ fstat.getPath()).getFileCount();
+ Assert.assertEquals("File count has not matched.", expFileCount,
+ actFileCount);
+ }
+ }
+ }
+
+ private static long getDataSizeInMB(Path inputDir) throws IOException {
+ FileSystem fs = inputDir.getFileSystem(conf);
+ ContentSummary csmry = fs.getContentSummary(inputDir);
+ long dataSize = csmry.getLength();
+ dataSize = dataSize/(1024 * 1024);
+ return dataSize;
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixFilePool.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixFilePool.java
new file mode 100644
index 0000000..1ad10d8
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridMixFilePool.java
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.test.system.MRCluster;
+import org.apache.hadoop.mapreduce.test.system.JTClient;
+import org.apache.hadoop.mapreduce.test.system.JTProtocol;
+import org.apache.hadoop.mapred.gridmix.FilePool;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.AfterClass;
+import org.junit.Test;
+import java.io.IOException;
+import java.util.ArrayList;
+
+public class TestGridMixFilePool {
+ private static final Log LOG =
+ LogFactory.getLog(TestGridMixFilePool.class);
+ private static Configuration conf = new Configuration();
+ private static MRCluster cluster;
+ private static JTProtocol remoteClient;
+ private static JTClient jtClient;
+ private static Path gridmixDir;
+ private static int clusterSize;
+
+ @BeforeClass
+ public static void before() throws Exception {
+ String [] excludeExpList = {"java.net.ConnectException",
+ "java.io.IOException"};
+ cluster = MRCluster.createCluster(conf);
+ cluster.setExcludeExpList(excludeExpList);
+ cluster.setUp();
+ jtClient = cluster.getJTClient();
+ remoteClient = jtClient.getProxy();
+ clusterSize = cluster.getTTClients().size();
+ gridmixDir = new Path("herriot-gridmix");
+ UtilsForGridmix.createDirs(gridmixDir, remoteClient.getDaemonConf());
+ }
+
+ @AfterClass
+ public static void after() throws Exception {
+ UtilsForGridmix.cleanup(gridmixDir, conf);
+ cluster.tearDown();
+ }
+
+ @Test
+ public void testFilesCountAndSizesForSpecifiedFilePool() throws Exception {
+ conf = remoteClient.getDaemonConf();
+ final long inputSizeInMB = clusterSize * 200;
+ int [] fileSizesInMB = {50, 100, 400, 50, 300, 10, 60, 40, 20 ,10 , 500};
+ long targetSize = Long.MAX_VALUE;
+ final int expFileCount = clusterSize + 4;
+ String [] runtimeValues ={"LOADJOB",
+ SubmitterUserResolver.class.getName(),
+ "STRESS",
+ inputSizeInMB + "m",
+ "file:///dev/null"};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+ };
+
+ // Generate the input data by using gridmix framework.
+ int exitCode =
+ UtilsForGridmix.runGridmixJob(gridmixDir, conf,
+ GridMixRunMode.DATA_GENERATION.getValue(),
+ runtimeValues, otherArgs);
+ Assert.assertEquals("Data generation has failed.", 0 , exitCode);
+ // Create the files without using gridmix input generation with
+ // above mentioned sizes in a array.
+ createFiles(new Path(gridmixDir, "input"), fileSizesInMB);
+ conf.setLong(FilePool.GRIDMIX_MIN_FILE, 100 * 1024 * 1024);
+ FilePool fpool = new FilePool(conf, new Path(gridmixDir, "input"));
+ fpool.refresh();
+ verifyFilesSizeAndCountForSpecifiedPool(expFileCount, targetSize, fpool);
+ }
+
+ private void createFiles(Path inputDir, int [] fileSizes)
+ throws Exception {
+ for (int size : fileSizes) {
+ UtilsForGridmix.createFile(size, inputDir, conf);
+ }
+ }
+
+ private void verifyFilesSizeAndCountForSpecifiedPool(int expFileCount,
+ long minFileSize, FilePool pool) throws IOException {
+ final ArrayList<FileStatus> files = new ArrayList<FileStatus>();
+ long filesSizeInBytes = pool.getInputFiles(minFileSize, files);
+ long actFilesSizeInMB = filesSizeInBytes / (1024 * 1024);
+ long expFilesSizeInMB = (clusterSize * 200) + 1300;
+ Assert.assertEquals("Files Size has not matched for specified pool.",
+ expFilesSizeInMB, actFilesSizeInMB);
+ int actFileCount = files.size();
+ Assert.assertEquals("File count has not matched.", expFileCount,
+ actFileCount);
+ int count = 0;
+ for (FileStatus fstat : files) {
+ String fp = fstat.getPath().toString();
+ count = count + ((fp.indexOf("datafile_") > 0)? 0 : 1);
+ }
+ Assert.assertEquals("Total folders are not matched with cluster size",
+ clusterSize, count);
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith10minTrace.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith10minTrace.java
new file mode 100644
index 0000000..c48a746
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith10minTrace.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.junit.Test;
+
+/**
+ * Run the Gridmix with 10 minutes MR jobs trace and
+ * verify each job history against the corresponding job story
+ * in a given trace file.
+ */
+public class TestGridmixWith10minTrace extends GridmixSystemTestCase {
+ private static final Log LOG =
+ LogFactory.getLog(TestGridmixWith10minTrace.class);
+
+ /**
+ * Generate data and run gridmix by sleep jobs with STRESS submission
+ * policy in a RoundRobinUserResolver mode against 10 minutes trace file.
+ * Verify each Gridmix job history with a corresponding job story
+ * in a trace file after completion of all the jobs execution.
+ * @throws Exception - if an error occurs.
+ */
+ @Test
+ public void testGridmixWith10minTrace() throws Exception {
+ final long inputSizeInMB = cSize * 250;
+ final long minFileSize = 200 * 1024 * 1024;
+ String [] runtimeValues =
+ {"SLEEPJOB",
+ RoundRobinUserResolver.class.getName(),
+ "SERIAL",
+ inputSizeInMB + "m",
+ "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+ map.get("10m")};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_MINIMUM_FILE_SIZE + "=" + minFileSize,
+ "-D", GridMixConfig.GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE + "=false",
+ "-D", GridMixConfig.GRIDMIX_SLEEPJOB_MAPTASK_ONLY + "=true",
+ "-D", GridMixConfig.GRIDMIX_SLEEP_MAP_MAX_TIME + "=10"
+ };
+ String tracePath = map.get("10m");
+ runGridmixAndVerify(runtimeValues, otherArgs,tracePath);
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith12minTrace.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith12minTrace.java
new file mode 100644
index 0000000..ec2a137
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith12minTrace.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.junit.Test;
+
+/**
+ * Run the Gridmix with 12 minutes MR job traces and
+ * verify each job history against the corresponding job story
+ * in a given trace file.
+ */
+public class TestGridmixWith12minTrace extends GridmixSystemTestCase {
+ private static final Log LOG =
+ LogFactory.getLog(TestGridmixWith12minTrace.class);
+
+ /**
+ * Generate data and run gridmix sleep jobs with REPLAY submission
+ * policy in a SubmitterUserResolver mode against 12 minutes trace file.
+ * Verify each Gridmix job history with a corresponding job story
+ * in a trace file after completion of all the jobs execution.
+ * @throws Exception - if an error occurs.
+ */
+ @Test
+ public void testGridmixWith12minTrace() throws Exception {
+ final long inputSizeInMB = cSize * 150;
+ String [] runtimeValues = {"SLEEPJOB",
+ SubmitterUserResolver.class.getName(),
+ "REPLAY",
+ inputSizeInMB + "m",
+ map.get("12m")};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_SLEEP_MAP_MAX_TIME + "=10",
+ "-D", GridMixConfig.GRIDMIX_SLEEP_REDUCE_MAX_TIME + "=5"
+ };
+
+ String tracePath = map.get("12m");
+ runGridmixAndVerify(runtimeValues, otherArgs, tracePath);
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith1minTrace.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith1minTrace.java
new file mode 100644
index 0000000..ed26484
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith1minTrace.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.junit.Test;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+
+/**
+ * Run the Gridmix with 1 minute MR jobs trace and
+ * verify each job history against the corresponding job story
+ * in a given trace file.
+ */
+public class TestGridmixWith1minTrace extends GridmixSystemTestCase{
+ private static final Log LOG =
+ LogFactory.getLog(TestGridmixWith1minTrace.class);
+
+ /**
+ * Generate data and run gridmix by load job with STRESS submission policy
+ * in a SubmitterUserResolver mode against 1 minute trace file.
+ * Verify each Gridmix job history with a corresponding job story in the
+ * trace after completion of all the jobs execution.
+ * @throws Exception - if an error occurs.
+ */
+ @Test
+ public void testGridmixWith1minTrace() throws Exception {
+ final long inputSizeInMB = cSize * 400;
+ String [] runtimeValues = {"LOADJOB",
+ SubmitterUserResolver.class.getName(),
+ "STRESS",
+ inputSizeInMB + "m",
+ map.get("1m")};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+ };
+
+ String tracePath = map.get("1m");
+ runGridmixAndVerify(runtimeValues, otherArgs, tracePath);
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith2minStreamingJobTrace.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith2minStreamingJobTrace.java
new file mode 100644
index 0000000..9628dd2
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith2minStreamingJobTrace.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.junit.Test;
+import org.junit.Assert;
+
+/**
+ * Run the Gridmix with 2 minutes job trace which has been generated with
+ * streaming jobs histories and verify each job history against
+ * the corresponding job story in a given trace file.
+ */
+public class TestGridmixWith2minStreamingJobTrace
+ extends GridmixSystemTestCase {
+ private static final Log LOG =
+ LogFactory.getLog("TestGridmixWith2minStreamingJobTrace.class");
+
+ /**
+ * Generate input data and run Gridmix by load job with STRESS submission
+ * policy in a SubmitterUserResolver mode against 2 minutes job
+ * trace file of streaming jobs. Verify each Gridmix job history with
+ * a corresponding job story in a trace file after completion of all
+ * the jobs execution.
+ * @throws Exception - if an error occurs.
+ */
+ @Test
+ public void testGridmixWith2minStreamJobTrace() throws Exception {
+ final long inputSizeInMB = cSize * 250;
+ final long minFileSize = 150 * 1024 * 1024;
+ String tracePath = getTraceFile("2m_stream");
+ Assert.assertNotNull("Trace file has not found.", tracePath);
+ String [] runtimeValues = {"LOADJOB",
+ SubmitterUserResolver.class.getName(),
+ "STRESS",
+ inputSizeInMB + "m",
+ tracePath};
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE + "=true",
+ "-D", GridMixConfig.GRIDMIX_MINIMUM_FILE_SIZE + "=" + minFileSize,
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+ };
+ runGridmixAndVerify(runtimeValues, otherArgs, tracePath);
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith3minStreamingJobTrace.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith3minStreamingJobTrace.java
new file mode 100644
index 0000000..926f795
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith3minStreamingJobTrace.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Run the Gridmix with 3 minutes job trace which has been generated with
+ * streaming jobs histories and verify each job history against
+ * corresponding job story in a given trace file.
+ */
+public class TestGridmixWith3minStreamingJobTrace
+ extends GridmixSystemTestCase {
+ private static final Log LOG =
+ LogFactory.getLog("TestGridmixWith3minStreamingJobTrace.class");
+
+ /**
+ * Generate input data and run gridmix by load job with REPLAY submission
+ * policy in a RoundRobinUserResolver mode against 3 minutes job trace file
+ * of streaming job. Verify each gridmix job history with a corresponding
+ * job story in a trace file after completion of all the jobs execution.
+ * @throws Exception - if an error occurs.
+ */
+ @Test
+ public void testGridmixWith3minStreamJobTrace() throws Exception {
+ final long inputSizeInMB = cSize * 200;
+ final long bytesPerFile = 150 * 1024 * 1024;
+ String tracePath = getTraceFile("3m_stream");
+ Assert.assertNotNull("Trace file has not found.", tracePath);
+ String [] runtimeValues =
+ {"LOADJOB",
+ RoundRobinUserResolver.class.getName(),
+ "REPLAY",
+ inputSizeInMB + "m",
+ "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+ tracePath};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE + "=true",
+ "-D", GridMixConfig.GRIDMIX_BYTES_PER_FILE + "=" + bytesPerFile,
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+ };
+
+ runGridmixAndVerify(runtimeValues, otherArgs, tracePath);
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith3minTrace.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith3minTrace.java
new file mode 100644
index 0000000..bed33d0
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith3minTrace.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.junit.Test;
+
+/**
+ * Run the Gridmix with 3 minutes MR jobs trace and
+ * verify each job history against the corresponding job story
+ * in a given trace file.
+ */
+public class TestGridmixWith3minTrace extends GridmixSystemTestCase {
+ private static final Log LOG =
+ LogFactory.getLog(TestGridmixWith3minTrace.class);
+
+ /**
+ * Generate data and run gridmix by load job with REPLAY submission
+ * policy in a RoundRobinUserResolver mode by using 3 minutes trace file.
+ * Verify each Gridmix job history with a corresponding job story in
+ * a trace after completion of all the jobs execution.
+ * @throws Exception - if an error occurs.
+ */
+ @Test
+ public void testGridmixWith3minTrace() throws Exception {
+ final long inputSizeInMB = cSize * 200;
+ String [] runtimeValues =
+ {"LOADJOB",
+ RoundRobinUserResolver.class.getName(),
+ "REPLAY",
+ inputSizeInMB + "m",
+ "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+ map.get("3m")};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+ };
+
+ String tracePath = map.get("3m");
+ runGridmixAndVerify(runtimeValues, otherArgs, tracePath);
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith5minStreamingJobTrace.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith5minStreamingJobTrace.java
new file mode 100644
index 0000000..370f120
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith5minStreamingJobTrace.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.junit.Test;
+import org.junit.Assert;
+
+/**
+ * Run the Gridmix with 5 minutes job trace which has been generated with
+ * streaming jobs histories and verify each job history against
+ * corresponding job story in a given trace file.
+ */
+public class TestGridmixWith5minStreamingJobTrace
+ extends GridmixSystemTestCase {
+ private static final Log LOG =
+ LogFactory.getLog("TestGridmixWith5minStreamingJobTrace.class");
+
+ /**
+ * Generate input data and run gridmix by load job with SERIAL submission
+ * policy in a SubmitterUserResolver mode against 5 minutes job trace file
+ * of streaming job. Verify each gridmix job history with a corresponding
+ * job story in a trace file after completion of all the jobs execution.
+ * @throws Exception - if an error occurs.
+ */
+ @Test
+ public void testGridmixWith5minStreamJobTrace() throws Exception {
+ String tracePath = getTraceFile("5m_stream");
+ Assert.assertNotNull("Trace file has not found.", tracePath);
+ final long inputSizeInMB = cSize * 200;
+ final long bytesPerFile = 150 * 1024 * 1024;
+ String [] runtimeValues = {"LOADJOB",
+ SubmitterUserResolver.class.getName(),
+ "SERIAL",
+ inputSizeInMB + "m",
+ tracePath};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_KEY_FRC + "=0.5f",
+ "-D", GridMixConfig.GRIDMIX_BYTES_PER_FILE + "=" + bytesPerFile,
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+ };
+
+ runGridmixAndVerify(runtimeValues, otherArgs, tracePath);
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith5minTrace.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith5minTrace.java
new file mode 100644
index 0000000..5a141d4
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith5minTrace.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.junit.Test;
+
+/**
+ * Run the Gridmix with 5 minutes MR jobs trace and
+ * verify each job history against the corresponding job story
+ * in a given trace file.
+ */
+public class TestGridmixWith5minTrace extends GridmixSystemTestCase {
+ private static final Log LOG =
+ LogFactory.getLog(TestGridmixWith5minTrace.class);
+
+ /**
+ * Generate data and run gridmix by load job with SERIAL submission
+ * policy in a SubmitterUserResolver mode against 5 minutes trace file.
+ * Verify each Gridmix job history with a corresponding job story
+ * in a trace file after completion of all the jobs.
+ * @throws Exception - if an error occurs.
+ */
+ @Test
+ public void testGridmixWith5minTrace() throws Exception {
+ final long inputSizeInMB = cSize * 300;
+ final long minFileSize = 100 * 1024 * 1024;
+ String [] runtimeValues ={"LOADJOB",
+ SubmitterUserResolver.class.getName(),
+ "SERIAL",
+ inputSizeInMB + "m",
+ map.get("5m")};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_MINIMUM_FILE_SIZE + "=" + minFileSize
+ };
+
+ String tracePath = map.get("5m");
+ runGridmixAndVerify(runtimeValues, otherArgs, tracePath);
+ }
+}
+
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith7minTrace.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith7minTrace.java
new file mode 100644
index 0000000..0791d68
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixWith7minTrace.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.junit.Test;
+
+/**
+ * Run the Gridmix with 7 minutes MR jobs trace and
+ * verify each job history against the corresponding job story
+ * in a given trace file.
+ */
+public class TestGridmixWith7minTrace extends GridmixSystemTestCase {
+ private static final Log LOG =
+ LogFactory.getLog(TestGridmixWith7minTrace.class);
+
+ /**
+ * Generate data and run gridmix by sleep job with STRESS submission
+ * policy in a SubmitterUserResolver mode against 7 minute trace file.
+ * Verify each Gridmix job history with a corresponding job story
+ * in a trace file after completion of all the jobs execution.
+ * @throws Exception - if an error occurs.
+ */
+ @Test
+ public void testGridmixWith7minTrace() throws Exception {
+ final long inputSizeInMB = cSize * 400;
+ final long minFileSize = 200 * 1024 * 1024;
+ String [] runtimeValues ={"SLEEPJOB",
+ SubmitterUserResolver.class.getName(),
+ "STRESS",
+ inputSizeInMB + "m",
+ map.get("7m")};
+
+ String [] otherArgs = {
+ "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false",
+ "-D", GridMixConfig.GRIDMIX_MINIMUM_FILE_SIZE + "=" + minFileSize,
+ "-D", GridMixConfig.GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE + "=false"
+ };
+ String tracePath = map.get("7m");
+ runGridmixAndVerify(runtimeValues, otherArgs, tracePath);
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java
new file mode 100644
index 0000000..29adaf1
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.test.system;
+
+import org.apache.hadoop.mapred.gridmix.Gridmix;
+import org.apache.hadoop.mapred.gridmix.JobCreator;
+import org.apache.hadoop.mapred.gridmix.SleepJob;
+
+public class GridMixConfig {
+
+ /**
+ * Gridmix original job id.
+ */
+ public static final String GRIDMIX_ORIGINAL_JOB_ID = Gridmix.ORIGINAL_JOB_ID;
+
+ /**
+ * Gridmix output directory.
+ */
+ public static final String GRIDMIX_OUTPUT_DIR = Gridmix.GRIDMIX_OUT_DIR;
+
+ /**
+ * Gridmix job type (LOADJOB/SLEEPJOB).
+ */
+ public static final String GRIDMIX_JOB_TYPE = JobCreator.GRIDMIX_JOB_TYPE;
+
+ /**
+ * Gridmix submission use queue.
+ */
+ /* In Gridmix package the visibility of below mentioned
+ properties are protected and it have not visible outside
+ the package. However,it should required for system tests,
+ so it's re-defining in system tests config file.*/
+ public static final String GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE =
+ "gridmix.job-submission.use-queue-in-trace";
+
+ /**
+ * Gridmix user resolver(RoundRobinUserResolver/
+ * SubmitterUserResolver/EchoUserResolver).
+ */
+ public static final String GRIDMIX_USER_RESOLVER = Gridmix.GRIDMIX_USR_RSV;
+
+ /**
+ * Gridmix queue depth.
+ */
+ public static final String GRIDMIX_QUEUE_DEPTH = Gridmix.GRIDMIX_QUE_DEP;
+
+ /* In Gridmix package the visibility of below mentioned
+ property is protected and it should not available for
+ outside the package. However,it should required for
+ system tests, so it's re-defining in system tests config file.*/
+ /**
+ * Gridmix generate bytes per file.
+ */
+ public static final String GRIDMIX_BYTES_PER_FILE =
+ "gridmix.gen.bytes.per.file";
+
+ /**
+ * Gridmix job submission policy(STRESS/REPLAY/SERIAL).
+ */
+
+ public static final String GRIDMIX_SUBMISSION_POLICY =
+ "gridmix.job-submission.policy";
+
+ /**
+ * Gridmix minimum file size.
+ */
+ public static final String GRIDMIX_MINIMUM_FILE_SIZE =
+ "gridmix.min.file.size";
+
+ /**
+ * Gridmix key fraction.
+ */
+ public static final String GRIDMIX_KEY_FRC =
+ "gridmix.key.fraction";
+
+ /**
+ * Gridmix compression enable
+ */
+ public static final String GRIDMIX_COMPRESSION_ENABLE =
+ "gridmix.compression-emulation.enable";
+ /**
+ * Gridmix distcache enable
+ */
+ public static final String GRIDMIX_DISTCACHE_ENABLE =
+ "gridmix.distributed-cache-emulation.enable";
+
+ /**
+ * Gridmix logger mode.
+ */
+ public static final String GRIDMIX_LOG_MODE =
+ "log4j.logger.org.apache.hadoop.mapred.gridmix";
+
+ /**
+ * Gridmix sleep job map task only.
+ */
+ public static final String GRIDMIX_SLEEPJOB_MAPTASK_ONLY =
+ SleepJob.SLEEPJOB_MAPTASK_ONLY;
+
+ /**
+ * Gridmix sleep map maximum time.
+ */
+ public static final String GRIDMIX_SLEEP_MAP_MAX_TIME =
+ SleepJob.GRIDMIX_SLEEP_MAX_MAP_TIME;
+
+ /**
+ * Gridmix sleep reduce maximum time.
+ */
+ public static final String GRIDMIX_SLEEP_REDUCE_MAX_TIME =
+ SleepJob.GRIDMIX_SLEEP_MAX_REDUCE_TIME;
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixRunMode.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixRunMode.java
new file mode 100644
index 0000000..0abfc5c
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixRunMode.java
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.test.system;
+/**
+ * Gridmix run modes.
+ *
+ */
+public enum GridMixRunMode {
+ DATA_GENERATION(1), RUN_GRIDMIX(2), DATA_GENERATION_AND_RUN_GRIDMIX(3);
+ private int mode;
+
+ GridMixRunMode (int mode) {
+ this.mode = mode;
+ }
+
+ public int getValue() {
+ return mode;
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobStory.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobStory.java
new file mode 100644
index 0000000..ad00f0d
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobStory.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.test.system;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.tools.rumen.ZombieJobProducer;
+import org.apache.hadoop.tools.rumen.ZombieJob;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Build the job stories with a given trace file.
+ */
+public class GridmixJobStory {
+ private static Log LOG = LogFactory.getLog(GridmixJobStory.class);
+ private Path path;
+ private Map<JobID, ZombieJob> zombieJobs;
+ private Configuration conf;
+
+ public GridmixJobStory(Path path, Configuration conf) {
+ this.path = path;
+ this.conf = conf;
+ try {
+ zombieJobs = buildJobStories();
+ if(zombieJobs == null) {
+ throw new NullPointerException("No jobs found in a "
+ + " given trace file.");
+ }
+ } catch (IOException ioe) {
+ LOG.warn("Error:" + ioe.getMessage());
+ } catch (NullPointerException npe) {
+ LOG.warn("Error:" + npe.getMessage());
+ }
+ }
+
+ /**
+ * Get the zombie jobs as a map.
+ * @return the zombie jobs map.
+ */
+ public Map<JobID, ZombieJob> getZombieJobs() {
+ return zombieJobs;
+ }
+
+ /**
+ * Get the zombie job of a given job id.
+ * @param jobId - gridmix job id.
+ * @return - the zombie job object.
+ */
+ public ZombieJob getZombieJob(JobID jobId) {
+ return zombieJobs.get(jobId);
+ }
+
+ private Map<JobID, ZombieJob> buildJobStories() throws IOException {
+ ZombieJobProducer zjp = new ZombieJobProducer(path,null, conf);
+ Map<JobID, ZombieJob> hm = new HashMap<JobID, ZombieJob>();
+ ZombieJob zj = zjp.getNextJob();
+ while (zj != null) {
+ hm.put(zj.getJobID(),zj);
+ zj = zjp.getNextJob();
+ }
+ if (hm.size() == 0) {
+ return null;
+ } else {
+ return hm;
+ }
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobSubmission.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobSubmission.java
new file mode 100644
index 0000000..6a5699e
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobSubmission.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.test.system;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.test.system.JTClient;
+import org.junit.Assert;
+
+/**
+ * Submit the gridmix jobs.
+ */
+public class GridmixJobSubmission {
+ private static final Log LOG =
+ LogFactory.getLog(GridmixJobSubmission.class);
+ private int gridmixJobCount;
+ private Configuration conf;
+ private Path gridmixDir;
+ private JTClient jtClient;
+
+ public GridmixJobSubmission(Configuration conf, JTClient jtClient ,
+ Path gridmixDir) {
+ this.conf = conf;
+ this.jtClient = jtClient;
+ this.gridmixDir = gridmixDir;
+ }
+
+ /**
+ * Submit the gridmix jobs.
+ * @param runtimeArgs - gridmix common runtime arguments.
+ * @param otherArgs - gridmix other runtime arguments.
+ * @param traceInterval - trace time interval.
+ * @throws Exception
+ */
+ public void submitJobs(String [] runtimeArgs,
+ String [] otherArgs, int mode) throws Exception {
+ int prvJobCount = jtClient.getClient().getAllJobs().length;
+ int exitCode = -1;
+ if (otherArgs == null) {
+ exitCode = UtilsForGridmix.runGridmixJob(gridmixDir, conf,
+ mode, runtimeArgs);
+ } else {
+ exitCode = UtilsForGridmix.runGridmixJob(gridmixDir, conf, mode,
+ runtimeArgs, otherArgs);
+ }
+ Assert.assertEquals("Gridmix jobs have failed.", 0 , exitCode);
+ gridmixJobCount = jtClient.getClient().getAllJobs().length - prvJobCount;
+ }
+
+ /**
+ * Get the submitted jobs count.
+ * @return count of no. of jobs submitted for a trace.
+ */
+ public int getGridmixJobCount() {
+ return gridmixJobCount;
+ }
+
+ /**
+ * Get the job configuration.
+ * @return Configuration of a submitted job.
+ */
+ public Configuration getJobConf() {
+ return conf;
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobVerification.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobVerification.java
new file mode 100644
index 0000000..1083a83
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobVerification.java
@@ -0,0 +1,368 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.test.system;
+
+import java.io.IOException;
+import java.io.File;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Counters;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.CounterGroup;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapreduce.test.system.JTClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.tools.rumen.LoggedJob;
+import org.apache.hadoop.tools.rumen.ZombieJob;
+import org.apache.hadoop.tools.rumen.TaskInfo;
+import org.junit.Assert;
+import java.text.ParseException;
+import org.apache.hadoop.security.UserGroupInformation;
+
+/**
+ * Verifying each Gridmix job with corresponding job story in a trace file.
+ */
+public class GridmixJobVerification {
+
+ private static Log LOG = LogFactory.getLog(GridmixJobVerification.class);
+ private Path path;
+ private Configuration conf;
+ private JTClient jtClient;
+ private String userResolverVal;
+ static final String origJobIdKey = GridMixConfig.GRIDMIX_ORIGINAL_JOB_ID;
+ static final String jobSubKey = GridMixConfig.GRIDMIX_SUBMISSION_POLICY;
+ static final String jobTypeKey = GridMixConfig.GRIDMIX_JOB_TYPE;
+ static final String mapTaskKey = GridMixConfig.GRIDMIX_SLEEPJOB_MAPTASK_ONLY;
+ static final String usrResolver = GridMixConfig.GRIDMIX_USER_RESOLVER;
+
+ /**
+ * Gridmix job verification constructor
+ * @param path - path of the gridmix output directory.
+ * @param conf - cluster configuration.
+ * @param jtClient - jobtracker client.
+ */
+ public GridmixJobVerification(Path path, Configuration conf,
+ JTClient jtClient) {
+ this.path = path;
+ this.conf = conf;
+ this.jtClient = jtClient;
+ }
+
+ /**
+ * It verifies the Gridmix jobs with corresponding job story in a trace file.
+ * @param jobids - gridmix job ids.
+ * @throws IOException - if an I/O error occurs.
+ * @throws ParseException - if an parse error occurs.
+ */
+ public void verifyGridmixJobsWithJobStories(List<JobID> jobids)
+ throws IOException, ParseException {
+
+ SortedMap <Long, String> origSubmissionTime = new TreeMap <Long, String>();
+ SortedMap <Long, String> simuSubmissionTime = new TreeMap<Long, String>();
+ GridmixJobStory gjs = new GridmixJobStory(path, conf);
+ final Iterator<JobID> ite = jobids.iterator();
+ File destFolder = new File(System.getProperty("java.io.tmpdir")
+ + "/gridmix-st/");
+ destFolder.mkdir();
+ while (ite.hasNext()) {
+ JobID simuJobId = ite.next();
+ JobHistoryParser.JobInfo jhInfo = getSimulatedJobHistory(simuJobId);
+ Assert.assertNotNull("Job history not found.", jhInfo);
+ Counters counters = jhInfo.getTotalCounters();
+ JobConf simuJobConf = getSimulatedJobConf(simuJobId, destFolder);
+ String origJobId = simuJobConf.get(origJobIdKey);
+ LOG.info("OriginalJobID<->CurrentJobID:"
+ + origJobId + "<->" + simuJobId);
+
+ if (userResolverVal == null) {
+ userResolverVal = simuJobConf.get(usrResolver);
+ }
+ ZombieJob zombieJob = gjs.getZombieJob(JobID.forName(origJobId));
+ Map<String, Long> mapJobCounters = getJobMapCounters(zombieJob);
+ Map<String, Long> reduceJobCounters = getJobReduceCounters(zombieJob);
+ if (simuJobConf.get(jobSubKey).contains("REPLAY")) {
+ origSubmissionTime.put(zombieJob.getSubmissionTime(),
+ origJobId.toString() + "^" + simuJobId);
+ simuSubmissionTime.put(jhInfo.getSubmitTime() ,
+ origJobId.toString() + "^" + simuJobId); ;
+ }
+
+ LOG.info("Verifying the job <" + simuJobId + "> and wait for a while...");
+ verifySimulatedJobSummary(zombieJob, jhInfo, simuJobConf);
+ verifyJobMapCounters(counters, mapJobCounters, simuJobConf);
+ verifyJobReduceCounters(counters, reduceJobCounters, simuJobConf);
+ LOG.info("Done.");
+ }
+ }
+
+ /**
+ * Verify the job subimssion order between the jobs in replay mode.
+ * @param origSubmissionTime - sorted map of original jobs submission times.
+ * @param simuSubmissionTime - sorted map of simulated jobs submission times.
+ */
+ public void verifyJobSumissionTime(SortedMap<Long, String> origSubmissionTime,
+ SortedMap<Long, String> simuSubmissionTime) {
+ Assert.assertEquals("Simulated job's submission time count has "
+ + "not match with Original job's submission time count.",
+ origSubmissionTime.size(), simuSubmissionTime.size());
+ for ( int index = 0; index < origSubmissionTime.size(); index ++) {
+ String origAndSimuJobID = origSubmissionTime.get(index);
+ String simuAndorigJobID = simuSubmissionTime.get(index);
+ Assert.assertEquals("Simulated jobs have not submitted in same "
+ + "order as original jobs submitted in REPLAY mode.",
+ origAndSimuJobID, simuAndorigJobID);
+ }
+ }
+
+ /**
+ * It verifies the simulated job map counters.
+ * @param counters - Original job map counters.
+ * @param mapJobCounters - Simulated job map counters.
+ * @param jobConf - Simulated job configuration.
+ * @throws ParseException - If an parser error occurs.
+ */
+ public void verifyJobMapCounters(Counters counters,
+ Map<String,Long> mapCounters, JobConf jobConf) throws ParseException {
+ if (!jobConf.get(jobTypeKey, "LOADJOB").equals("SLEEPJOB")) {
+ Assert.assertEquals("Map input records have not matched.",
+ mapCounters.get("MAP_INPUT_RECS").longValue(),
+ getCounterValue(counters, "MAP_INPUT_RECORDS"));
+ } else {
+ Assert.assertTrue("Map Input Bytes are zero",
+ getCounterValue(counters,"HDFS_BYTES_READ") != 0);
+ Assert.assertNotNull("Map Input Records are zero",
+ getCounterValue(counters, "MAP_INPUT_RECORDS")!=0);
+ }
+ }
+
+ /**
+ * It verifies the simulated job reduce counters.
+ * @param counters - Original job reduce counters.
+ * @param reduceCounters - Simulated job reduce counters.
+ * @param jobConf - simulated job configuration.
+ * @throws ParseException - if an parser error occurs.
+ */
+ public void verifyJobReduceCounters(Counters counters,
+ Map<String,Long> reduceCounters, JobConf jobConf) throws ParseException {
+ if (jobConf.get(jobTypeKey, "LOADJOB").equals("SLEEPJOB")) {
+ Assert.assertTrue("Reduce output records are not zero for sleep job.",
+ getCounterValue(counters, "REDUCE_OUTPUT_RECORDS") == 0);
+ Assert.assertTrue("Reduce output bytes are not zero for sleep job.",
+ getCounterValue(counters,"HDFS_BYTES_WRITTEN") == 0);
+ }
+ }
+
+ /**
+ * It verifies the gridmix simulated job summary.
+ * @param zombieJob - Original job summary.
+ * @param jhInfo - Simulated job history info.
+ * @param jobConf - simulated job configuration.
+ * @throws IOException - if an I/O error occurs.
+ */
+ public void verifySimulatedJobSummary(ZombieJob zombieJob,
+ JobHistoryParser.JobInfo jhInfo, JobConf jobConf) throws IOException {
+ Assert.assertEquals("Job id has not matched", zombieJob.getJobID(),
+ JobID.forName(jobConf.get(origJobIdKey)));
+
+ Assert.assertEquals("Job maps have not matched", zombieJob.getNumberMaps(),
+ jhInfo.getTotalMaps());
+
+ if (!jobConf.getBoolean(mapTaskKey, false)) {
+ Assert.assertEquals("Job reducers have not matched",
+ zombieJob.getNumberReduces(), jhInfo.getTotalReduces());
+ } else {
+ Assert.assertEquals("Job reducers have not matched",
+ 0, jhInfo.getTotalReduces());
+ }
+
+ Assert.assertEquals("Job status has not matched.",
+ zombieJob.getOutcome().name(),
+ convertJobStatus(jhInfo.getJobStatus()));
+
+ LoggedJob loggedJob = zombieJob.getLoggedJob();
+ Assert.assertEquals("Job priority has not matched.",
+ loggedJob.getPriority().toString(),
+ jhInfo.getPriority());
+
+ if (jobConf.get(usrResolver).contains("RoundRobin")) {
+ String user = UserGroupInformation.getLoginUser().getShortUserName();
+ Assert.assertTrue(jhInfo.getJobId().toString()
+ + " has not impersonate with other user.",
+ !jhInfo.getUsername().equals(user));
+ }
+ }
+
+ /**
+ * Get the original job map counters from a trace.
+ * @param zombieJob - Original job story.
+ * @return - map counters as a map.
+ */
+ public Map<String, Long> getJobMapCounters(ZombieJob zombieJob) {
+ long expMapInputBytes = 0;
+ long expMapOutputBytes = 0;
+ long expMapInputRecs = 0;
+ long expMapOutputRecs = 0;
+ Map<String,Long> mapCounters = new HashMap<String,Long>();
+ for (int index = 0; index < zombieJob.getNumberMaps(); index ++) {
+ TaskInfo mapTask = zombieJob.getTaskInfo(TaskType.MAP, index);
+ expMapInputBytes += mapTask.getInputBytes();
+ expMapOutputBytes += mapTask.getOutputBytes();
+ expMapInputRecs += mapTask.getInputRecords();
+ expMapOutputRecs += mapTask.getOutputRecords();
+ }
+ mapCounters.put("MAP_INPUT_BYTES", expMapInputBytes);
+ mapCounters.put("MAP_OUTPUT_BYTES", expMapOutputBytes);
+ mapCounters.put("MAP_INPUT_RECS", expMapInputRecs);
+ mapCounters.put("MAP_OUTPUT_RECS", expMapOutputRecs);
+ return mapCounters;
+ }
+
+ /**
+ * Get the original job reduce counters from a trace.
+ * @param zombieJob - Original job story.
+ * @return - reduce counters as a map.
+ */
+ public Map<String,Long> getJobReduceCounters(ZombieJob zombieJob) {
+ long expReduceInputBytes = 0;
+ long expReduceOutputBytes = 0;
+ long expReduceInputRecs = 0;
+ long expReduceOutputRecs = 0;
+ Map<String,Long> reduceCounters = new HashMap<String,Long>();
+ for (int index = 0; index < zombieJob.getNumberReduces(); index ++) {
+ TaskInfo reduceTask = zombieJob.getTaskInfo(TaskType.REDUCE, index);
+ expReduceInputBytes += reduceTask.getInputBytes();
+ expReduceOutputBytes += reduceTask.getOutputBytes();
+ expReduceInputRecs += reduceTask.getInputRecords();
+ expReduceOutputRecs += reduceTask.getOutputRecords();
+ }
+ reduceCounters.put("REDUCE_INPUT_BYTES", expReduceInputBytes);
+ reduceCounters.put("REDUCE_OUTPUT_BYTES", expReduceOutputBytes);
+ reduceCounters.put("REDUCE_INPUT_RECS", expReduceInputRecs);
+ reduceCounters.put("REDUCE_OUTPUT_RECS", expReduceOutputRecs);
+ return reduceCounters;
+ }
+
+ /**
+ * Get the simulated job configuration of a job.
+ * @param simulatedJobID - Simulated job id.
+ * @param tmpJHFolder - temporary job history folder location.
+ * @return - simulated job configuration.
+ * @throws IOException - If an I/O error occurs.
+ */
+ public JobConf getSimulatedJobConf(JobID simulatedJobID, File tmpJHFolder)
+ throws IOException{
+ FileSystem fs = null;
+ try {
+
+ String historyFilePath =
+ jtClient.getProxy().getJobHistoryLocationForRetiredJob(simulatedJobID);
+ Path jhpath = new Path(historyFilePath);
+ fs = jhpath.getFileSystem(conf);
+ fs.copyToLocalFile(jhpath,new Path(tmpJHFolder.toString()));
+ fs.copyToLocalFile(new Path(historyFilePath + "_conf.xml"),
+ new Path(tmpJHFolder.toString()));
+ JobConf jobConf = new JobConf();
+ jobConf.addResource(new Path(tmpJHFolder.toString()
+ + "/" + simulatedJobID + "_conf.xml"));
+ jobConf.reloadConfiguration();
+ return jobConf;
+
+ }finally {
+ fs.close();
+ }
+ }
+
+ /**
+ * Get the simulated job history of a job.
+ * @param simulatedJobID - simulated job id.
+ * @return - simulated job information.
+ * @throws IOException - if an I/O error occurs.
+ */
+ public JobHistoryParser.JobInfo getSimulatedJobHistory(JobID simulatedJobID)
+ throws IOException {
+ FileSystem fs = null;
+ try {
+ String historyFilePath = jtClient.getProxy().
+ getJobHistoryLocationForRetiredJob(simulatedJobID);
+ Path jhpath = new Path(historyFilePath);
+ fs = jhpath.getFileSystem(conf);
+ JobHistoryParser jhparser = new JobHistoryParser(fs, jhpath);
+ JobHistoryParser.JobInfo jhInfo = jhparser.parse();
+ return jhInfo;
+
+ } finally {
+ fs.close();
+ }
+ }
+
+ /**
+ * Get the user resolver of a job.
+ */
+ public String getJobUserResolver() {
+ return userResolverVal;
+ }
+
+ private String convertJobStatus(String jobStatus) {
+ if (jobStatus.equals("SUCCEEDED")) {
+ return "SUCCESS";
+ } else {
+ return jobStatus;
+ }
+ }
+
+ private String convertBytes(long bytesValue) {
+ int units = 1024;
+ if( bytesValue < units ) {
+ return String.valueOf(bytesValue)+ "B";
+ } else {
+ // it converts the bytes into either KB or MB or GB or TB etc.
+ int exp = (int)(Math.log(bytesValue) / Math.log(units));
+ return String.format("%1d%sB",(long)(bytesValue / Math.pow(units, exp)),
+ "KMGTPE".charAt(exp -1));
+ }
+ }
+
+
+ private long getCounterValue(Counters counters, String key)
+ throws ParseException {
+ for (String groupName : counters.getGroupNames()) {
+ CounterGroup totalGroup = counters.getGroup(groupName);
+ Iterator<Counter> itrCounter = totalGroup.iterator();
+ while (itrCounter.hasNext()) {
+ Counter counter = itrCounter.next();
+ if (counter.getName().equals(key)) {
+ return counter.getValue();
+ }
+ }
+ }
+ return 0;
+ }
+}
+
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java
new file mode 100644
index 0000000..65dd4ff
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java
@@ -0,0 +1,513 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.test.system;
+
+import java.io.IOException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.mapred.gridmix.Gridmix;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobStatus;
+import org.apache.hadoop.mapreduce.JobID;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Arrays;
+import java.net.URI;
+import java.text.SimpleDateFormat;
+import java.io.OutputStream;
+import java.util.Set;
+import java.util.List;
+import java.util.Iterator;
+import java.util.Map;
+import java.io.File;
+import java.io.FileOutputStream;
+import org.apache.hadoop.test.system.ProxyUserDefinitions;
+import org.apache.hadoop.test.system.ProxyUserDefinitions.GroupsAndHost;
+
+/**
+ * Gridmix utilities.
+ */
+public class UtilsForGridmix {
+ private static final Log LOG = LogFactory.getLog(UtilsForGridmix.class);
+ private static final Path DEFAULT_TRACES_PATH =
+ new Path(System.getProperty("user.dir") + "/src/test/system/resources/");
+
+ /**
+ * cleanup the folder or file.
+ * @param path - folder or file path.
+ * @param conf - cluster configuration
+ * @throws IOException - If an I/O error occurs.
+ */
+ public static void cleanup(Path path, Configuration conf)
+ throws IOException {
+ FileSystem fs = path.getFileSystem(conf);
+ fs.delete(path, true);
+ fs.close();
+ }
+
+ /**
+ * Get the login user.
+ * @return - login user as string..
+ * @throws IOException - if an I/O error occurs.
+ */
+ public static String getUserName() throws IOException {
+ return UserGroupInformation.getLoginUser().getUserName();
+ }
+
+ /**
+ * Get the argument list for gridmix job.
+ * @param gridmixDir - gridmix parent directory.
+ * @param gridmixRunMode - gridmix modes either 1,2,3.
+ * @param values - gridmix runtime values.
+ * @param otherArgs - gridmix other generic args.
+ * @return - argument list as string array.
+ */
+ public static String [] getArgsList(Path gridmixDir, int gridmixRunMode,
+ String [] values, String [] otherArgs) {
+ String [] runtimeArgs = {
+ "-D", GridMixConfig.GRIDMIX_LOG_MODE + " = DEBUG",
+ "-D", GridMixConfig.GRIDMIX_OUTPUT_DIR + " = gridmix",
+ "-D", GridMixConfig.GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE + " = true",
+ "-D", GridMixConfig.GRIDMIX_JOB_TYPE + " = " + values[0],
+ "-D", GridMixConfig.GRIDMIX_USER_RESOLVER + " = " + values[1],
+ "-D", GridMixConfig.GRIDMIX_SUBMISSION_POLICY + " = " + values[2]
+ };
+
+ String [] classArgs;
+ if ((gridmixRunMode == GridMixRunMode.DATA_GENERATION.getValue()
+ || gridmixRunMode
+ == GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue())
+ && values[1].indexOf("RoundRobinUserResolver") > 0) {
+ classArgs = new String[] {
+ "-generate", values[3],
+ "-users", values[4],
+ gridmixDir.toString(),
+ values[5]
+ };
+ } else if (gridmixRunMode == GridMixRunMode.DATA_GENERATION.getValue()
+ || gridmixRunMode
+ == GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue()) {
+ classArgs = new String[] {
+ "-generate", values[3],
+ gridmixDir.toString(),
+ values[4]
+ };
+ } else if (gridmixRunMode == GridMixRunMode.RUN_GRIDMIX.getValue()
+ && values[1].indexOf("RoundRobinUserResolver") > 0) {
+ classArgs = new String[] {
+ "-users", values[3],
+ gridmixDir.toString(),
+ values[4]
+ };
+ } else {
+ classArgs = new String[] {
+ gridmixDir.toString(),values[3]
+ };
+ }
+
+ String [] args = new String [runtimeArgs.length +
+ classArgs.length + ((otherArgs != null)?otherArgs.length:0)];
+ System.arraycopy(runtimeArgs, 0, args, 0, runtimeArgs.length);
+
+ if (otherArgs != null) {
+ System.arraycopy(otherArgs, 0, args, runtimeArgs.length,
+ otherArgs.length);
+ System.arraycopy(classArgs, 0, args, (runtimeArgs.length +
+ otherArgs.length), classArgs.length);
+ } else {
+ System.arraycopy(classArgs, 0, args, runtimeArgs.length,
+ classArgs.length);
+ }
+ return args;
+ }
+
+ /**
+ * Create a file with specified size in mb.
+ * @param sizeInMB - file size in mb.
+ * @param inputDir - input directory.
+ * @param conf - cluster configuration.
+ * @throws Exception - if an exception occurs.
+ */
+ public static void createFile(int sizeInMB, Path inputDir,
+ Configuration conf) throws Exception {
+ Date d = new Date();
+ SimpleDateFormat sdf = new SimpleDateFormat("ddMMyy_HHmmssS");
+ String formatDate = sdf.format(d);
+ FileSystem fs = inputDir.getFileSystem(conf);
+ OutputStream out = fs.create(new Path(inputDir,"datafile_" + formatDate));
+ final byte[] b = new byte[1024 * 1024];
+ for (int index = 0; index < sizeInMB; index++) {
+ out.write(b);
+ }
+ out.close();
+ fs.close();
+ }
+
+ /**
+ * Create directories for a path.
+ * @param path - directories path.
+ * @param conf - cluster configuration.
+ * @throws IOException - if an I/O error occurs.
+ */
+ public static void createDirs(Path path,Configuration conf)
+ throws IOException {
+ FileSystem fs = path.getFileSystem(conf);
+ if (!fs.exists(path)) {
+ fs.mkdirs(path);
+ }
+ }
+
+ /**
+ * Run the Gridmix job with given runtime arguments.
+ * @param gridmixDir - Gridmix parent directory.
+ * @param conf - cluster configuration.
+ * @param gridmixRunMode - gridmix run mode either 1,2,3
+ * @param runtimeValues -gridmix runtime values.
+ * @return - gridmix status either 0 or 1.
+ * @throws Exception
+ */
+ public static int runGridmixJob(Path gridmixDir, Configuration conf,
+ int gridmixRunMode, String [] runtimeValues) throws Exception {
+ return runGridmixJob(gridmixDir, conf, gridmixRunMode, runtimeValues, null);
+ }
+ /**
+ * Run the Gridmix job with given runtime arguments.
+ * @param gridmixDir - Gridmix parent directory
+ * @param conf - cluster configuration.
+ * @param gridmixRunMode - gridmix run mode.
+ * @param runtimeValues - gridmix runtime values.
+ * @param otherArgs - gridmix other generic args.
+ * @return - gridmix status either 0 or 1.
+ * @throws Exception
+ */
+
+ public static int runGridmixJob(Path gridmixDir, Configuration conf,
+ int gridmixRunMode, String [] runtimeValues,
+ String [] otherArgs) throws Exception {
+ Path outputDir = new Path(gridmixDir, "gridmix");
+ Path inputDir = new Path(gridmixDir, "input");
+ LOG.info("Cleanup the data if data already exists.");
+ String modeName = new String();
+ switch (gridmixRunMode) {
+ case 1 :
+ cleanup(inputDir, conf);
+ cleanup(outputDir, conf);
+ modeName = GridMixRunMode.DATA_GENERATION.name();
+ break;
+ case 2 :
+ cleanup(outputDir, conf);
+ modeName = GridMixRunMode.RUN_GRIDMIX.name();
+ break;
+ case 3 :
+ cleanup(inputDir, conf);
+ cleanup(outputDir, conf);
+ modeName = GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.name();
+ break;
+ }
+
+ final String [] args =
+ UtilsForGridmix.getArgsList(gridmixDir, gridmixRunMode,
+ runtimeValues, otherArgs);
+ Gridmix gridmix = new Gridmix();
+ LOG.info("Submit a Gridmix job in " + runtimeValues[1]
+ + " mode for " + modeName);
+ int exitCode = ToolRunner.run(conf, gridmix, args);
+ return exitCode;
+ }
+
+ /**
+ * Get the proxy users file.
+ * @param conf - cluster configuration.
+ * @return String - proxy users file.
+ * @Exception - if no proxy users found in configuration.
+ */
+ public static String getProxyUsersFile(Configuration conf)
+ throws Exception {
+ ProxyUserDefinitions pud = getProxyUsersData(conf);
+ String fileName = buildProxyUsersFile(pud.getProxyUsers());
+ if (fileName == null) {
+ LOG.error("Proxy users file not found.");
+ throw new Exception("Proxy users file not found.");
+ } else {
+ return fileName;
+ }
+ }
+
+ /**
+ * List the current gridmix jobid's.
+ * @param client - job client.
+ * @param execJobCount - number of executed jobs.
+ * @return - list of gridmix jobid's.
+ */
+ public static List<JobID> listGridmixJobIDs(JobClient client,
+ int execJobCount) throws IOException {
+ List<JobID> jobids = new ArrayList<JobID>();
+ JobStatus [] jobStatus = client.getAllJobs();
+ int numJobs = jobStatus.length;
+ for (int index = 1; index <= execJobCount; index++) {
+ JobStatus js = jobStatus[numJobs - index];
+ JobID jobid = js.getJobID();
+ String jobName = js.getJobName();
+ if (!jobName.equals("GRIDMIX_GENERATE_INPUT_DATA") &&
+ !jobName.equals("GRIDMIX_GENERATE_DISTCACHE_DATA")) {
+ jobids.add(jobid);
+ }
+ }
+ return (jobids.size() == 0)? null : jobids;
+ }
+
+ /**
+ * List the proxy users.
+ * @param conf
+ * @return
+ * @throws Exception
+ */
+ public static List<String> listProxyUsers(Configuration conf,
+ String loginUser) throws Exception {
+ List<String> proxyUsers = new ArrayList<String>();
+ ProxyUserDefinitions pud = getProxyUsersData(conf);
+ Map<String, GroupsAndHost> usersData = pud.getProxyUsers();
+ Collection users = usersData.keySet();
+ Iterator<String> itr = users.iterator();
+ while (itr.hasNext()) {
+ String user = itr.next();
+ if (!user.equals(loginUser)){ proxyUsers.add(user); };
+ }
+ return proxyUsers;
+ }
+
+ private static String buildProxyUsersFile(final Map<String, GroupsAndHost>
+ proxyUserData) throws Exception {
+ FileOutputStream fos = null;
+ File file = null;
+ StringBuffer input = new StringBuffer();
+ Set users = proxyUserData.keySet();
+ Iterator itr = users.iterator();
+ while (itr.hasNext()) {
+ String user = itr.next().toString();
+ if (!user.equals(
+ UserGroupInformation.getLoginUser().getShortUserName())) {
+ input.append(user);
+ final GroupsAndHost gah = proxyUserData.get(user);
+ final List <String> groups = gah.getGroups();
+ for (String group : groups) {
+ input.append(",");
+ input.append(group);
+ }
+ input.append("\n");
+ }
+ }
+ if (input.length() > 0) {
+ try {
+ file = File.createTempFile("proxyusers", null);
+ fos = new FileOutputStream(file);
+ fos.write(input.toString().getBytes());
+ } catch(IOException ioexp) {
+ LOG.warn(ioexp.getMessage());
+ return null;
+ } finally {
+ fos.close();
+ file.deleteOnExit();
+ }
+ LOG.info("file.toString():" + file.toString());
+ return file.toString();
+ } else {
+ return null;
+ }
+ }
+
+ private static ProxyUserDefinitions getProxyUsersData(Configuration conf)
+ throws Exception {
+ Iterator itr = conf.iterator();
+ List<String> proxyUsersData = new ArrayList<String>();
+ while (itr.hasNext()) {
+ String property = itr.next().toString();
+ if (property.indexOf("hadoop.proxyuser") >= 0
+ && property.indexOf("groups=") >= 0) {
+ proxyUsersData.add(property.split("\\.")[2]);
+ }
+ }
+
+ if (proxyUsersData.size() == 0) {
+ LOG.error("No proxy users found in the configuration.");
+ throw new Exception("No proxy users found in the configuration.");
+ }
+
+ ProxyUserDefinitions pud = new ProxyUserDefinitions() {
+ public boolean writeToFile(URI filePath) throws IOException {
+ throw new UnsupportedOperationException("No such methood exists.");
+ };
+ };
+
+ for (String userName : proxyUsersData) {
+ List<String> groups = Arrays.asList(conf.get("hadoop.proxyuser." +
+ userName + ".groups").split("//,"));
+ List<String> hosts = Arrays.asList(conf.get("hadoop.proxyuser." +
+ userName + ".hosts").split("//,"));
+ ProxyUserDefinitions.GroupsAndHost definitions =
+ pud.new GroupsAndHost();
+ definitions.setGroups(groups);
+ definitions.setHosts(hosts);
+ pud.addProxyUser(userName, definitions);
+ }
+ return pud;
+ }
+
+ /**
+ * Gives the list of paths for MR traces against different time
+ * intervals.It fetches only the paths which followed the below
+ * file convention.
+ * Syntax : <FileName>_<TimeIntervals>.json.gz
+ * There is a restriction in a file and user has to
+ * follow the below convention for time interval.
+ * Syntax: <numeric>[m|h|d]
+ * e.g : for 10 minutes trace should specify 10m,
+ * same way for 1 hour traces should specify 1h,
+ * for 1 day traces should specify 1d.
+ *
+ * @param conf - cluster configuration.
+ * @return - list of MR paths as key/value pair based on time interval.
+ * @throws IOException - if an I/O error occurs.
+ */
+ public static Map<String, String> getMRTraces(Configuration conf)
+ throws IOException {
+ return getMRTraces(conf, DEFAULT_TRACES_PATH);
+ }
+
+ /**
+ * It gives the list of paths for MR traces against different time
+ * intervals. It fetches only the paths which followed the below
+ * file convention.
+ * Syntax : <FileNames>_<TimeInterval>.json.gz
+ * There is a restriction in a file and user has to follow the
+ * below convention for time interval.
+ * Syntax: <numeric>[m|h|d]
+ * e.g : for 10 minutes trace should specify 10m,
+ * same way for 1 hour traces should specify 1h,
+ * for 1 day traces should specify 1d.
+ *
+ * @param conf - cluster configuration object.
+ * @param tracesPath - MR traces path.
+ * @return - list of MR paths as key/value pair based on time interval.
+ * @throws IOException - If an I/O error occurs.
+ */
+ public static Map<String,String> getMRTraces(Configuration conf,
+ Path tracesPath) throws IOException {
+ Map <String, String> jobTraces = new HashMap <String, String>();
+ final FileSystem fs = FileSystem.getLocal(conf);
+ final FileStatus fstat[] = fs.listStatus(tracesPath);
+ for (FileStatus fst : fstat) {
+ final String fileName = fst.getPath().getName();
+ if (fileName.endsWith("m.json.gz")
+ || fileName.endsWith("h.json.gz")
+ || fileName.endsWith("d.json.gz")) {
+ jobTraces.put(fileName.substring(fileName.indexOf("_") + 1,
+ fileName.indexOf(".json.gz")), fst.getPath().toString());
+ }
+ }
+ if (jobTraces.size() == 0) {
+ LOG.error("No traces found in " + tracesPath.toString() + " path.");
+ throw new IOException("No traces found in "
+ + tracesPath.toString() + " path.");
+ }
+ return jobTraces;
+ }
+
+ /**
+ * It list the all the MR traces path irrespective of time.
+ * @param conf - cluster configuration.
+ * @param tracesPath - MR traces path
+ * @return - MR paths as a list.
+ * @throws IOException - if an I/O error occurs.
+ */
+ public static List<String> listMRTraces(Configuration conf,
+ Path tracesPath) throws IOException {
+ List<String> jobTraces = new ArrayList<String>();
+ final FileSystem fs = FileSystem.getLocal(conf);
+ final FileStatus fstat[] = fs.listStatus(tracesPath);
+ for (FileStatus fst : fstat) {
+ jobTraces.add(fst.getPath().toString());
+ }
+ if (jobTraces.size() == 0) {
+ LOG.error("No traces found in " + tracesPath.toString() + " path.");
+ throw new IOException("No traces found in "
+ + tracesPath.toString() + " path.");
+ }
+ return jobTraces;
+ }
+
+ /**
+ * It list the all the MR traces path irrespective of time.
+ * @param conf - cluster configuration.
+ * @param tracesPath - MR traces path
+ * @return - MR paths as a list.
+ * @throws IOException - if an I/O error occurs.
+ */
+ public static List<String> listMRTraces(Configuration conf)
+ throws IOException {
+ return listMRTraces(conf, DEFAULT_TRACES_PATH);
+ }
+
+ /**
+ * Gives the list of MR traces for given time interval.
+ * The time interval should be following convention.
+ * Syntax : <numeric>[m|h|d]
+ * e.g : 10m or 1h or 2d etc.
+ * @param conf - cluster configuration
+ * @param timeInterval - trace time interval.
+ * @param tracesPath - MR traces Path.
+ * @return - MR paths as a list for a given time interval.
+ * @throws IOException - If an I/O error occurs.
+ */
+ public static List<String> listMRTracesByTime(Configuration conf,
+ String timeInterval, Path tracesPath) throws IOException {
+ List<String> jobTraces = new ArrayList<String>();
+ final FileSystem fs = FileSystem.getLocal(conf);
+ final FileStatus fstat[] = fs.listStatus(tracesPath);
+ for (FileStatus fst : fstat) {
+ final String fileName = fst.getPath().getName();
+ if (fileName.indexOf(timeInterval) >= 0) {
+ jobTraces.add(fst.getPath().toString());
+ }
+ }
+ return jobTraces;
+ }
+
+ /**
+ * Gives the list of MR traces for given time interval.
+ * The time interval should be following convention.
+ * Syntax : <numeric>[m|h|d]
+ * e.g : 10m or 1h or 2d etc.
+ * @param conf - cluster configuration
+ * @param timeInterval - trace time interval.
+ * @return - MR paths as a list for a given time interval.
+ * @throws IOException - If an I/O error occurs.
+ */
+ public static List<String> listMRTracesByTime(Configuration conf,
+ String timeInterval) throws IOException {
+ return listMRTracesByTime(conf, timeInterval, DEFAULT_TRACES_PATH);
+ }
+}
diff --git a/src/contrib/gridmix/src/test/system/resources/2m_stream_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/2m_stream_trace.json.gz
new file mode 100644
index 0000000..c145836
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/2m_stream_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/3m_stream_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/3m_stream_trace.json.gz
new file mode 100644
index 0000000..7bf17a0
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/3m_stream_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/5m_stream_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/5m_stream_trace.json.gz
new file mode 100644
index 0000000..a72e41f
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/5m_stream_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/trace_10m.json.gz b/src/contrib/gridmix/src/test/system/resources/trace_10m.json.gz
new file mode 100644
index 0000000..2be6f37
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/trace_10m.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/trace_12m.json.gz b/src/contrib/gridmix/src/test/system/resources/trace_12m.json.gz
new file mode 100644
index 0000000..7850026
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/trace_12m.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/trace_1m.json.gz b/src/contrib/gridmix/src/test/system/resources/trace_1m.json.gz
new file mode 100644
index 0000000..21bff55
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/trace_1m.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/trace_3m.json.gz b/src/contrib/gridmix/src/test/system/resources/trace_3m.json.gz
new file mode 100644
index 0000000..a27241e
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/trace_3m.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/trace_5m.json.gz b/src/contrib/gridmix/src/test/system/resources/trace_5m.json.gz
new file mode 100644
index 0000000..441ca3a
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/trace_5m.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/trace_7m.json.gz b/src/contrib/gridmix/src/test/system/resources/trace_7m.json.gz
new file mode 100644
index 0000000..4aab5a1
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/trace_7m.json.gz
Binary files differ