MAPREDUCE-2554. [Gridmix] Add distributed cache emulation system tests to Gridmix. (Vinay Kumar Thota via amarrk)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/mapreduce/trunk@1131515 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index 1cf866e..173cd55 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -25,6 +25,9 @@
IMPROVEMENTS
+ MAPREDUCE-2554. [Gridmix] Add distributed cache emulation system tests
+ to Gridmix. (Vinay Kumar Thota via amarrk)
+
MAPREDUCE-2543. [Gridmix] High-Ram feature emulation testcase. (amarrk)
MAPREDUCE-2469. Task counters should also report the total heap usage of
diff --git a/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java b/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java
index 6a93428..8b80d42 100644
--- a/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java
+++ b/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java
@@ -366,7 +366,7 @@
* @return true if the path provided is of a local file system based
* distributed cache file
*/
- private boolean isLocalDistCacheFile(String filePath, String user,
+ static boolean isLocalDistCacheFile(String filePath, String user,
boolean visibility) {
return (!visibility && filePath.contains(user + "/.staging"));
}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/GridmixSystemTestCase.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/GridmixSystemTestCase.java
index 10cd9aa..4031005 100644
--- a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/GridmixSystemTestCase.java
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/GridmixSystemTestCase.java
@@ -162,5 +162,11 @@
}
return null;
}
+ /** Public bridge to the package-private DistributedCacheEmulator check. */
+ public static boolean isLocalDistCache(String fileName, String userName,
+ boolean visibility) {
+ return DistributedCacheEmulator.isLocalDistCacheFile(fileName,
+ userName, visibility);
+ }
}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfHDFSAndLocalFSDCFiles.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfHDFSAndLocalFSDCFiles.java
new file mode 100644
index 0000000..d98b259
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfHDFSAndLocalFSDCFiles.java
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Verify the emulation of HDFS and Local FS distributed cache files against
+ * the given input trace file.
+ */
+public class TestEmulationOfHDFSAndLocalFSDCFiles extends
+    GridmixSystemTestCase {
+  private static final Log LOG =
+      LogFactory.getLog(TestEmulationOfHDFSAndLocalFSDCFiles.class);
+
+  /**
+   * Generate the input data and distributed cache files for HDFS and
+   * local FS. Verify the gridmix emulation of HDFS and Local FS
+   * distributed cache files in RoundRobinUserResolver mode with STRESS
+   * submission policy.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGenerateDataEmulateHDFSAndLocalFSDCFiles()
+      throws Exception {
+    final long inputSizeInMB = 1024 * 6;
+    final String tracePath = getTraceFile("distcache_case8_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {
+        "LOADJOB",
+        RoundRobinUserResolver.class.getName(),
+        "STRESS",
+        inputSizeInMB + "m",
+        "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN + "=false",
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true",
+        "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue());
+  }
+
+  /**
+   * Use existing input and distributed cache files for HDFS and
+   * local FS. Verify the gridmix emulation of HDFS and Local FS
+   * distributed cache files in SubmitterUserResolver mode with STRESS
+   * submission policy.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testEmulationOfHDFSAndLocalFSDCFiles()
+      throws Exception {
+    final String tracePath = getTraceFile("distcache_case8_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {"LOADJOB",
+        SubmitterUserResolver.class.getName(),
+        "STRESS",
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN + "=false",
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true",
+        "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.RUN_GRIDMIX.getValue());
+  }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfHDFSDCFileUsesMultipleJobs.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfHDFSDCFileUsesMultipleJobs.java
new file mode 100644
index 0000000..00d2e48
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfHDFSDCFileUsesMultipleJobs.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Verify the Gridmix emulation of an HDFS distributed cache file that is
+ * used by different jobs submitted by different users.
+ */
+public class TestEmulationOfHDFSDCFileUsesMultipleJobs extends
+    GridmixSystemTestCase {
+  private static final Log LOG =
+      LogFactory.getLog(TestEmulationOfHDFSDCFileUsesMultipleJobs.class);
+
+  /**
+   * Generate the input data and HDFS distributed cache file based
+   * on given input trace. Verify the Gridmix emulation of HDFS
+   * distributed cache file in RoundRobinUserResolver mode with
+   * STRESS submission policy.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGenerateAndEmulationOfHDFSDCFile()
+      throws Exception {
+    final long inputSizeInMB = 1024 * 6;
+    final String tracePath = getTraceFile("distcache_case9_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {
+        "LOADJOB",
+        RoundRobinUserResolver.class.getName(),
+        "STRESS",
+        inputSizeInMB + "m",
+        "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN + "=false",
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue());
+  }
+
+  /**
+   * Verify the Gridmix emulation of HDFS distributed cache
+   * file in SubmitterUserResolver mode with STRESS submission policy
+   * by using the existing input data and HDFS distributed cache file.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGridmixEmulationOfHDFSPublicDCFile()
+      throws Exception {
+    final String tracePath = getTraceFile("distcache_case9_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {"LOADJOB",
+        SubmitterUserResolver.class.getName(),
+        "STRESS",
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.RUN_GRIDMIX.getValue());
+  }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfHDFSDCFilesWithDifferentVisibilities.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfHDFSDCFilesWithDifferentVisibilities.java
new file mode 100644
index 0000000..3840f1b
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfHDFSDCFilesWithDifferentVisibilities.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Verify the Gridmix emulation of HDFS distributed cache files of
+ * different visibilities.
+ */
+
+public class TestEmulationOfHDFSDCFilesWithDifferentVisibilities
+    extends GridmixSystemTestCase {
+  private static final Log LOG =
+      LogFactory.getLog(
+          TestEmulationOfHDFSDCFilesWithDifferentVisibilities.class);
+
+  /**
+   * Generate input data and HDFS distributed cache files of different
+   * visibilities based on given input trace. Verify the Gridmix emulation
+   * of HDFS distributed cache files of different visibilities in
+   * RoundRobinUserResolver mode with STRESS submission policy.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGenerateAndEmulateOfHDFSDCFilesWithDiffVisibilities()
+      throws Exception {
+    final long inputSizeInMB = 1024 * 9;
+    final String tracePath = getTraceFile("distcache_case5_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {
+        "LOADJOB",
+        RoundRobinUserResolver.class.getName(),
+        "STRESS",
+        inputSizeInMB + "m",
+        "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN + "=false",
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue());
+  }
+
+  /**
+   * Run Gridmix in SubmitterUserResolver mode with REPLAY submission policy
+   * while distributed cache emulation is disabled, and verify the jobs.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testHDFSDCFilesWithoutEnableDCEmulation()
+      throws Exception {
+    final String tracePath = getTraceFile("distcache_case6_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {"LOADJOB",
+        SubmitterUserResolver.class.getName(),
+        "REPLAY",
+        tracePath};
+    final String[] otherArgs = {
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=false"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.RUN_GRIDMIX.getValue());
+  }
+}
+
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfLocalFSDCFiles.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfLocalFSDCFiles.java
new file mode 100644
index 0000000..e50eb6e
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestEmulationOfLocalFSDCFiles.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Verify the Gridmix emulation of local FS distributed cache files against
+ * the given input trace file.
+ */
+public class TestEmulationOfLocalFSDCFiles extends GridmixSystemTestCase {
+  private static final Log LOG =
+      LogFactory.getLog(TestEmulationOfLocalFSDCFiles.class);
+
+  /**
+   * Generate the input data and distributed cache files. Verify the
+   * gridmix emulation of local file system distributed cache files
+   * in RoundRobinUserResolver mode with REPLAY submission policy.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGenerateInputAndEmulateLocalFSDCFile()
+      throws Exception {
+    final long inputSizeInMB = 1024 * 6;
+    final String tracePath = getTraceFile("distcache_case7_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {
+        "LOADJOB",
+        RoundRobinUserResolver.class.getName(),
+        "REPLAY",
+        inputSizeInMB + "m",
+        "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN + "=false",
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true",
+        "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue());
+  }
+
+  /**
+   * Use existing input and local distributed cache files and verify
+   * the gridmix emulation of local file system distributed cache
+   * files in SubmitterUserResolver mode with STRESS
+   * submission policy.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testEmulationOfLocalFSDCFile()
+      throws Exception {
+    final String tracePath = getTraceFile("distcache_case7_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {"LOADJOB",
+        SubmitterUserResolver.class.getName(),
+        "STRESS",
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN + "=false",
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true",
+        "-D", GridMixConfig.GRIDMIX_COMPRESSION_ENABLE + "=false"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.RUN_GRIDMIX.getValue());
+  }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfHDFSPrivateDCFile.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfHDFSPrivateDCFile.java
new file mode 100644
index 0000000..5289bf3
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfHDFSPrivateDCFile.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Verify the Gridmix emulation of HDFS private distributed cache file.
+ */
+public class TestGridmixEmulationOfHDFSPrivateDCFile
+    extends GridmixSystemTestCase {
+  private static final Log LOG =
+      LogFactory.getLog(TestGridmixEmulationOfHDFSPrivateDCFile.class);
+  /**
+   * Generate input data and single HDFS private distributed cache
+   * file based on given input trace. Verify the Gridmix emulation of
+   * single private HDFS distributed cache file in RoundRobinUserResolver
+   * mode with STRESS submission policy.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGenerateAndEmulateOfHDFSPrivateDCFile()
+      throws Exception {
+    final long inputSizeInMB = 8192;
+    final String tracePath = getTraceFile("distcache_case3_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {
+        "LOADJOB",
+        RoundRobinUserResolver.class.getName(),
+        "STRESS",
+        inputSizeInMB + "m",
+        "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN + "=false",
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue());
+  }
+  /**
+   * Verify the Gridmix emulation of single HDFS private distributed
+   * cache file in SubmitterUserResolver mode with REPLAY submission
+   * policy by using the existing input data and HDFS private
+   * distributed cache file.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGridmixEmulationOfHDFSPrivateDCFile()
+      throws Exception {
+    final String tracePath = getTraceFile("distcache_case3_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {"LOADJOB",
+        SubmitterUserResolver.class.getName(),
+        "REPLAY",
+        tracePath};
+    final String[] otherArgs = {
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.RUN_GRIDMIX.getValue());
+  }
+}
+
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfHDFSPublicDCFile.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfHDFSPublicDCFile.java
new file mode 100644
index 0000000..e12180c
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfHDFSPublicDCFile.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Verify the Gridmix emulation of HDFS public distributed cache file.
+ */
+public class TestGridmixEmulationOfHDFSPublicDCFile
+    extends GridmixSystemTestCase {
+  private static final Log LOG =
+      LogFactory.getLog(TestGridmixEmulationOfHDFSPublicDCFile.class);
+
+  /**
+   * Generate the input data and HDFS distributed cache file based
+   * on given input trace. Verify the Gridmix emulation of single HDFS
+   * public distributed cache file in SubmitterUserResolver mode with
+   * STRESS submission policy.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGenerateAndEmulationOfSingleHDFSDCFile()
+      throws Exception {
+    final long inputSizeInMB = 7168;
+    final String tracePath = getTraceFile("distcache_case1_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {"LOADJOB",
+        SubmitterUserResolver.class.getName(),
+        "STRESS",
+        inputSizeInMB + "m",
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN + "=false",
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue());
+  }
+
+  /**
+   * Verify the Gridmix emulation of Single HDFS public distributed cache
+   * file in RoundRobinUserResolver mode with REPLAY submission policy
+   * by using the existing input data and HDFS public distributed cache file.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGridmixEmulationOfSingleHDFSPublicDCFile()
+      throws Exception {
+    final String tracePath = getTraceFile("distcache_case1_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {
+        "LOADJOB",
+        RoundRobinUserResolver.class.getName(),
+        "REPLAY",
+        "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.RUN_GRIDMIX.getValue());
+  }
+}
+
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfMultipleHDFSPrivateDCFiles.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfMultipleHDFSPrivateDCFiles.java
new file mode 100644
index 0000000..4dca1a2
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfMultipleHDFSPrivateDCFiles.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Verify the Gridmix emulation of Multiple HDFS private distributed
+ * cache files.
+ */
+public class TestGridmixEmulationOfMultipleHDFSPrivateDCFiles
+    extends GridmixSystemTestCase {
+  private static final Log LOG =
+      LogFactory.getLog(
+          TestGridmixEmulationOfMultipleHDFSPrivateDCFiles.class);
+
+  /**
+   * Generate input data and multiple HDFS private distributed cache
+   * files based on given input trace. Verify the Gridmix emulation of
+   * multiple private HDFS distributed cache files in RoundRobinUserResolver
+   * mode with SERIAL submission policy.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGenerateAndEmulationOfMultipleHDFSPrivateDCFiles()
+      throws Exception {
+    final long inputSizeInMB = 6144;
+    final String tracePath = getTraceFile("distcache_case4_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {
+        "LOADJOB",
+        RoundRobinUserResolver.class.getName(),
+        "SERIAL",
+        inputSizeInMB + "m",
+        "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+        tracePath};
+    final String[] otherArgs = {
+        "-D", MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN + "=false",
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue());
+  }
+
+  /**
+   * Verify the Gridmix emulation of multiple HDFS private distributed
+   * cache files in SubmitterUserResolver mode with STRESS submission
+   * policy by using the existing input data and HDFS private
+   * distributed cache files.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGridmixEmulationOfMultipleHDFSPrivateDCFiles()
+      throws Exception {
+    final String tracePath = getTraceFile("distcache_case4_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {"LOADJOB",
+        SubmitterUserResolver.class.getName(),
+        "STRESS",
+        tracePath};
+    final String[] otherArgs = {
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.RUN_GRIDMIX.getValue());
+  }
+}
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfMultipleHDFSPublicDCFiles.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfMultipleHDFSPublicDCFiles.java
new file mode 100644
index 0000000..09bbf18
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/TestGridmixEmulationOfMultipleHDFSPublicDCFiles.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixConfig;
+import org.apache.hadoop.mapred.gridmix.test.system.GridMixRunMode;
+import org.apache.hadoop.mapred.gridmix.test.system.UtilsForGridmix;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.junit.Assert;
+import org.junit.Test;
+import java.io.IOException;
+
+/**
+ * Verify the Gridmix emulation of Multiple HDFS public distributed
+ * cache files.
+ */
+public class TestGridmixEmulationOfMultipleHDFSPublicDCFiles
+    extends GridmixSystemTestCase {
+  private static final Log LOG =
+      LogFactory.getLog(
+          TestGridmixEmulationOfMultipleHDFSPublicDCFiles.class);
+
+  /**
+   * Generate the compressed input data and dist cache files based
+   * on input trace. Verify the Gridmix emulation of
+   * multiple HDFS public distributed cache files.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGenerateAndEmulationOfMultipleHDFSDCFiles()
+      throws Exception {
+    final long inputSizeInMB = 7168;
+    final String tracePath = getTraceFile("distcache_case2_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {
+        "LOADJOB",
+        RoundRobinUserResolver.class.getName(),
+        "STRESS",
+        inputSizeInMB + "m",
+        "file://" + UtilsForGridmix.getProxyUsersFile(conf),
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN + "=false",
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.DATA_GENERATION_AND_RUN_GRIDMIX.getValue());
+  }
+
+  /**
+   * Verify the emulation of multiple HDFS public distributed cache files
+   * by using the existing compressed input data and HDFS dist cache files.
+   * @throws Exception - if an error occurs.
+   */
+  @Test
+  public void testGridmixEmulationOfMulitpleHDFSPublicDCFile()
+      throws Exception {
+    final String tracePath = getTraceFile("distcache_case2_trace");
+    Assert.assertNotNull("Trace file was not found.", tracePath);
+    final String[] runtimeValues = {"LOADJOB",
+        SubmitterUserResolver.class.getName(),
+        "SERIAL",
+        tracePath};
+
+    final String[] otherArgs = {
+        "-D", GridMixConfig.GRIDMIX_DISTCACHE_ENABLE + "=true"
+    };
+    runGridmixAndVerify(runtimeValues, otherArgs, tracePath,
+        GridMixRunMode.RUN_GRIDMIX.getValue());
+  }
+}
+
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java
index 2cecc40..8b44f0b 100644
--- a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridMixConfig.java
@@ -20,6 +20,7 @@
import org.apache.hadoop.mapred.gridmix.Gridmix;
import org.apache.hadoop.mapred.gridmix.JobCreator;
import org.apache.hadoop.mapred.gridmix.SleepJob;
+import org.apache.hadoop.mapreduce.MRJobConfig;
public class GridMixConfig {
@@ -124,6 +125,30 @@
"gridmix.compression-emulation.reduce-output.compression-ratio";
/**
+ * Configuration key for the visibilities of a job's distributed cache
+ * files; alias of {@link MRJobConfig#CACHE_FILE_VISIBILITIES}.
+ */
+ public static final String GRIDMIX_DISTCACHE_VISIBILITIES =
+ MRJobConfig.CACHE_FILE_VISIBILITIES;
+
+ /**
+ * Configuration key for the paths of a job's distributed cache files;
+ * alias of {@link MRJobConfig#CACHE_FILES}.
+ */
+ public static final String GRIDMIX_DISTCACHE_FILES =
+ MRJobConfig.CACHE_FILES;
+
+ /**
+ * Configuration key for the sizes of a job's distributed cache files;
+ * alias of {@link MRJobConfig#CACHE_FILES_SIZES}.
+ */
+ public static final String GRIDMIX_DISTCACHE_FILESSIZE =
+ MRJobConfig.CACHE_FILES_SIZES;
+
+ /**
+ * Configuration key for the time stamps of a job's distributed cache
+ * files; alias of {@link MRJobConfig#CACHE_FILE_TIMESTAMPS}.
+ */
+ public static final String GRIDMIX_DISTCACHE_TIMESTAMP =
+ MRJobConfig.CACHE_FILE_TIMESTAMPS;
+
+ /**
* Gridmix logger mode.
*/
public static final String GRIDMIX_LOG_MODE =
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobVerification.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobVerification.java
index a3ad0e8..2b71d4f 100644
--- a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobVerification.java
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/GridmixJobVerification.java
@@ -25,11 +25,18 @@
import java.util.HashMap;
import java.util.SortedMap;
import java.util.TreeMap;
+import java.util.Collections;
+import java.util.Set;
+import java.util.ArrayList;
+import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Counter;
@@ -47,6 +54,7 @@
import org.junit.Assert;
import java.text.ParseException;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.mapred.gridmix.GridmixSystemTestCase;
/**
* Verifying each Gridmix job with corresponding job story in a trace file.
@@ -74,6 +82,8 @@
GridMixConfig.GRIDMIX_INTERMEDIATE_COMPRESSION_RATIO;
static final String reduceOutputCompRatio =
GridMixConfig.GRIDMIX_OUTPUT_COMPRESSION_RATIO;
+ // Keyed by simulated job id; each list holds the simulated JobConf at
+ // index 0 and the original JobConf at index 1. Filled by
+ // setJobDistributedCacheInfo() only for jobs whose simulated
+ // configuration lists distributed cache files.
+ private Map<String, List<JobConf>> simuAndOrigJobsInfo =
+ new HashMap<String, List<JobConf>>();
/**
* Gridmix job verification constructor
@@ -133,12 +143,16 @@
verifyJobReduceCounters(counters, reduceJobCounters, simuJobConf);
verifyCompressionEmulation(zombieJob.getJobConf(), simuJobConf, counters,
reduceJobCounters, mapJobCounters);
+ verifyDistributeCache(zombieJob,simuJobConf);
+ setJobDistributedCacheInfo(simuJobId.toString(), simuJobConf,
+ zombieJob.getJobConf());
LOG.info("Done.");
}
+ verifyDistributedCacheBetweenJobs(simuAndOrigJobsInfo);
}
/**
- * Verify the job subimssion order between the jobs in replay mode.
+ * Verify the job submission order between the jobs in replay mode.
* @param origSubmissionTime - sorted map of original jobs submission times.
* @param simuSubmissionTime - sorted map of simulated jobs submission times.
*/
@@ -362,6 +376,12 @@
if (simuJobConf.getBoolean(compEmulKey, false)) {
String inputDir = origJobConf.get(fileInputFormatKey);
Assert.assertNotNull(fileInputFormatKey + " is Null",inputDir);
+ long simMapInputBytes = getCounterValue(counters, "HDFS_BYTES_READ");
+ long uncompressedInputSize = origMapJobCounters.get("MAP_INPUT_BYTES");
+ long simReduceInputBytes =
+ getCounterValue(counters, "REDUCE_SHUFFLE_BYTES");
+ long simMapOutputBytes = getCounterValue(counters, "MAP_OUTPUT_BYTES");
+
// Verify input compression whether it's enable or not.
if (inputDir.contains(".gz") || inputDir.contains(".tgz")
|| inputDir.contains(".bz")) {
@@ -375,22 +395,16 @@
getExpectedCompressionRatio(simuJobConf, mapOutputCompRatio);
// Verify Map Input Compression Ratio.
- long simMapInputBytes = getCounterValue(counters, "HDFS_BYTES_READ");
- long uncompressedInputSize = origMapJobCounters.get("MAP_INPUT_BYTES");
assertMapInputCompressionRatio(simMapInputBytes, uncompressedInputSize,
INPUT_COMP_RATIO);
// Verify Map Output Compression Ratio.
- long simReduceInputBytes =
- getCounterValue(counters, "REDUCE_SHUFFLE_BYTES");
- long simMapOutputBytes = getCounterValue(counters, "MAP_OUTPUT_BYTES");
assertMapOuputCompressionRatio(simReduceInputBytes, simMapOutputBytes,
INTERMEDIATE_COMP_RATIO);
} else {
- Assert.assertFalse("Input decompression attribute has been enabled "
- + "for uncompressed input. ",
- Boolean.valueOf(
- simuJobConf.getBoolean(inputDecompKey, false)));
+ Assert.assertEquals("MAP input bytes has not matched.",
+ convertBytes(uncompressedInputSize),
+ convertBytes(simMapInputBytes));
}
Assert.assertEquals("Simulated job output format has not matched with "
@@ -497,6 +511,235 @@
return ((float)significant)/100;
}
+ /**
+  * Verify the distributed cache files between the jobs in a gridmix run.
+  * For the simulated and for the original jobs the number of jobs using
+  * each unique distributed cache file is collected; the two sorted lists of
+  * usage counts must be identical. Nothing is verified unless at least two
+  * jobs are present.
+  * @param jobsInfo - jobConfs of simulated and original jobs as a map.
+  */
+ public void verifyDistributedCacheBetweenJobs(
+     Map<String,List<JobConf>> jobsInfo) {
+   // Reuse between jobs needs at least two jobs.
+   if (jobsInfo.size() <= 1) {
+     return;
+   }
+   // Index 0 selects the simulated job configurations, 1 the original ones.
+   List<Integer> simuOccurList = getMapValuesAsList(
+       getDistcacheFilesOccurenceBetweenJobs(jobsInfo, 0));
+   Collections.sort(simuOccurList);
+   List<Integer> origOccurList = getMapValuesAsList(
+       getDistcacheFilesOccurenceBetweenJobs(jobsInfo, 1));
+   Collections.sort(origOccurList);
+
+   // JUnit expects (message, expected, actual): the original jobs are the
+   // reference, the simulated jobs are the value under test.
+   Assert.assertEquals("The unique count of distributed cache files in "
+       + "simulated jobs have not matched with the unique "
+       + "count of original jobs distributed files ",
+       origOccurList.size(), simuOccurList.size());
+   for (int index = 0; index < origOccurList.size(); index++) {
+     Assert.assertEquals("Distributed cache file reused in simulated "
+         + "jobs has not matched with reused of distributed "
+         + "cache file in original jobs.",
+         origOccurList.get(index), simuOccurList.get(index));
+   }
+ }
+
+ /**
+  * Get the unique distributed cache files and occurrence between the jobs.
+  * A file is identified by its path, its own time stamp and the user of
+  * the job that references it.
+  * @param jobsInfo - job's configurations as a map.
+  * @param jobConfIndex - 0 for simulated job configuration and
+  *                       1 for original jobs configuration.
+  * @return - unique distributed cache files and occurrences as map.
+  */
+ private Map<String, Integer> getDistcacheFilesOccurenceBetweenJobs(
+     Map<String, List<JobConf>> jobsInfo, int jobConfIndex) {
+   Map<String, Integer> filesOccurBtnJobs = new HashMap<String, Integer>();
+   for (List<JobConf> jobConfs : jobsInfo.values()) {
+     JobConf jobConf = jobConfs.get(jobConfIndex);
+     String [] distCacheFiles =
+         jobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_FILES).split(",");
+     String [] distCacheFileTimeStamps =
+         jobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_TIMESTAMP).split(",");
+     String user = jobConf.getUser();
+     // Walk files and time stamps in lock step so that each file is keyed
+     // with its own time stamp rather than with the first one of the job.
+     for (int indx = 0; indx < distCacheFiles.length; indx++) {
+       String fileKey = distCacheFiles[indx] + "^"
+           + distCacheFileTimeStamps[indx] + "^"
+           + user;
+       Integer count = filesOccurBtnJobs.get(fileKey);
+       filesOccurBtnJobs.put(fileKey, count == null ? 1 : count + 1);
+     }
+   }
+   return filesOccurBtnJobs;
+ }
+
+ /**
+  * It verifies the distributed cache emulation of a job.
+  * When the emulation is enabled in the simulated configuration, the
+  * visibilities, files, sizes and time stamps of the simulated job's
+  * distributed cache files are checked; otherwise the simulated
+  * configuration must not carry any distributed cache property. A job
+  * for which neither configuration lists distributed cache files has
+  * nothing to verify.
+  * @param zombieJob - Original job story.
+  * @param simuJobConf - Simulated job configuration.
+  * @throws IOException - if the status of a cache file cannot be read.
+  */
+ public void verifyDistributeCache(ZombieJob zombieJob,
+     JobConf simuJobConf) throws IOException {
+   if (simuJobConf.getBoolean(GridMixConfig.GRIDMIX_DISTCACHE_ENABLE, false)) {
+     JobConf origJobConf = zombieJob.getJobConf();
+     String origFiles =
+         origJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_FILES);
+     String simuFiles =
+         simuJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_FILES);
+     // A job that uses no distributed cache file has nothing to emulate;
+     // the checks below would dereference the missing properties.
+     if (origFiles == null && simuFiles == null) {
+       return;
+     }
+     Assert.assertNotNull("Simulated job has distributed cache files but "
+         + "the original job has none.", origFiles);
+     Assert.assertNotNull("Original job has distributed cache files but "
+         + "the simulated job has none.", simuFiles);
+     assertFileVisibility(simuJobConf);
+     assertDistcacheFiles(simuJobConf,origJobConf);
+     assertFileSizes(simuJobConf,origJobConf);
+     assertFileStamps(simuJobConf,origJobConf);
+   } else {
+     Assert.assertNull("Configuration has distributed cache visibilities "
+         + "without enabled distributed cache emulation.",
+         simuJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_VISIBILITIES));
+     Assert.assertNull("Configuration has distributed cache files time "
+         + "stamps without enabled distributed cache emulation.",
+         simuJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_TIMESTAMP));
+     Assert.assertNull("Configuration has distributed cache files paths "
+         + "without enabled distributed cache emulation.",
+         simuJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_FILES));
+     Assert.assertNull("Configuration has distributed cache files sizes "
+         + "without enabled distributed cache emulation.",
+         simuJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_FILESSIZE));
+   }
+ }
+
+ /**
+  * Verify that every distributed cache file time stamp of the simulated
+  * job is strictly greater than the time stamp at the same position in
+  * the original job.
+  * @param simuJobConf - Simulated job configuration.
+  * @param origJobConf - Original job configuration.
+  */
+ private void assertFileStamps(JobConf simuJobConf, JobConf origJobConf) {
+   //Verify simulated jobs against distributed cache files time stamps.
+   String [] origDCFTS =
+       origJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_TIMESTAMP).split(",");
+   String [] simuDCFTS =
+       simuJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_TIMESTAMP).split(",");
+   // Fail with a readable message instead of an index error when the two
+   // jobs do not carry the same number of time stamps.
+   Assert.assertEquals("No. of simulated job's distcache file time stamps "
+       + "mismatched with no. of original job's distcache file time stamps",
+       origDCFTS.length, simuDCFTS.length);
+   for (int index = 0; index < origDCFTS.length; index++) {
+     Assert.assertTrue("Invalid time stamps between original "
+         +"and simulated job", Long.parseLong(origDCFTS[index])
+         < Long.parseLong(simuDCFTS[index]));
+   }
+ }
+
+ /**
+  * Verify the visibilities of the simulated job's distributed cache files:
+  * a file is expected to be non-public exactly when it is recognised as a
+  * local distributed cache file, and public otherwise.
+  * @param simuJobConf - Simulated job configuration.
+  */
+ private void assertFileVisibility(JobConf simuJobConf ) {
+   final String [] cacheFiles =
+       simuJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_FILES).split(",");
+   final String [] visibilities =
+       simuJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_VISIBILITIES).split(",");
+
+   // First pass: derive the expected visibility of every cache file.
+   final List<Boolean> expected = new ArrayList<Boolean>();
+   for (int pos = 0; pos < cacheFiles.length; pos++) {
+     final boolean local = GridmixSystemTestCase.isLocalDistCache(
+         cacheFiles[pos],
+         simuJobConf.getUser(),
+         Boolean.valueOf(visibilities[pos]));
+     expected.add(!local);
+   }
+
+   // Second pass: compare against the visibilities found in the job.
+   for (int pos = 0; pos < visibilities.length; pos++) {
+     Assert.assertEquals("Simulated job distributed cache file "
+         + "visibilities has not matched.",
+         expected.get(pos),
+         Boolean.valueOf(visibilities[pos]));
+   }
+ }
+
+ /**
+  * Verify the distributed cache files of the simulated job: their number
+  * must match the original job's, and every file that is not a local
+  * distributed cache file must be readable and writable by its owner and
+  * read-only for group and others.
+  * @param simuJobConf - Simulated job configuration.
+  * @param origJobConf - Original job configuration.
+  * @throws IOException - if the status of a cache file cannot be read.
+  */
+ private void assertDistcacheFiles(JobConf simuJobConf, JobConf origJobConf)
+     throws IOException {
+   //Verify simulated jobs against distributed cache files.
+   String [] origDistFiles = origJobConf.get(
+       GridMixConfig.GRIDMIX_DISTCACHE_FILES).split(",");
+   String [] simuDistFiles = simuJobConf.get(
+       GridMixConfig.GRIDMIX_DISTCACHE_FILES).split(",");
+   String [] simuDistVisibilities = simuJobConf.get(
+       GridMixConfig.GRIDMIX_DISTCACHE_VISIBILITIES).split(",");
+   Assert.assertEquals("No. of simulated job's distcache files mismatched "
+       + "with no. of original job's distcache files",
+       origDistFiles.length, simuDistFiles.length);
+
+   for (int index = 0; index < simuDistFiles.length; index++) {
+     String simDistFile = simuDistFiles[index];
+     Path distPath = new Path(simDistFile);
+     boolean isLocalDistCache =
+         GridmixSystemTestCase.isLocalDistCache(simDistFile,
+             simuJobConf.getUser(),
+             Boolean.valueOf(simuDistVisibilities[index]));
+     if (isLocalDistCache) {
+       // Permissions are only checked for non-local cache files.
+       continue;
+     }
+     FileSystem fs = distPath.getFileSystem(conf);
+     FileStatus fstat = fs.getFileStatus(distPath);
+     FsPermission permission = fstat.getPermission();
+     // Compare the FsAction constants themselves rather than the identity
+     // of their SYMBOL strings.
+     Assert.assertEquals("HDFS distributed cache file has wrong "
+         + "permissions for users.",
+         FsAction.READ_WRITE, permission.getUserAction());
+     Assert.assertEquals("HDFS distributed cache file has wrong "
+         + "permissions for groups.",
+         FsAction.READ, permission.getGroupAction());
+     Assert.assertEquals("HDFS distributed cache file has wrong "
+         + "permissions for others.",
+         FsAction.READ, permission.getOtherAction());
+   }
+ }
+
+ /**
+  * Verify that the simulated job lists the same distributed cache file
+  * sizes as the original job. Both size lists are sorted as strings and
+  * then compared position by position.
+  * @param simuJobConf - Simulated job configuration.
+  * @param origJobConf - Original job configuration.
+  */
+ private void assertFileSizes(JobConf simuJobConf, JobConf origJobConf) {
+   final String sizesKey = GridMixConfig.GRIDMIX_DISTCACHE_FILESSIZE;
+
+   final String [] expectedSizes = origJobConf.get(sizesKey).split(",");
+   Arrays.sort(expectedSizes);
+
+   final String [] actualSizes = simuJobConf.get(sizesKey).split(",");
+   Arrays.sort(actualSizes);
+
+   Assert.assertEquals("Simulated job's file size list has not "
+       + "matched with the Original job's file size list.",
+       expectedSizes.length,
+       actualSizes.length);
+
+   int position = 0;
+   for (String expectedSize : expectedSizes) {
+     Assert.assertEquals("Simulated job distcache file size has not "
+         + "matched with original job distcache file size.",
+         expectedSize,
+         actualSizes[position]);
+     position++;
+   }
+ }
+
+ /**
+  * Remember the simulated and the original configuration of a job for the
+  * verification between jobs. Jobs whose simulated configuration has no
+  * distributed cache files are not recorded.
+  * @param jobId - id under which the configurations are stored.
+  * @param simuJobConf - Simulated job configuration (stored at index 0).
+  * @param origJobConf - Original job configuration (stored at index 1).
+  */
+ private void setJobDistributedCacheInfo(String jobId, JobConf simuJobConf,
+     JobConf origJobConf) {
+   if (simuJobConf.get(GridMixConfig.GRIDMIX_DISTCACHE_FILES) == null) {
+     return;
+   }
+   final List<JobConf> simuThenOrig = new ArrayList<JobConf>();
+   simuThenOrig.add(simuJobConf);
+   simuThenOrig.add(origJobConf);
+   simuAndOrigJobsInfo.put(jobId, simuThenOrig);
+ }
+
+ /**
+  * Copy the values of the given map into a new, modifiable list, in the
+  * iteration order of the map.
+  * @param jobOccurs - occurrences keyed by distributed cache file.
+  * @return - the occurrences as a list.
+  */
+ private List<Integer> getMapValuesAsList(Map<String,Integer> jobOccurs) {
+   return new ArrayList<Integer>(jobOccurs.values());
+ }
+
private String convertJobStatus(String jobStatus) {
if (jobStatus.equals("SUCCEEDED")) {
return "SUCCESS";
diff --git a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java
index 65dd4ff..723adbc 100644
--- a/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java
+++ b/src/contrib/gridmix/src/test/system/org/apache/hadoop/mapred/gridmix/test/system/UtilsForGridmix.java
@@ -88,12 +88,12 @@
public static String [] getArgsList(Path gridmixDir, int gridmixRunMode,
String [] values, String [] otherArgs) {
String [] runtimeArgs = {
- "-D", GridMixConfig.GRIDMIX_LOG_MODE + " = DEBUG",
- "-D", GridMixConfig.GRIDMIX_OUTPUT_DIR + " = gridmix",
- "-D", GridMixConfig.GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE + " = true",
- "-D", GridMixConfig.GRIDMIX_JOB_TYPE + " = " + values[0],
- "-D", GridMixConfig.GRIDMIX_USER_RESOLVER + " = " + values[1],
- "-D", GridMixConfig.GRIDMIX_SUBMISSION_POLICY + " = " + values[2]
+ "-D", GridMixConfig.GRIDMIX_LOG_MODE + "=DEBUG",
+ "-D", GridMixConfig.GRIDMIX_OUTPUT_DIR + "=gridmix",
+ "-D", GridMixConfig.GRIDMIX_JOB_SUBMISSION_QUEUE_IN_TRACE + "=true",
+ "-D", GridMixConfig.GRIDMIX_JOB_TYPE + "=" + values[0],
+ "-D", GridMixConfig.GRIDMIX_USER_RESOLVER + "=" + values[1],
+ "-D", GridMixConfig.GRIDMIX_SUBMISSION_POLICY + "=" + values[2]
};
String [] classArgs;
diff --git a/src/contrib/gridmix/src/test/system/resources/distcache_case1_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/distcache_case1_trace.json.gz
new file mode 100644
index 0000000..7b93b07
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/distcache_case1_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/distcache_case2_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/distcache_case2_trace.json.gz
new file mode 100644
index 0000000..7bdd313
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/distcache_case2_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/distcache_case3_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/distcache_case3_trace.json.gz
new file mode 100644
index 0000000..04fd705
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/distcache_case3_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/distcache_case4_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/distcache_case4_trace.json.gz
new file mode 100644
index 0000000..74742fc
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/distcache_case4_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/distcache_case5_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/distcache_case5_trace.json.gz
new file mode 100644
index 0000000..c178761
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/distcache_case5_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/distcache_case6_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/distcache_case6_trace.json.gz
new file mode 100644
index 0000000..9a53ad2
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/distcache_case6_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/distcache_case7_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/distcache_case7_trace.json.gz
new file mode 100644
index 0000000..43a181a
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/distcache_case7_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/distcache_case8_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/distcache_case8_trace.json.gz
new file mode 100644
index 0000000..fa3d791
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/distcache_case8_trace.json.gz
Binary files differ
diff --git a/src/contrib/gridmix/src/test/system/resources/distcache_case9_trace.json.gz b/src/contrib/gridmix/src/test/system/resources/distcache_case9_trace.json.gz
new file mode 100644
index 0000000..ee009ed
--- /dev/null
+++ b/src/contrib/gridmix/src/test/system/resources/distcache_case9_trace.json.gz
Binary files differ