/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.DataOutputStream;
import java.net.URI;
import java.util.Collection;
import java.util.ArrayList;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;
import org.apache.hadoop.mapreduce.test.system.JTProtocol;
import org.apache.hadoop.mapreduce.test.system.TTClient;
import org.apache.hadoop.mapreduce.test.system.JobInfo;
import org.apache.hadoop.mapreduce.test.system.TaskInfo;
import org.apache.hadoop.mapreduce.test.system.MRCluster;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.UtilsForTests;
import org.apache.hadoop.mapreduce.test.system.FinishTaskControlAction;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.SleepJob;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.AfterClass;
import org.junit.Test;
/**
* Verify the Distributed Cache functionality. This test scenario covers the
* behaviour of a distributed cache file that is modified before and after
* being accessed by at most two jobs. Once a job uses a distributed cache
* file, that file is stored in mapred.local.dir. If the next job uses the
* same file but with a different timestamp, the file is stored again. So, if
* two jobs choose the same tasktracker for their task execution, the
* distributed cache file should be found there twice.
*
* This testcase runs a job with a distributed cache file. A handle to each
* task's tasktracker is obtained and checked for the presence of the
* distributed cache file, with proper permissions, in the proper directory.
* When the job runs again and any of its tasks hits a tasktracker that ran a
* task of the previous job, the file should be uploaded again and the task
* should not use the old file. This is verified.
*/
public class TestDistributedCacheModifiedFile {
private static MRCluster cluster = null;
private static FileSystem dfs = null;
private static FileSystem ttFs = null;
private static JobClient client = null;
private static FsPermission permission = new FsPermission((short) 00777);
private static String uriPath = "hdfs:///tmp/test.txt";
private static final Path URIPATH = new Path(uriPath);
private String distributedFileName = "test.txt";
static final Log LOG =
LogFactory.getLog(TestDistributedCacheModifiedFile.class);
public TestDistributedCacheModifiedFile() throws Exception {
}
@BeforeClass
public static void setUp() throws Exception {
cluster = MRCluster.createCluster(new Configuration());
cluster.setUp();
client = cluster.getJTClient().getClient();
dfs = client.getFs();
// Deleting the file if it already exists
dfs.delete(URIPATH, true);
Collection<TTClient> tts = cluster.getTTClients();
// Stopping all TTs
for (TTClient tt : tts) {
tt.kill();
}
// Starting all TTs
for (TTClient tt : tts) {
tt.start();
}
// Waiting for 5 seconds to make sure tasktrackers are ready
Thread.sleep(5000);
}
@AfterClass
public static void tearDown() throws Exception {
cluster.tearDown();
dfs.delete(URIPATH, true);
Collection<TTClient> tts = cluster.getTTClients();
// Stopping all TTs
for (TTClient tt : tts) {
tt.kill();
}
// Starting all TTs
for (TTClient tt : tts) {
tt.start();
}
}
/**
* Tests the distributed cache behaviour for a file that is modified
* between two job runs.
*/
@Test
public void testDistributedCache() throws Exception {
Configuration conf = new Configuration(cluster.getConf());
JTProtocol wovenClient = cluster.getJTClient().getProxy();
// This counter guards the wait loop below,
// which might otherwise become infinite.
int count = 0;
// This boolean will decide whether to run job again
boolean continueLoop = true;
// counter for job Loop
int countLoop = 0;
// This will store all the tasktrackers in which tasks ran
ArrayList<String> taskTrackerCollection = new ArrayList<String>();
// This boolean tells if two tasks ran on the same tasktracker or not
boolean taskTrackerFound = false;
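// The job is run twice in the do-while loop below. During the second run,
// taskTrackerFound records whether any task landed on a tasktracker that
// also ran a task of the first job; only then should the cache file be
// present twice on that tasktracker.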
do {
SleepJob job = new SleepJob();
job.setConf(conf);
Job slpJob = job.createJob(5, 1, 1000, 1000, 100, 100);
// Modify the file before starting the job
String input = "This will be the content of\n" + "distributed cache\n";
// Creating the file with the above content in DFS
DataOutputStream file =
UtilsForTests.createTmpFileDFS(dfs, URIPATH, permission, input);
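// Register the file in the distributed cache and create a symlink so
// that tasks can access it from their working directory.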
DistributedCache.createSymlink(conf);
URI uri = URI.create(uriPath);
DistributedCache.addCacheFile(uri, conf);
JobConf jconf = new JobConf(conf);
// Controls the job till all verification is done
FinishTaskControlAction.configureControlActionForJob(conf);
slpJob.submit();
// Getting the handle of the submitted job
RunningJob rJob =
cluster.getJTClient().getClient().getJob(
org.apache.hadoop.mapred.JobID.downgrade(slpJob.getJobID()));
// Incrementing the job loop counter
countLoop++;
TTClient tClient = null;
JobInfo jInfo = wovenClient.getJobInfo(rJob.getID());
LOG.info("jInfo is :" + jInfo);
// Fail if jobInfo is null
Assert.assertNotNull("jobInfo is null", jInfo);
// Wait for the job to start running.
count = 0;
while (jInfo.getStatus().getRunState() != JobStatus.RUNNING) {
UtilsForTests.waitFor(10000);
count++;
jInfo = wovenClient.getJobInfo(rJob.getID());
// If the count goes beyond a limit, fail the test; this is to avoid an
// infinite loop under unforeseen circumstances.
if (count > 10) {
Assert.fail("Job has not reached running state for more than "
+ "100 seconds. Failing at this point");
}
}
LOG.info("job id is :" + rJob.getID().toString());
TaskInfo[] taskInfos =
cluster.getJTClient().getProxy().getTaskInfo(rJob.getID());
boolean distCacheFileIsFound;
for (TaskInfo taskInfo : taskInfos) {
distCacheFileIsFound = false;
String[] taskTrackers = taskInfo.getTaskTrackers();
for (String taskTracker : taskTrackers) {
// Formatting tasktracker to get just its FQDN
taskTracker = UtilsForTests.getFQDNofTT(taskTracker);
LOG.info("taskTracker is :" + taskTracker);
// Reset taskTrackerFound for this tasktracker
taskTrackerFound = false;
// This will be entered from the second job onwards
if (countLoop > 1) {
if (taskTracker != null) {
continueLoop = taskTrackerCollection.contains(taskTracker);
}
if (continueLoop) {
taskTrackerFound = true;
}
}
// Collecting the tasktrackers
if (taskTracker != null)
taskTrackerCollection.add(taskTracker);
// We have looped through twice to look for tasks getting
// scheduled on the same tasktrackers. The same tasktracker may
// not have been hit for subsequent jobs, possibly because there
// are many tasktrackers. So, the testcase has to stop here.
if (countLoop > 1) {
continueLoop = false;
}
tClient = cluster.getTTClient(taskTracker);
// tClient may be null because the task is already dead. Ex: setup
if (tClient == null) {
continue;
}
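// Search every configured mapred.local.dir of this tasktracker for
// localized copies of the distributed cache file.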
String[] localDirs = tClient.getMapredLocalDirs();
int distributedFileCount = 0;
// Go to every single path
for (String localDir : localDirs) {
// Public distributed cache will always be stored under
// mapred.local.dir/tasktracker/archive
localDir =
localDir
+ Path.SEPARATOR
+ TaskTracker.getPublicDistributedCacheDir();
LOG.info("localDir is : " + localDir);
// Get file status of all the directories
// and files under that path.
FileStatus[] fileStatuses =
tClient.listStatus(localDir, true, true);
for (FileStatus fileStatus : fileStatuses) {
Path path = fileStatus.getPath();
LOG.info("path is :" + path.toString());
// Checking if the received path ends with
// the distributed filename
distCacheFileIsFound =
(path.toString()).endsWith(distributedFileName);
// If the file is found, count it and check its permission.
if (distCacheFileIsFound) {
LOG.info("PATH found is :" + path.toString());
distributedFileCount++;
FsPermission fsPerm = fileStatus.getPermission();
Assert.assertTrue("File Permission is not 777", fsPerm
.equals(new FsPermission("777")));
}
}
}
LOG.debug("The distributed FileCount is :" + distributedFileCount);
LOG.debug("The taskTrackerFound is :" + taskTrackerFound);
// If the distributed cache file is modified in DFS
// between two job runs, it can be present more than once
// on any tasktracker on which tasks of both jobs ran.
if (distributedFileCount != 2 && taskTrackerFound) {
Assert.fail("The distributed cache file count has to be two. "
+ "But found was " + distributedFileCount);
} else if (distributedFileCount > 1 && !taskTrackerFound) {
Assert.fail("The distributed cache file count cannot be more than one."
+ " But found was " + distributedFileCount);
} else if (distributedFileCount < 1) {
Assert.fail("The distributed cache file count is less than one. "
+ "But found was " + distributedFileCount);
}
if (!distCacheFileIsFound) {
Assert.fail("The distributed cache file does not exist");
}
}
}
// Allow the job's tasks to finish through the MR control action.
for (TaskInfo taskInfoRemaining : taskInfos) {
FinishTaskControlAction action =
new FinishTaskControlAction(TaskID.downgrade(taskInfoRemaining
.getTaskID()));
Collection<TTClient> tts = cluster.getTTClients();
for (TTClient cli : tts) {
cli.getProxy().sendAction(action);
}
}
// Killing the job because all the verification needed
// for this testcase is completed.
rJob.killJob();
// Waiting for 3 seconds for cleanup to start
Thread.sleep(3000);
// Getting the last cleanup task's tasktracker also, as
// distributed cache gets uploaded even during cleanup.
TaskInfo[] myTaskInfos = wovenClient.getTaskInfo(rJob.getID());
if (myTaskInfos != null) {
for (TaskInfo info : myTaskInfos) {
if (info.isSetupOrCleanup()) {
String[] taskTrackers = info.getTaskTrackers();
for (String taskTracker : taskTrackers) {
// Formatting tasktracker to get just its FQDN
taskTracker = UtilsForTests.getFQDNofTT(taskTracker);
LOG.info("taskTracker is :" + taskTracker);
// Collecting the tasktrackers
if (taskTracker != null)
taskTrackerCollection.add(taskTracker);
}
}
}
}
// Making sure that the job is complete.
while (jInfo != null && !jInfo.getStatus().isJobComplete()) {
Thread.sleep(10000);
jInfo = wovenClient.getJobInfo(rJob.getID());
}
} while (continueLoop);
}
}