hadoop-mapreduce/src/test/system/test/org/apache/hadoop/mapred/TestSortValidate.java - hadoop - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.hadoop.mapred;

 import junit.framework.Assert;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.examples.RandomWriter;
 import org.apache.hadoop.examples.Sort;

 import org.apache.hadoop.mapreduce.test.system.JTProtocol;
 import org.apache.hadoop.mapreduce.test.system.JobInfo;
 import org.apache.hadoop.mapreduce.test.system.MRCluster;

 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;

 /**
  * A System test to test the Map-Reduce framework's sort
  * with a real Map-Reduce Cluster.
  */
 public class TestSortValidate {
   // Input/Output paths for sort
   private static final Path SORT_INPUT_PATH = new Path("inputDirectory");
   private static final Path SORT_OUTPUT_PATH = new Path("outputDirectory");

   // make it big enough to cause a spill in the map
   private static final int RW_BYTES_PER_MAP = 3 * 1024 * 1024;
   private static final int RW_MAPS_PER_HOST = 2;

   private MRCluster cluster = null;
   private FileSystem dfs = null;
   private JobClient client = null;

   private static final Log LOG = LogFactory.getLog(TestSortValidate.class);

   public TestSortValidate()
   throws Exception {
     cluster = MRCluster.createCluster(new Configuration());
   }

   @Before
   public void setUp() throws java.lang.Exception {
     cluster.setUp();
     client = cluster.getJTClient().getClient();

     dfs = client.getFs();
     dfs.delete(SORT_INPUT_PATH, true);
     dfs.delete(SORT_OUTPUT_PATH, true);
   }

   @After
   public void after() throws Exception {
     cluster.tearDown();
     dfs.delete(SORT_INPUT_PATH, true);
     dfs.delete(SORT_OUTPUT_PATH, true);
   }

   public void runRandomWriter(Configuration job, Path sortInput)
   throws Exception {
     // Scale down the default settings for RandomWriter for the test-case
     // Generates NUM_HADOOP_SLAVES * RW_MAPS_PER_HOST * RW_BYTES_PER_MAP
     job.setInt("test.randomwrite.bytes_per_map", RW_BYTES_PER_MAP);
     job.setInt("test.randomwriter.maps_per_host", RW_MAPS_PER_HOST);
     String[] rwArgs = {sortInput.toString()};

     runAndVerify(job,new RandomWriter(), rwArgs);
   }

   private void runAndVerify(Configuration job, Tool tool, String[] args)
     throws Exception {

     // This calculates the previous number fo jobs submitted before a new
     // job gets submitted.
     int prevJobsNum = 0;

     // JTProtocol wovenClient
     JTProtocol wovenClient = cluster.getJTClient().getProxy();

     // JobStatus
     JobStatus[] jobStatus = null;

     // JobID
     JobID id = null;

     // RunningJob rJob;
     RunningJob rJob = null;

     // JobInfo jInfo;
     JobInfo jInfo = null;

     //Getting the previous job numbers that are submitted.
     jobStatus = client.getAllJobs();
     prevJobsNum = jobStatus.length;

     // Run RandomWriter
     Assert.assertEquals(ToolRunner.run(job, tool, args), 0);

     //Waiting for the job to appear in the jobstatus
     jobStatus = client.getAllJobs();

     while (jobStatus.length - prevJobsNum == 0) {
       LOG.info("Waiting for the job to appear in the jobStatus");
       Thread.sleep(1000);
       jobStatus = client.getAllJobs();
     }

     //Getting the jobId of the just submitted job
     //The just submitted job is always added in the first slot of jobstatus
     id = jobStatus[0].getJobID();

     rJob = client.getJob(id);

     jInfo = wovenClient.getJobInfo(id);

     //Making sure that the job is complete.
     while (jInfo != null && !jInfo.getStatus().isJobComplete()) {
       Thread.sleep(10000);
       jInfo = wovenClient.getJobInfo(id);
     }

     cluster.getJTClient().verifyCompletedJob(id);
   }

   private void runSort(Configuration job, Path sortInput, Path sortOutput)
   throws Exception {

     job.setInt("io.sort.mb", 1);

     // Setup command-line arguments to 'sort'
     String[] sortArgs = {sortInput.toString(), sortOutput.toString()};

     runAndVerify(job,new Sort(), sortArgs);

   }

   private void runSortValidator(Configuration job,
                                        Path sortInput, Path sortOutput)
   throws Exception {
     String[] svArgs = {"-sortInput", sortInput.toString(),
                        "-sortOutput", sortOutput.toString()};

     runAndVerify(job,new SortValidator(), svArgs);

   }

   @Test
   public void testMapReduceSort() throws Exception {
     // Run randomwriter to generate input for 'sort'
     runRandomWriter(cluster.getConf(), SORT_INPUT_PATH);

     // Run sort
     runSort(cluster.getConf(), SORT_INPUT_PATH, SORT_OUTPUT_PATH);

     // Run sort-validator to check if sort worked correctly
     runSortValidator(cluster.getConf(), SORT_INPUT_PATH,
                      SORT_OUTPUT_PATH);
   }
 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.hadoop.mapred;

	import junit.framework.Assert;
	import org.apache.commons.logging.Log;
	import org.apache.commons.logging.LogFactory;
	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.FileSystem;
	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.util.ToolRunner;
	import org.apache.hadoop.util.Tool;
	import org.apache.hadoop.examples.RandomWriter;
	import org.apache.hadoop.examples.Sort;

	import org.apache.hadoop.mapreduce.test.system.JTProtocol;
	import org.apache.hadoop.mapreduce.test.system.JobInfo;
	import org.apache.hadoop.mapreduce.test.system.MRCluster;

	import org.junit.After;
	import org.junit.Before;
	import org.junit.Test;

	/**
	* A System test to test the Map-Reduce framework's sort
	* with a real Map-Reduce Cluster.
	*/
	public class TestSortValidate {
	// Input/Output paths for sort
	private static final Path SORT_INPUT_PATH = new Path("inputDirectory");
	private static final Path SORT_OUTPUT_PATH = new Path("outputDirectory");

	// make it big enough to cause a spill in the map
	private static final int RW_BYTES_PER_MAP = 3 * 1024 * 1024;
	private static final int RW_MAPS_PER_HOST = 2;

	private MRCluster cluster = null;
	private FileSystem dfs = null;
	private JobClient client = null;

	private static final Log LOG = LogFactory.getLog(TestSortValidate.class);

	public TestSortValidate()
	throws Exception {
	cluster = MRCluster.createCluster(new Configuration());
	}

	@Before
	public void setUp() throws java.lang.Exception {
	cluster.setUp();
	client = cluster.getJTClient().getClient();

	dfs = client.getFs();
	dfs.delete(SORT_INPUT_PATH, true);
	dfs.delete(SORT_OUTPUT_PATH, true);
	}

	@After
	public void after() throws Exception {
	cluster.tearDown();
	dfs.delete(SORT_INPUT_PATH, true);
	dfs.delete(SORT_OUTPUT_PATH, true);
	}

	public void runRandomWriter(Configuration job, Path sortInput)
	throws Exception {
	// Scale down the default settings for RandomWriter for the test-case
	// Generates NUM_HADOOP_SLAVES * RW_MAPS_PER_HOST * RW_BYTES_PER_MAP
	job.setInt("test.randomwrite.bytes_per_map", RW_BYTES_PER_MAP);
	job.setInt("test.randomwriter.maps_per_host", RW_MAPS_PER_HOST);
	String[] rwArgs = {sortInput.toString()};

	runAndVerify(job,new RandomWriter(), rwArgs);
	}

	private void runAndVerify(Configuration job, Tool tool, String[] args)
	throws Exception {

	// This calculates the previous number fo jobs submitted before a new
	// job gets submitted.
	int prevJobsNum = 0;

	// JTProtocol wovenClient
	JTProtocol wovenClient = cluster.getJTClient().getProxy();

	// JobStatus
	JobStatus[] jobStatus = null;

	// JobID
	JobID id = null;

	// RunningJob rJob;
	RunningJob rJob = null;

	// JobInfo jInfo;
	JobInfo jInfo = null;

	//Getting the previous job numbers that are submitted.
	jobStatus = client.getAllJobs();
	prevJobsNum = jobStatus.length;

	// Run RandomWriter
	Assert.assertEquals(ToolRunner.run(job, tool, args), 0);

	//Waiting for the job to appear in the jobstatus
	jobStatus = client.getAllJobs();

	while (jobStatus.length - prevJobsNum == 0) {
	LOG.info("Waiting for the job to appear in the jobStatus");
	Thread.sleep(1000);
	jobStatus = client.getAllJobs();
	}

	//Getting the jobId of the just submitted job
	//The just submitted job is always added in the first slot of jobstatus
	id = jobStatus[0].getJobID();

	rJob = client.getJob(id);

	jInfo = wovenClient.getJobInfo(id);

	//Making sure that the job is complete.
	while (jInfo != null && !jInfo.getStatus().isJobComplete()) {
	Thread.sleep(10000);
	jInfo = wovenClient.getJobInfo(id);
	}

	cluster.getJTClient().verifyCompletedJob(id);
	}

	private void runSort(Configuration job, Path sortInput, Path sortOutput)
	throws Exception {

	job.setInt("io.sort.mb", 1);

	// Setup command-line arguments to 'sort'
	String[] sortArgs = {sortInput.toString(), sortOutput.toString()};

	runAndVerify(job,new Sort(), sortArgs);

	}

	private void runSortValidator(Configuration job,
	Path sortInput, Path sortOutput)
	throws Exception {
	String[] svArgs = {"-sortInput", sortInput.toString(),
	"-sortOutput", sortOutput.toString()};

	runAndVerify(job,new SortValidator(), svArgs);

	}

	@Test
	public void testMapReduceSort() throws Exception {
	// Run randomwriter to generate input for 'sort'
	runRandomWriter(cluster.getConf(), SORT_INPUT_PATH);

	// Run sort
	runSort(cluster.getConf(), SORT_INPUT_PATH, SORT_OUTPUT_PATH);

	// Run sort-validator to check if sort worked correctly
	runSortValidator(cluster.getConf(), SORT_INPUT_PATH,
	SORT_OUTPUT_PATH);
	}
	}