/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.List;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
import org.apache.hadoop.util.ReflectionUtils;
/**
 * Validates map phase progress.
 * The test case uses the new MapReduce API.
 * We extend the Task.TaskReporter class and override setProgress()
 * to validate the map phase progress being set.
 * We extend MapTask and override its startReporter() method so that it
 * creates a TestTaskReporter instead of a TaskReporter, and then call
 * mapTask.run().
 * Similar to LocalJobRunner, we set up splits and call mapTask.run()
 * directly; no job is submitted, only the map task is run.
 * We use IsolationRunner.FakeUmbilical as the umbilical protocol.
 * Since the reporter's setProgress() validates progress after every
 * record is read, validation of the map phase progress is complete once
 * mapTask.run() finishes. Sort phase progress of the map task is not
 * validated here.
 */
public class TestMapProgress extends TestCase {
public static final Log LOG = LogFactory.getLog(TestMapProgress.class);
  private static String TEST_ROOT_DIR = new File(System.getProperty(
      "test.build.data", "/tmp")).getAbsolutePath() + "/mapPhaseProgress";
private FileSystem fs = null;
private TestMapTask map = null;
private JobID jobId = null;
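  // FakeUmbilical is IsolationRunner's stub TaskUmbilicalProtocol; it lets
  // the map task report status without a live TaskTracker.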
private IsolationRunner.FakeUmbilical fakeUmbilical =
new IsolationRunner.FakeUmbilical();
/**
* Task Reporter that validates map phase progress after each record is
* processed by map task
*/
public class TestTaskReporter extends Task.TaskReporter {
private int recordNum = 0; // number of records processed
TestTaskReporter(Task task) {
task.super(task.getProgress(), fakeUmbilical);
}
@Override
public void setProgress(float progress) {
super.setProgress(progress);
float mapTaskProgress = map.getProgress().getProgress();
LOG.info("Map task progress is " + mapTaskProgress);
      if (recordNum >= 3) {
        // only 3 records are there; skip validation after the 3rd record
        return;
      }
      recordNum++;
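      // Expected value: the assertion below assumes the map phase is
      // weighted at 0.667 of overall map task progress (the remainder is
      // the sort phase), so after record k of 3 equal-sized records the
      // progress should be k * 0.667/3, i.e. roughly 0.222, 0.445 and 0.667.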
// validate map task progress when the map task is in map phase
assertTrue("Map progress is not the expected value.",
Math.abs(mapTaskProgress - ((0.667/3)*recordNum)) < 0.001);
}
}
  /**
   * Map task that overrides the startReporter() method so that it uses a
   * TestTaskReporter with FakeUmbilical instead of the regular TaskReporter.
   */
class TestMapTask extends MapTask {
public TestMapTask(String jobFile, TaskAttemptID taskId,
int partition, TaskSplitIndex splitIndex,
int numSlotsRequired) {
super(jobFile, taskId, partition, splitIndex, numSlotsRequired);
}
/**
* Create a TestTaskReporter and use it for validating map phase progress
*/
@Override
TaskReporter startReporter(final TaskUmbilicalProtocol umbilical) {
      // create a TestTaskReporter; no communication thread is started,
      // since validation happens synchronously in setProgress()
TaskReporter reporter = new TestTaskReporter(map);
return reporter;
}
}
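  /*
   * Note: MapTask.run() obtains its reporter through startReporter(), so
   * the override above is enough to route every map-side progress update
   * through TestTaskReporter.setProgress().
   */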
  // Creates, in the given dir, a part-0 file with 3 records of the same size.
private void createInputFile(Path rootDir) throws IOException {
    if (fs.exists(rootDir)) {
fs.delete(rootDir, true);
}
String str = "The quick brown fox\n" + "The brown quick fox\n"
+ "The fox brown quick\n";
DataOutputStream inpFile = fs.create(new Path(rootDir, "part-0"));
inpFile.writeBytes(str);
inpFile.close();
}
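  /*
   * Each record written above is exactly 20 bytes, so a byte-offset based
   * record reader (e.g. the line record reader used for text input) reports
   * equal progress increments of one third of the split per record.
   */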
/**
* Validates map phase progress after each record is processed by map task
* using custom task reporter.
*/
public void testMapProgress() throws Exception {
JobConf job = new JobConf();
fs = FileSystem.getLocal(job);
Path rootDir = new Path(TEST_ROOT_DIR);
createInputFile(rootDir);
job.setNumReduceTasks(0);
TaskAttemptID taskId = TaskAttemptID.forName(
"attempt_200907082313_0424_m_000000_0");
job.setClass("mapreduce.job.outputformat.class",
NullOutputFormat.class, OutputFormat.class);
job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
TEST_ROOT_DIR);
jobId = taskId.getJobID();
JobContext jContext = new JobContextImpl(job, jobId);
InputFormat<?, ?> input =
ReflectionUtils.newInstance(jContext.getInputFormatClass(), job);
List<InputSplit> splits = input.getSplits(jContext);
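    // Write the split files (job.split and job.splitmetainfo) under
    // TEST_ROOT_DIR and read the meta info back, mimicking what the job
    // client and the JobTracker do during a real submission.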
JobSplitWriter.createSplitFiles(new Path(TEST_ROOT_DIR), job, splits);
TaskSplitMetaInfo[] splitMetaInfo =
SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job, new Path(TEST_ROOT_DIR));
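    // One TaskSplitMetaInfo per input split; each entry carries the index
    // (file name and offset) the map task uses to locate its serialized
    // split in job.split.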
job.setUseNewMapper(true); // use new api
    for (int i = 0; i < splitMetaInfo.length; i++) { // splitMetaInfo.length is 1
map = new TestMapTask(
          job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system") +
          "/" + jobId + "/job.xml",
taskId, i,
splitMetaInfo[i].getSplitIndex(), 1);
JobConf localConf = new JobConf(job);
map.localizeConfiguration(localConf);
map.setConf(localConf);
map.run(localConf, fakeUmbilical);
}
// clean up
fs.delete(rootDir, true);
}
}