/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.List;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
import org.apache.hadoop.util.ReflectionUtils;
/**
 * Validates map phase progress.
 * The test case uses the new API.
 * We extend the Task.TaskReporter class and override setProgress()
 * to validate the map phase progress as it is set.
 * We extend MapTask and override its startReporter() method so that it
 * creates a TestTaskReporter instead of a TaskReporter, and then call
 * mapTask.run().
 * Similar to LocalJobRunner, we set up splits and call mapTask.run()
 * directly. No job is run; only the map task is run.
 * Since the reporter's setProgress() validates progress after
 * every record is read, validation of map phase progress is complete
 * once mapTask.run() finishes. Sort phase progress in the map task is not
 * validated here.
 */
public class TestMapProgress extends TestCase {
public static final Log LOG = LogFactory.getLog(TestMapProgress.class);
private static String TEST_ROOT_DIR = new File(System.getProperty(
"test.build.data", "/tmp")).getAbsolutePath() + "/mapPhaseProgress";
static class FakeUmbilical implements TaskUmbilicalProtocol {
public long getProtocolVersion(String protocol, long clientVersion) {
return TaskUmbilicalProtocol.versionID;
}
@Override
public ProtocolSignature getProtocolSignature(String protocol,
long clientVersion, int clientMethodsHash) throws IOException {
return ProtocolSignature.getProtocolSignature(
this, protocol, clientVersion, clientMethodsHash);
}
public void done(TaskAttemptID taskid) throws IOException {
LOG.info("Task " + taskid + " reporting done.");
}
public void fsError(TaskAttemptID taskId, String message) throws IOException {
LOG.info("Task " + taskId + " reporting file system error: " + message);
}
public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
LOG.info("Task " + taskId + " reporting shuffle error: " + message);
}
public void fatalError(TaskAttemptID taskId, String msg) throws IOException {
LOG.info("Task " + taskId + " reporting fatal error: " + msg);
}
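// No child JVMs are launched by this test, so there is never a task to hand out.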
public JvmTask getTask(JvmContext context) throws IOException {
return null;
}
public boolean ping(TaskAttemptID taskid) throws IOException {
return true;
}
public void commitPending(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
statusUpdate(taskId, taskStatus);
}
public boolean canCommit(TaskAttemptID taskid) throws IOException {
return true;
}
public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
StringBuilder buf = new StringBuilder("Task ");
buf.append(taskId);
buf.append(" making progress to ");
buf.append(taskStatus.getProgress());
String state = taskStatus.getStateString();
if (state != null) {
buf.append(" and state of ");
buf.append(state);
}
LOG.info(buf.toString());
// ignore phase
// ignore counters
return true;
}
public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) throws IOException {
LOG.info("Task " + taskid + " has problem " + trace);
}
public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId,
int fromEventId, int maxLocs, TaskAttemptID id) throws IOException {
return new MapTaskCompletionEventsUpdate(TaskCompletionEvent.EMPTY_ARRAY,
false);
}
public void reportNextRecordRange(TaskAttemptID taskid,
SortedRanges.Range range) throws IOException {
LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
}
}
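// Fixture state shared by the test method, the task and the reporter.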
private FileSystem fs = null;
private TestMapTask map = null;
private JobID jobId = null;
private FakeUmbilical fakeUmbilical = new FakeUmbilical();
/**
 * Task reporter that validates map phase progress after each record is
 * processed by the map task.
 */
public class TestTaskReporter extends Task.TaskReporter {
private int recordNum = 0; // number of records processed
TestTaskReporter(Task task) {
task.super(task.getProgress(), fakeUmbilical);
}
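// The input has 3 equal-sized records, so after the i-th record is read
// the map phase progress is expected to be i/3.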
@Override
public void setProgress(float progress) {
super.setProgress(progress);
float mapTaskProgress = map.getProgress().getProgress();
LOG.info("Map task progress is " + mapTaskProgress);
if (recordNum >= 3) {
// Only 3 records exist; there is nothing to validate after the 3rd record.
return;
}
recordNum++;
// validate map task progress when the map task is in map phase
assertTrue("Map progress is not the expected value.",
Math.abs(mapTaskProgress - ((float)recordNum/3)) < 0.001);
}
}
/**
 * Map task that overrides the startReporter() method so that a
 * TestTaskReporter is used instead of a TaskReporter, reporting to the
 * FakeUmbilical.
 */
class TestMapTask extends MapTask {
public TestMapTask(String jobFile, TaskAttemptID taskId,
int partition, TaskSplitIndex splitIndex,
int numSlotsRequired) {
super(jobFile, taskId, partition, splitIndex, numSlotsRequired);
}
/**
* Create a TestTaskReporter and use it for validating map phase progress
*/
@Override
TaskReporter startReporter(final TaskUmbilicalProtocol umbilical) {
// Return the validating TestTaskReporter; the communication thread that
// would talk to the parent process is not started here.
return new TestTaskReporter(map);
}
}
// Creates a part-0 file with 3 records of the same size in the given dir.
private void createInputFile(Path rootDir) throws IOException {
if (fs.exists(rootDir)) {
fs.delete(rootDir, true);
}
String str = "The quick brown fox\n" + "The brown quick fox\n"
+ "The fox brown quick\n";
DataOutputStream inpFile = fs.create(new Path(rootDir, "part-0"));
inpFile.writeBytes(str);
inpFile.close();
}
/**
 * Validates map phase progress after each record is processed by the map
 * task, using the custom task reporter.
 */
public void testMapProgress() throws Exception {
JobConf job = new JobConf();
fs = FileSystem.getLocal(job);
Path rootDir = new Path(TEST_ROOT_DIR);
createInputFile(rootDir);
job.setNumReduceTasks(0);
TaskAttemptID taskId = TaskAttemptID.forName(
"attempt_200907082313_0424_m_000000_0");
job.setClass("mapreduce.job.outputformat.class",
NullOutputFormat.class, OutputFormat.class);
job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
TEST_ROOT_DIR);
jobId = taskId.getJobID();
JobContext jContext = new JobContextImpl(job, jobId);
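// Compute the input splits with the new-API InputFormat and write them
// out, as the job client does at submission time.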
InputFormat<?, ?> input =
ReflectionUtils.newInstance(jContext.getInputFormatClass(), job);
List<InputSplit> splits = input.getSplits(jContext);
JobSplitWriter.createSplitFiles(new Path(TEST_ROOT_DIR), job,
new Path(TEST_ROOT_DIR).getFileSystem(job),
splits);
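// Read the split meta info back, the way the JobTracker does when it
// creates map tasks.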
TaskSplitMetaInfo[] splitMetaInfo =
SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job, new Path(TEST_ROOT_DIR));
job.setUseNewMapper(true); // use new api
for (int i = 0; i < splitMetaInfo.length; i++) { // splitMetaInfo.length is 1
map = new TestMapTask(
job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system") +
"/" + jobId + "/job.xml",
taskId, i,
splitMetaInfo[i].getSplitIndex(), 1);
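// Localize the configuration and run the map task in-process; progress is
// validated by TestTaskReporter as each record is consumed.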
JobConf localConf = new JobConf(job);
map.localizeConfiguration(localConf);
map.setConf(localConf);
map.run(localConf, fakeUmbilical);
}
// clean up
fs.delete(rootDir, true);
}
}