blob: ef38fee1aa24a51b742076df716832cff3cafc3c [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.hadoop.mapred.FakeObjectUtilities.FakeJobInProgress;
import org.apache.hadoop.mapred.FakeObjectUtilities.FakeJobTracker;
import org.apache.hadoop.mapred.UtilsForTests.FakeClock;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
/**
 * A test to verify JobTracker's resilience to lost task trackers.
 *
 * <p>Every test runs against a {@link FakeJobTracker} that is built with a
 * {@link FakeClock}; the tests advance that clock and then call
 * {@code checkExpiredTrackers()} themselves to make a tracker expire.
 */
@SuppressWarnings("deprecation")
public class TestLostTracker extends TestCase {

  /**
   * Never assigned or read by the tests in this class; each test builds its
   * own job in a local variable.
   * NOTE(review): kept only because it is package-visible - remove once it is
   * confirmed that nothing else references it.
   */
  FakeJobInProgress job;

  /** Job tracker under test; re-created by {@link #setUp()} for each test. */
  static FakeJobTracker jobTracker;

  /** Fake clock handed to {@link #jobTracker}; re-created for each test. */
  static FakeClock clock;

  /** Names of the two task trackers used by the tests. */
  static String[] trackers = new String[] {"tracker_tracker1:1000",
      "tracker_tracker2:1000"};

  /**
   * Builds a fresh fake job tracker with a 1000 ms tracker expiry interval
   * and a maximum tracker blacklist count of 1, then starts its
   * expire-trackers thread.
   */
  @Override
  protected void setUp() throws Exception {
    JobConf conf = new JobConf();
    conf.set(JTConfig.JT_IPC_ADDRESS, "localhost:0");
    conf.set(JTConfig.JT_HTTP_ADDRESS, "0.0.0.0:0");
    conf.setLong(JTConfig.JT_TRACKER_EXPIRY_INTERVAL, 1000);
    conf.set(JTConfig.JT_MAX_TRACKER_BLACKLISTS, "1");
    clock = new FakeClock();
    jobTracker = new FakeJobTracker(conf, clock, trackers);
    jobTracker.startExpireTrackersThread();
  }

  /** Stops the expire-trackers thread started in {@link #setUp()}. */
  @Override
  protected void tearDown() throws Exception {
    jobTracker.stopExpireTrackersThread();
  }

  /**
   * Asserts that a map attempt handed out after a tracker was lost exists and
   * belongs to the same task as the attempt that ran on the lost tracker.
   *
   * @param lostAttempt attempt that ran on the lost tracker
   * @param reassignedAttempt attempt handed to the surviving tracker
   */
  private static void assertReassigned(TaskAttemptID lostAttempt,
      TaskAttemptID reassignedAttempt) {
    assertNotNull("Map Task from Lost Tracker did not get reassigned",
        reassignedAttempt);
    assertEquals("Task ID of reassigned map task does not match",
        lostAttempt.getTaskID().toString(),
        reassignedAttempt.getTaskID().toString());
  }

  /**
   * Verifies that a map task finished on a tracker that is subsequently lost
   * is handed out again, under the same task ID, to another tracker.
   */
  public void testLostTracker() throws IOException {
    // Tracker 0 contacts JT
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[0]);

    JobConf conf = new JobConf();
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);
    FakeJobInProgress jip = new FakeJobInProgress(conf, jobTracker);
    jip.initTasks();

    // Tracker 0 gets the map task. Fail with a message here rather than with
    // a NullPointerException further down if nothing was scheduled.
    TaskAttemptID lostAttempt = jip.findMapTask(trackers[0]);
    assertNotNull("Tracker 0 did not get the map task", lostAttempt);
    jip.finishTask(lostAttempt);

    // Advance clock well past the 1000 ms expiry interval configured in
    // setUp(). Tracker 0 would have got lost
    clock.advance(8 * 1000);
    jobTracker.checkExpiredTrackers();

    // Tracker 1 establishes contact with JT
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[1]);

    // Tracker 1 should get assigned the lost map task
    TaskAttemptID reassignedAttempt = jip.findMapTask(trackers[1]);
    assertReassigned(lostAttempt, reassignedAttempt);
    jip.finishTask(reassignedAttempt);
  }

  /**
   * Tests a tracker that is lost before it could be blacklisted: its map task
   * is rerun on another tracker, the job succeeds, and afterwards the lost
   * tracker is counted neither as active nor as blacklisted.
   */
  public void testLostTrackerBeforeBlacklisting() throws Exception {
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[0]);

    JobConf conf = new JobConf();
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);
    conf.set(JobContext.MAX_TASK_FAILURES_PER_TRACKER, "1");
    conf.set(JobContext.SETUP_CLEANUP_NEEDED, "false");
    FakeJobInProgress jip = new FakeJobInProgress(conf, jobTracker);
    jip.initTasks();
    jip.setClusterSize(4);

    // Tracker 0 gets the map task
    TaskAttemptID lostAttempt = jip.findMapTask(trackers[0]);
    assertNotNull("Tracker 0 did not get the map task", lostAttempt);
    jip.finishTask(lostAttempt);

    // validate the total tracker count
    assertEquals("Active tracker count mismatch",
        1, jobTracker.getClusterStatus(false).getTaskTrackers());

    // lose the tracker: 1100 ms is just past the 1000 ms expiry interval
    clock.advance(1100);
    jobTracker.checkExpiredTrackers();
    assertFalse("Tracker 0 not lost",
        jobTracker.getClusterStatus(false).getActiveTrackerNames()
            .contains(trackers[0]));

    // validate the total tracker count
    assertEquals("Active tracker count mismatch",
        0, jobTracker.getClusterStatus(false).getTaskTrackers());

    // Tracker 1 establishes contact with JT
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[1]);

    // Tracker 1 should get assigned the lost map task
    TaskAttemptID reassignedAttempt = jip.findMapTask(trackers[1]);
    assertReassigned(lostAttempt, reassignedAttempt);

    // finish the map task
    jip.finishTask(reassignedAttempt);

    // finish the reduce task
    TaskAttemptID reduceAttempt = jip.findReduceTask(trackers[1]);
    assertNotNull("Tracker 1 did not get the reduce task", reduceAttempt);
    jip.finishTask(reduceAttempt);

    // check if job is successful
    assertEquals("Job not successful",
        JobStatus.SUCCEEDED, jip.getStatus().getRunState());

    // only tracker 1 should be counted; tracker 0 is still lost
    assertEquals("Active tracker count mismatch",
        1, jobTracker.getClusterStatus(false).getTaskTrackers());

    // validate blacklisted count .. since we lost one blacklisted tracker
    assertEquals("Blacklisted tracker count mismatch",
        0, jobTracker.getClusterStatus(false).getBlacklistedTrackers());
  }

  /**
   * Tests a tracker that is lost after it has been blacklisted: once it
   * expires it is removed from the job tracker's tracker list and the
   * blacklisted count drops back to zero.
   */
  public void testLostTrackerAfterBlacklisting() throws Exception {
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[0]);
    // Tracker 0 is now 600 ms old; see the second advance below.
    clock.advance(600);

    JobConf conf = new JobConf();
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);
    conf.set(JobContext.MAX_TASK_FAILURES_PER_TRACKER, "1");
    conf.set(JobContext.SETUP_CLEANUP_NEEDED, "false");
    FakeJobInProgress jip = new FakeJobInProgress(conf, jobTracker);
    jip.initTasks();
    jip.setClusterSize(4);

    // check if the tracker count is correct
    assertEquals("Active tracker count mismatch",
        1, jobTracker.taskTrackers().size());

    // Tracker 0 gets the map task
    TaskAttemptID failedAttempt = jip.findMapTask(trackers[0]);
    assertNotNull("Tracker 0 did not get the map task", failedAttempt);
    // Fail the task
    jip.failTask(failedAttempt);

    // Tracker 1 establishes contact with JT
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[1]);

    // check if the tracker count is correct
    assertEquals("Active tracker count mismatch",
        2, jobTracker.taskTrackers().size());

    // Tracker 1 gets the map task
    TaskAttemptID retryAttempt = jip.findMapTask(trackers[1]);
    assertNotNull("Tracker 1 did not get the map task", retryAttempt);
    // Finish the task and also the job
    jip.finishTask(retryAttempt);

    // check if job is successful
    assertEquals("Job not successful",
        JobStatus.SUCCEEDED, jip.getStatus().getRunState());

    // check if tracker 0 got blacklisted; guard the index so that an empty
    // result fails with a message instead of ArrayIndexOutOfBoundsException
    assertTrue("No tracker is blacklisted",
        jobTracker.getBlacklistedTrackers().length > 0);
    assertEquals("Tracker 0 not blacklisted",
        trackers[0],
        jobTracker.getBlacklistedTrackers()[0].getTaskTrackerName());

    // check if the tracker count is correct
    assertEquals("Active tracker count mismatch",
        2, jobTracker.taskTrackers().size());

    // validate blacklisted count
    assertEquals("Blacklisted tracker count mismatch",
        1, jobTracker.getClusterStatus(false).getBlacklistedTrackers());

    // Advance clock. Tracker 0 should be lost: 600 + 500 = 1100 ms since its
    // first contact, past the 1000 ms expiry interval, whereas tracker 1
    // made contact only 500 ms ago.
    clock.advance(500);
    jobTracker.checkExpiredTrackers();

    // check if the task tracker is lost
    assertFalse("Tracker 0 not lost",
        jobTracker.getClusterStatus(false).getActiveTrackerNames()
            .contains(trackers[0]));

    // check if the lost tracker has been removed from the jobtracker
    assertEquals("Active tracker count mismatch",
        1, jobTracker.taskTrackers().size());

    // validate blacklisted count
    assertEquals("Blacklisted tracker count mismatch",
        0, jobTracker.getClusterStatus(false).getBlacklistedTrackers());
  }
}