// blob: bfd0b4e75eace2a4044018534ad550c4e2ba62e5 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.ClientBaseWithFixes;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.atMost;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
/**
 * Tests for {@link EmbeddedElectorService} as used by the ResourceManager.
 *
 * Two scenarios are covered: stopping the RM while a transition-to-active
 * callback is in flight, and the interaction between neutral mode and the
 * active/standby callbacks.
 */
public class TestRMEmbeddedElector extends ClientBaseWithFixes {
  private static final Log LOG =
      LogFactory.getLog(TestRMEmbeddedElector.class.getName());

  private static final String RM1_NODE_ID = "rm1";
  private static final int RM1_PORT_BASE = 10000;
  private static final String RM2_NODE_ID = "rm2";
  private static final int RM2_PORT_BASE = 20000;

  /** Pause between checks of {@link #callbackCalled} while waiting for it. */
  private static final long CALLBACK_POLL_INTERVAL_MS = 10;

  /** Base HA configuration, rebuilt before every test by {@link #setup()}. */
  private Configuration conf;

  /** Set to true once the overridden {@code becomeActive()} has been entered. */
  private AtomicBoolean callbackCalled;

  /** Selects which scenario the callback synchronization helper runs. */
  private enum SyncTestType {
    ACTIVE,
    STANDBY,
    NEUTRAL,
    ACTIVE_TIMING,
    STANDBY_TIMING
  }

  /**
   * Builds a two-RM HA configuration with embedded automatic failover
   * enabled, identifying this RM as {@code rm1}, and resets the callback flag.
   *
   * @throws IOException declared for the test framework; nothing visible here
   *     throws it directly
   */
  @Before
  public void setup() throws IOException {
    conf = new YarnConfiguration();
    conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
    conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, true);
    conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_EMBEDDED, true);
    conf.set(YarnConfiguration.RM_CLUSTER_ID, "yarn-test-cluster");
    // hostPort is not declared in this class; presumably it is the test
    // ZooKeeper address supplied by ClientBaseWithFixes.
    conf.set(YarnConfiguration.RM_ZK_ADDRESS, hostPort);
    conf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, 2000);

    conf.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + "," + RM2_NODE_ID);
    conf.set(YarnConfiguration.RM_HA_ID, RM1_NODE_ID);
    HATestUtil.setRpcAddressForRM(RM1_NODE_ID, RM1_PORT_BASE, conf);
    HATestUtil.setRpcAddressForRM(RM2_NODE_ID, RM2_PORT_BASE, conf);

    conf.setLong(YarnConfiguration.CLIENT_FAILOVER_SLEEPTIME_BASE_MS, 100L);

    callbackCalled = new AtomicBoolean(false);
  }

  /**
   * Test that tries to see if there is a deadlock between
   * (a) the thread stopping the RM
   * (b) thread processing the ZK event asking RM to transition to active
   *
   * The test times out if there is a deadlock.
   *
   * @throws InterruptedException if interrupted while waiting for the
   *     callback, for example by the test timeout
   */
  @Test (timeout = 10000)
  public void testDeadlockShutdownBecomeActive() throws InterruptedException {
    MockRM rm = new MockRMWithElector(conf, 1000);
    rm.start();
    try {
      LOG.info("Waiting for callback");
      // Poll with a short sleep rather than spinning: the sleep keeps the CPU
      // free and lets an interrupt (such as the one sent when the timeout
      // fires) end the wait. The interval is far shorter than the callback's
      // delay, so stop() still runs while the callback is in progress.
      while (!callbackCalled.get()) {
        Thread.sleep(CALLBACK_POLL_INTERVAL_MS);
      }
      LOG.info("Stopping RM");
    } finally {
      // Always stop the RM, even when the wait above was interrupted.
      rm.stop();
    }
    LOG.info("Stopped RM");
  }

  /**
   * Test that neutral mode plays well with all other transitions.
   *
   * @throws IOException if there's an issue transitioning
   * @throws InterruptedException if interrupted
   */
  @Test
  public void testCallbackSynchronization()
      throws IOException, InterruptedException {
    testCallbackSynchronization(SyncTestType.ACTIVE);
    testCallbackSynchronization(SyncTestType.STANDBY);
    testCallbackSynchronization(SyncTestType.NEUTRAL);
    testCallbackSynchronization(SyncTestType.ACTIVE_TIMING);
    testCallbackSynchronization(SyncTestType.STANDBY_TIMING);
  }

  /**
   * Helper method to test that neutral mode plays well with other transitions.
   * Each call builds a fresh mocked admin service and elector, puts the
   * elector into neutral mode and then runs the scenario chosen by
   * {@code type}.
   *
   * @param type the type of test to run
   * @throws IOException if there's an issue transitioning
   * @throws InterruptedException if interrupted
   */
  private void testCallbackSynchronization(SyncTestType type)
      throws IOException, InterruptedException {
    AdminService as = mock(AdminService.class);
    RMContext rc = mock(RMContext.class);
    Configuration myConf = new Configuration(conf);

    // Use a ZK timeout shorter than the sleeps in the scenario helpers.
    myConf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, 50);
    when(rc.getRMAdminService()).thenReturn(as);

    // NOTE(review): the service is initialized but never stopped here;
    // confirm whether explicit cleanup is needed.
    EmbeddedElectorService ees = new EmbeddedElectorService(rc);
    ees.init(myConf);

    ees.enterNeutralMode();

    switch (type) {
    case ACTIVE:
      testCallbackSynchronizationActive(as, ees);
      break;
    case STANDBY:
      testCallbackSynchronizationStandby(as, ees);
      break;
    case NEUTRAL:
      testCallbackSynchronizationNeutral(as, ees);
      break;
    case ACTIVE_TIMING:
      testCallbackSynchronizationTimingActive(as, ees);
      break;
    case STANDBY_TIMING:
      testCallbackSynchronizationTimingStandby(as, ees);
      break;
    default:
      fail("Unknown test type: " + type);
      break;
    }
  }

  /**
   * Helper method to test that neutral mode plays well with an active
   * transition. Expects exactly one transition to active and no transition
   * to standby.
   *
   * @param as the admin service
   * @param ees the embedded elector service
   * @throws IOException if there's an issue transitioning
   * @throws InterruptedException if interrupted
   */
  private void testCallbackSynchronizationActive(AdminService as,
      EmbeddedElectorService ees) throws IOException, InterruptedException {
    ees.becomeActive();

    // Wait longer than the configured ZK timeout before verifying.
    Thread.sleep(100);

    verify(as).transitionToActive(any());
    verify(as, never()).transitionToStandby(any());
  }

  /**
   * Helper method to test that neutral mode plays well with a standby
   * transition. Expects exactly one transition to standby.
   *
   * @param as the admin service
   * @param ees the embedded elector service
   * @throws IOException if there's an issue transitioning
   * @throws InterruptedException if interrupted
   */
  private void testCallbackSynchronizationStandby(AdminService as,
      EmbeddedElectorService ees) throws IOException, InterruptedException {
    ees.becomeStandby();

    // Wait longer than the configured ZK timeout before verifying.
    Thread.sleep(100);

    verify(as, atLeast(1)).transitionToStandby(any());
    verify(as, atMost(1)).transitionToStandby(any());
  }

  /**
   * Helper method to test that neutral mode plays well with itself. Expects
   * exactly one transition to standby even though neutral mode is entered a
   * second time.
   *
   * @param as the admin service
   * @param ees the embedded elector service
   * @throws IOException if there's an issue transitioning
   * @throws InterruptedException if interrupted
   */
  private void testCallbackSynchronizationNeutral(AdminService as,
      EmbeddedElectorService ees) throws IOException, InterruptedException {
    ees.enterNeutralMode();

    // Wait longer than the configured ZK timeout before verifying.
    Thread.sleep(100);

    verify(as, atLeast(1)).transitionToStandby(any());
    verify(as, atMost(1)).transitionToStandby(any());
  }

  /**
   * Helper method to test that neutral mode does not race with an active
   * transition.
   *
   * @param as the admin service
   * @param ees the embedded elector service
   * @throws IOException if there's an issue transitioning
   * @throws InterruptedException if interrupted
   */
  private void testCallbackSynchronizationTimingActive(AdminService as,
      EmbeddedElectorService ees) throws IOException, InterruptedException {
    synchronized (ees.zkDisconnectLock) {
      // Sleep while holding the lock so that the timer thread can't do
      // anything when it runs.  Sleep until we're pretty sure the timer thread
      // has tried to run.
      Thread.sleep(100);
      // While still holding the lock cancel the timer by transitioning. This
      // simulates a race where the callback goes to cancel the timer while the
      // timer is trying to run.
      ees.becomeActive();
    }

    // Sleep just a little more so that the timer thread can do whatever it's
    // going to do, hopefully nothing.
    Thread.sleep(50);

    verify(as).transitionToActive(any());
    verify(as, never()).transitionToStandby(any());
  }

  /**
   * Helper method to test that neutral mode does not race with a standby
   * transition.
   *
   * @param as the admin service
   * @param ees the embedded elector service
   * @throws IOException if there's an issue transitioning
   * @throws InterruptedException if interrupted
   */
  private void testCallbackSynchronizationTimingStandby(AdminService as,
      EmbeddedElectorService ees) throws IOException, InterruptedException {
    synchronized (ees.zkDisconnectLock) {
      // Sleep while holding the lock so that the timer thread can't do
      // anything when it runs.  Sleep until we're pretty sure the timer thread
      // has tried to run.
      Thread.sleep(100);
      // While still holding the lock cancel the timer by transitioning. This
      // simulates a race where the callback goes to cancel the timer while the
      // timer is trying to run.
      ees.becomeStandby();
    }

    // Sleep just a little more so that the timer thread can do whatever it's
    // going to do, hopefully nothing.
    Thread.sleep(50);

    verify(as, atLeast(1)).transitionToStandby(any());
    verify(as, atMost(1)).transitionToStandby(any());
  }

  /**
   * A {@link MockRM} whose embedded elector records that
   * {@code becomeActive()} was entered and then delays for a configurable
   * time before delegating to the real implementation.
   */
  private class MockRMWithElector extends MockRM {
    /** Delay in milliseconds applied at the start of becomeActive(). */
    private final long delayMs;

    MockRMWithElector(Configuration conf) {
      this(conf, 0);
    }

    MockRMWithElector(Configuration conf, long delayMs) {
      super(conf);
      this.delayMs = delayMs;
    }

    @Override
    protected AdminService createAdminService() {
      return new AdminService(MockRMWithElector.this, getRMContext()) {
        @Override
        protected EmbeddedElectorService createEmbeddedElectorService() {
          return new EmbeddedElectorService(getRMContext()) {
            @Override
            public void becomeActive() throws
                ServiceFailedException {
              // Signal the test thread first so that it can begin stopping
              // the RM while this callback is still sleeping.
              callbackCalled.set(true);
              TestRMEmbeddedElector.LOG.info("Callback called. Sleeping now");
              try {
                Thread.sleep(delayMs);
                TestRMEmbeddedElector.LOG.info("Sleep done");
              } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller instead of
                // swallowing it, and report it through the test logger.
                Thread.currentThread().interrupt();
                TestRMEmbeddedElector.LOG.warn(
                    "Interrupted while delaying becomeActive", e);
              }
              super.becomeActive();
            }
          };
        }
      };
    }
  }
}