blob: 9a12ca87cd707fedfcc922f645327c69182d8e71 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.recovery;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.RetryNTimes;
import org.apache.curator.test.TestingServer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.records.Version;
import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs.Perms;
import org.apache.zookeeper.data.ACL;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.util.List;
import javax.crypto.SecretKey;
public class TestZKRMStateStore extends RMStateStoreTestBase {
public static final Log LOG = LogFactory.getLog(TestZKRMStateStore.class);
private static final int ZK_TIMEOUT_MS = 1000;
private TestingServer curatorTestingServer;
private CuratorFramework curatorFramework;
@Before
public void setupCuratorServer() throws Exception {
curatorTestingServer = new TestingServer();
curatorTestingServer.start();
curatorFramework = CuratorFrameworkFactory.builder()
.connectString(curatorTestingServer.getConnectString())
.retryPolicy(new RetryNTimes(100, 100))
.build();
curatorFramework.start();
}
@After
public void cleanupCuratorServer() throws IOException {
curatorFramework.close();
curatorTestingServer.stop();
}
class TestZKRMStateStoreTester implements RMStateStoreHelper {
TestZKRMStateStoreInternal store;
String workingZnode;
class TestZKRMStateStoreInternal extends ZKRMStateStore {
public TestZKRMStateStoreInternal(Configuration conf, String workingZnode)
throws Exception {
init(conf);
start();
assertTrue(znodeWorkingPath.equals(workingZnode));
}
public String getVersionNode() {
return znodeWorkingPath + "/" + ROOT_ZNODE_NAME + "/" + VERSION_NODE;
}
public Version getCurrentVersion() {
return CURRENT_VERSION_INFO;
}
public String getAppNode(String appId) {
return workingZnode + "/" + ROOT_ZNODE_NAME + "/" + RM_APP_ROOT + "/"
+ appId;
}
/**
* Emulating retrying createRootDir not to raise NodeExist exception
* @throws Exception
*/
public void testRetryingCreateRootDir() throws Exception {
create(znodeWorkingPath);
}
}
public RMStateStore getRMStateStore() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
workingZnode = "/jira/issue/3077/rmstore";
conf.set(YarnConfiguration.RM_ZK_ADDRESS,
curatorTestingServer.getConnectString());
conf.set(YarnConfiguration.ZK_RM_STATE_STORE_PARENT_PATH, workingZnode);
this.store = new TestZKRMStateStoreInternal(conf, workingZnode);
return this.store;
}
@Override
public boolean isFinalStateValid() throws Exception {
return 1 ==
curatorFramework.getChildren().forPath(store.znodeWorkingPath).size();
}
@Override
public void writeVersion(Version version) throws Exception {
curatorFramework.setData().withVersion(-1)
.forPath(store.getVersionNode(),
((VersionPBImpl) version).getProto().toByteArray());
}
@Override
public Version getCurrentVersion() throws Exception {
return store.getCurrentVersion();
}
public boolean appExists(RMApp app) throws Exception {
return null != curatorFramework.checkExists()
.forPath(store.getAppNode(app.getApplicationId().toString()));
}
}
@Test (timeout = 60000)
public void testZKRMStateStoreRealZK() throws Exception {
TestZKRMStateStoreTester zkTester = new TestZKRMStateStoreTester();
testRMAppStateStore(zkTester);
testRMDTSecretManagerStateStore(zkTester);
testCheckVersion(zkTester);
testEpoch(zkTester);
testAppDeletion(zkTester);
testDeleteStore(zkTester);
testRemoveApplication(zkTester);
testAMRMTokenSecretManagerStateStore(zkTester);
testReservationStateStore(zkTester);
((TestZKRMStateStoreTester.TestZKRMStateStoreInternal)
zkTester.getRMStateStore()).testRetryingCreateRootDir();
}
@Test (timeout = 60000)
public void testCheckMajorVersionChange() throws Exception {
TestZKRMStateStoreTester zkTester = new TestZKRMStateStoreTester() {
Version VERSION_INFO = Version.newInstance(Integer.MAX_VALUE, 0);
@Override
public Version getCurrentVersion() throws Exception {
return VERSION_INFO;
}
@Override
public RMStateStore getRMStateStore() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
workingZnode = "/jira/issue/3077/rmstore";
conf.set(YarnConfiguration.RM_ZK_ADDRESS,
curatorTestingServer.getConnectString());
conf.set(YarnConfiguration.ZK_RM_STATE_STORE_PARENT_PATH, workingZnode);
this.store = new TestZKRMStateStoreInternal(conf, workingZnode) {
Version storedVersion = null;
@Override
public Version getCurrentVersion() {
return VERSION_INFO;
}
@Override
protected synchronized Version loadVersion() throws Exception {
return storedVersion;
}
@Override
protected synchronized void storeVersion() throws Exception {
storedVersion = VERSION_INFO;
}
};
return this.store;
}
};
// default version
RMStateStore store = zkTester.getRMStateStore();
Version defaultVersion = zkTester.getCurrentVersion();
store.checkVersion();
Assert.assertEquals(defaultVersion, store.loadVersion());
}
private Configuration createHARMConf(
String rmIds, String rmId, int adminPort) {
Configuration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
conf.set(YarnConfiguration.RM_HA_IDS, rmIds);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.set(YarnConfiguration.RM_STORE, ZKRMStateStore.class.getName());
conf.set(YarnConfiguration.RM_ZK_ADDRESS,
curatorTestingServer.getConnectString());
conf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, ZK_TIMEOUT_MS);
conf.set(YarnConfiguration.RM_HA_ID, rmId);
conf.set(YarnConfiguration.RM_WEBAPP_ADDRESS, "localhost:0");
for (String rpcAddress : YarnConfiguration.getServiceAddressConfKeys(conf)) {
for (String id : HAUtil.getRMHAIds(conf)) {
conf.set(HAUtil.addSuffix(rpcAddress, id), "localhost:0");
}
}
conf.set(HAUtil.addSuffix(YarnConfiguration.RM_ADMIN_ADDRESS, rmId),
"localhost:" + adminPort);
return conf;
}
private static boolean verifyZKACL(String id, String scheme, int perm,
List<ACL> acls) {
for (ACL acl : acls) {
if (acl.getId().getScheme().equals(scheme) &&
acl.getId().getId().startsWith(id) &&
acl.getPerms() == perm) {
return true;
}
}
return false;
}
/**
* Test if RM can successfully start in HA disabled mode if it was previously
* running in HA enabled mode. And then start it in HA mode after running it
* with HA disabled. NoAuth Exception should not be sent by zookeeper and RM
* should start successfully.
*/
@Test
public void testZKRootPathAcls() throws Exception {
StateChangeRequestInfo req = new StateChangeRequestInfo(
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
String rootPath =
YarnConfiguration.DEFAULT_ZK_RM_STATE_STORE_PARENT_PATH + "/" +
ZKRMStateStore.ROOT_ZNODE_NAME;
// Start RM with HA enabled
Configuration conf = createHARMConf("rm1,rm2", "rm1", 1234);
conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
ResourceManager rm = new MockRM(conf);
rm.start();
rm.getRMContext().getRMAdminService().transitionToActive(req);
List<ACL> acls =
((ZKRMStateStore)rm.getRMContext().getStateStore()).getACL(rootPath);
assertEquals(acls.size(), 2);
// CREATE and DELETE permissions for root node based on RM ID
verifyZKACL("digest", "localhost", Perms.CREATE | Perms.DELETE, acls);
verifyZKACL(
"world", "anyone", Perms.ALL ^ (Perms.CREATE | Perms.DELETE), acls);
rm.close();
// Now start RM with HA disabled. NoAuth Exception should not be thrown.
conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, false);
rm = new MockRM(conf);
rm.start();
rm.getRMContext().getRMAdminService().transitionToActive(req);
acls = ((ZKRMStateStore)rm.getRMContext().getStateStore()).getACL(rootPath);
assertEquals(acls.size(), 1);
verifyZKACL("world", "anyone", Perms.ALL, acls);
rm.close();
// Start RM with HA enabled.
conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
rm = new MockRM(conf);
rm.start();
rm.getRMContext().getRMAdminService().transitionToActive(req);
acls = ((ZKRMStateStore)rm.getRMContext().getStateStore()).getACL(rootPath);
assertEquals(acls.size(), 2);
verifyZKACL("digest", "localhost", Perms.CREATE | Perms.DELETE, acls);
verifyZKACL(
"world", "anyone", Perms.ALL ^ (Perms.CREATE | Perms.DELETE), acls);
rm.close();
}
@SuppressWarnings("unchecked")
@Test
public void testFencing() throws Exception {
StateChangeRequestInfo req = new StateChangeRequestInfo(
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
Configuration conf1 = createHARMConf("rm1,rm2", "rm1", 1234);
conf1.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
ResourceManager rm1 = new MockRM(conf1);
rm1.start();
rm1.getRMContext().getRMAdminService().transitionToActive(req);
assertEquals("RM with ZKStore didn't start",
Service.STATE.STARTED, rm1.getServiceState());
assertEquals("RM should be Active",
HAServiceProtocol.HAServiceState.ACTIVE,
rm1.getRMContext().getRMAdminService().getServiceStatus().getState());
Configuration conf2 = createHARMConf("rm1,rm2", "rm2", 5678);
conf2.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
ResourceManager rm2 = new MockRM(conf2);
rm2.start();
rm2.getRMContext().getRMAdminService().transitionToActive(req);
assertEquals("RM with ZKStore didn't start",
Service.STATE.STARTED, rm2.getServiceState());
assertEquals("RM should be Active",
HAServiceProtocol.HAServiceState.ACTIVE,
rm2.getRMContext().getRMAdminService().getServiceStatus().getState());
for (int i = 0; i < ZK_TIMEOUT_MS / 50; i++) {
if (HAServiceProtocol.HAServiceState.ACTIVE ==
rm1.getRMContext().getRMAdminService().getServiceStatus().getState()) {
Thread.sleep(100);
}
}
assertEquals("RM should have been fenced",
HAServiceProtocol.HAServiceState.STANDBY,
rm1.getRMContext().getRMAdminService().getServiceStatus().getState());
assertEquals("RM should be Active",
HAServiceProtocol.HAServiceState.ACTIVE,
rm2.getRMContext().getRMAdminService().getServiceStatus().getState());
}
@Test
public void testFencedState() throws Exception {
TestZKRMStateStoreTester zkTester = new TestZKRMStateStoreTester();
RMStateStore store = zkTester.getRMStateStore();
// Move state to FENCED from ACTIVE
store.updateFencedState();
assertEquals("RMStateStore should have been in fenced state",
true, store.isFencedState());
long submitTime = System.currentTimeMillis();
long startTime = submitTime + 1000;
// Add a new app
RMApp mockApp = mock(RMApp.class);
ApplicationSubmissionContext context =
new ApplicationSubmissionContextPBImpl();
when(mockApp.getSubmitTime()).thenReturn(submitTime);
when(mockApp.getStartTime()).thenReturn(startTime);
when(mockApp.getApplicationSubmissionContext()).thenReturn(context);
when(mockApp.getUser()).thenReturn("test");
store.storeNewApplication(mockApp);
assertEquals("RMStateStore should have been in fenced state",
true, store.isFencedState());
// Add a new attempt
ClientToAMTokenSecretManagerInRM clientToAMTokenMgr =
new ClientToAMTokenSecretManagerInRM();
ApplicationAttemptId attemptId = ConverterUtils
.toApplicationAttemptId("appattempt_1234567894321_0001_000001");
SecretKey clientTokenMasterKey =
clientToAMTokenMgr.createMasterKey(attemptId);
RMAppAttemptMetrics mockRmAppAttemptMetrics =
mock(RMAppAttemptMetrics.class);
Container container = new ContainerPBImpl();
container.setId(ConverterUtils.toContainerId("container_1234567891234_0001_01_000001"));
RMAppAttempt mockAttempt = mock(RMAppAttempt.class);
when(mockAttempt.getAppAttemptId()).thenReturn(attemptId);
when(mockAttempt.getMasterContainer()).thenReturn(container);
when(mockAttempt.getClientTokenMasterKey())
.thenReturn(clientTokenMasterKey);
when(mockAttempt.getRMAppAttemptMetrics())
.thenReturn(mockRmAppAttemptMetrics);
when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage())
.thenReturn(new AggregateAppResourceUsage(0,0));
store.storeNewApplicationAttempt(mockAttempt);
assertEquals("RMStateStore should have been in fenced state",
true, store.isFencedState());
long finishTime = submitTime + 1000;
// Update attempt
ApplicationAttemptStateData newAttemptState =
ApplicationAttemptStateData.newInstance(attemptId, container,
store.getCredentialsFromAppAttempt(mockAttempt),
startTime, RMAppAttemptState.FINISHED, "testUrl",
"test", FinalApplicationStatus.SUCCEEDED, 100,
finishTime, 0, 0);
store.updateApplicationAttemptState(newAttemptState);
assertEquals("RMStateStore should have been in fenced state",
true, store.isFencedState());
// Update app
ApplicationStateData appState = ApplicationStateData.newInstance(submitTime,
startTime, context, "test");
store.updateApplicationState(appState);
assertEquals("RMStateStore should have been in fenced state",
true, store.isFencedState());
// Remove app
store.removeApplication(mockApp);
assertEquals("RMStateStore should have been in fenced state",
true, store.isFencedState());
// store RM delegation token;
RMDelegationTokenIdentifier dtId1 =
new RMDelegationTokenIdentifier(new Text("owner1"),
new Text("renewer1"), new Text("realuser1"));
Long renewDate1 = new Long(System.currentTimeMillis());
dtId1.setSequenceNumber(1111);
store.storeRMDelegationToken(dtId1, renewDate1);
assertEquals("RMStateStore should have been in fenced state", true,
store.isFencedState());
store.updateRMDelegationToken(dtId1, renewDate1);
assertEquals("RMStateStore should have been in fenced state", true,
store.isFencedState());
// remove delegation key;
store.removeRMDelegationToken(dtId1);
assertEquals("RMStateStore should have been in fenced state", true,
store.isFencedState());
// store delegation master key;
DelegationKey key = new DelegationKey(1234, 4321, "keyBytes".getBytes());
store.storeRMDTMasterKey(key);
assertEquals("RMStateStore should have been in fenced state", true,
store.isFencedState());
// remove delegation master key;
store.removeRMDTMasterKey(key);
assertEquals("RMStateStore should have been in fenced state", true,
store.isFencedState());
// store or update AMRMToken;
store.storeOrUpdateAMRMTokenSecretManager(null, false);
assertEquals("RMStateStore should have been in fenced state", true,
store.isFencedState());
store.close();
}
@Test
public void testDuplicateRMAppDeletion() throws Exception {
TestZKRMStateStoreTester zkTester = new TestZKRMStateStoreTester();
long submitTime = System.currentTimeMillis();
long startTime = System.currentTimeMillis() + 1234;
RMStateStore store = zkTester.getRMStateStore();
TestDispatcher dispatcher = new TestDispatcher();
store.setRMDispatcher(dispatcher);
ApplicationAttemptId attemptIdRemoved = ConverterUtils
.toApplicationAttemptId("appattempt_1352994193343_0002_000001");
ApplicationId appIdRemoved = attemptIdRemoved.getApplicationId();
storeApp(store, appIdRemoved, submitTime, startTime);
storeAttempt(store, attemptIdRemoved,
"container_1352994193343_0002_01_000001", null, null, dispatcher);
ApplicationSubmissionContext context =
new ApplicationSubmissionContextPBImpl();
context.setApplicationId(appIdRemoved);
ApplicationStateData appStateRemoved =
ApplicationStateData.newInstance(
submitTime, startTime, context, "user1");
appStateRemoved.attempts.put(attemptIdRemoved, null);
store.removeApplicationStateInternal(appStateRemoved);
try {
store.removeApplicationStateInternal(appStateRemoved);
} catch (KeeperException.NoNodeException nne) {
Assert.fail("NoNodeException should not happen.");
}
store.close();
}
}