blob: 16cdef6a5da4718aaa7e5637431b0cad62e3a1bf [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.slider.providers.hbase.minicluster.failures
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import org.apache.hadoop.hbase.ClusterStatus
import org.apache.slider.api.ClusterDescription
import org.apache.slider.api.RoleKeys
import org.apache.slider.providers.hbase.HBaseKeys
import org.apache.slider.client.SliderClient
import org.apache.slider.common.params.ActionKillContainerArgs
import org.apache.slider.providers.hbase.minicluster.HBaseMiniClusterTestBase
import org.apache.slider.core.main.ServiceLauncher
import org.junit.Test
/**
* test create a live region service
*/
@CompileStatic
@Slf4j
class TestFailedRegionService extends HBaseMiniClusterTestBase {
@Test
public void testFailedRegionService() throws Throwable {
testRegionService("test_failed_region_service", true)
}
@Test
public void testStoppedRegionService() throws Throwable {
testRegionService("test_stopped_region_service", false)
}
private void testRegionService(String testName, boolean toKill) {
String clustername = testName
String action = toKill ? "kill" : "stop"
int regionServerCount = 2
createMiniCluster(clustername, configuration, 1, 1, 1, true, true)
describe("Create a single region service cluster then " + action + " the RS");
//now launch the cluster
ServiceLauncher<SliderClient> launcher = createHBaseCluster(clustername, regionServerCount, [], true, true)
SliderClient sliderClient = launcher.service
addToTeardown(sliderClient);
ClusterDescription status = sliderClient.getClusterDescription(clustername)
ClusterStatus clustat = basicHBaseClusterStartupSequence(sliderClient)
status = waitForWorkerInstanceCount(sliderClient, regionServerCount, hbaseClusterStartupToLiveTime)
//get the hbase status
ClusterStatus hbaseStat = waitForHBaseRegionServerCount(sliderClient, clustername, regionServerCount, hbaseClusterStartupToLiveTime)
log.info("Initial cluster status : ${hbaseStatusToString(hbaseStat)}");
describe("running processes")
lsJavaProcesses()
describe("about to " + action + " servers")
if (toKill) {
killAllRegionServers()
} else {
stopAllRegionServers()
}
//sleep a bit
sleep(toKill ? 15000 : 30000);
lsJavaProcesses()
describe("waiting for recovery")
//and expect a recovery
status = waitForWorkerInstanceCount(sliderClient, regionServerCount, hbaseClusterStartupToLiveTime)
//now we expect the failure count to be two
hbaseStat = waitForHBaseRegionServerCount(sliderClient, clustername, regionServerCount, hbaseClusterStartupToLiveTime)
status = sliderClient.clusterDescription
assert status.roles[HBaseKeys.ROLE_WORKER][RoleKeys.ROLE_FAILED_INSTANCES] == "2"
log.info("Updated cluster status : ${hbaseStatusToString(hbaseStat)}");
//now attempt it by container kill command
def workers = status.instances[HBaseKeys.ROLE_WORKER]
assert workers.size() == 2
def worker1 = workers[0]
ActionKillContainerArgs args = new ActionKillContainerArgs();
args.id = worker1
assert 0 == sliderClient.actionKillContainer(clustername, args)
sleep(15000)
waitForWorkerInstanceCount(
sliderClient,
regionServerCount,
hbaseClusterStartupToLiveTime)
status = sliderClient.clusterDescription
assert status.roles[HBaseKeys.ROLE_WORKER][RoleKeys.ROLE_FAILED_INSTANCES] == "3"
}
}