/**
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.executor.RegionTransitionData;
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKTable;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.junit.Test;
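/**
* Tests HBase master failover: a simple backup-master takeover, plus failovers
* where regions in transition have been mocked up in ZooKeeper beforehand.
*/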
public class TestMasterFailover {
private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
/**
* Simple test of master failover.
* <p>
* Starts with three masters. Kills a backup master. Then kills the active
* master. Ensures the final master becomes active and we can still contact
* the cluster.
* @throws Exception
*/
@Test (timeout=180000)
public void testSimpleMasterFailover() throws Exception {
final int NUM_MASTERS = 3;
final int NUM_RS = 3;
// Create config to use for this cluster
Configuration conf = HBaseConfiguration.create();
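// make the master wait for all three regionservers to check in before
// finishing its startup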
conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
// Start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
// get all the master threads
List<MasterThread> masterThreads = cluster.getMasterThreads();
// wait for each to come online
for (MasterThread mt : masterThreads) {
assertTrue(mt.isAlive());
}
// verify only one is the active master and we have the right number of masters
int numActive = 0;
int activeIndex = -1;
String activeName = null;
for (int i = 0; i < masterThreads.size(); i++) {
if (masterThreads.get(i).getMaster().isActiveMaster()) {
numActive++;
activeIndex = i;
activeName = masterThreads.get(i).getMaster().getServerName();
}
}
assertEquals(1, numActive);
assertEquals(NUM_MASTERS, masterThreads.size());
// attempt to stop one of the inactive masters
LOG.debug("\n\nStopping a backup master\n");
int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
cluster.stopMaster(backupIndex, false);
cluster.waitOnMaster(backupIndex);
// verify there is still exactly one active master and it is the same one
numActive = 0;
for (int i = 0; i < masterThreads.size(); i++) {
if (masterThreads.get(i).getMaster().isActiveMaster()) {
assertEquals(activeName, masterThreads.get(i).getMaster().getServerName());
numActive++;
activeIndex = i;
}
}
assertEquals(1, numActive);
assertEquals(2, masterThreads.size());
// kill the active master
LOG.debug("\n\nStopping the active master\n");
cluster.stopMaster(activeIndex, false);
cluster.waitOnMaster(activeIndex);
// wait for an active master to show up and be ready
assertTrue(cluster.waitForActiveAndReadyMaster());
LOG.debug("\n\nVerifying backup master is now active\n");
// should only have one master now
assertEquals(1, masterThreads.size());
// and it should be active
assertTrue(masterThreads.get(0).getMaster().isActiveMaster());
// Stop the cluster
TEST_UTIL.shutdownMiniCluster();
}
/**
* Complex test of master failover that exercises as many permutations as
* possible of the states that regions in transition can be in within ZK.
* <p>
* This tests the proper handling of these states by the failed-over master
* and includes a thorough testing of the timeout code as well.
* <p>
* Starts with a single master and three regionservers.
* <p>
* Creates two tables, enabledTable and disabledTable, each with one region
* per split key. The disabledTable is then disabled.
* <p>
* After reaching steady-state, the master is killed. We then mock several
* states in ZK.
* <p>
* After mocking them, we start up a new master, which should become the
* active master and detect that this is a failover. The primary passing
* condition is that all regions of the enabled table are assigned and all
* regions of the disabled table are not assigned.
* <p>
* The different scenarios to be tested are below:
* <p>
* <b>ZK State: OFFLINE</b>
* <p>A node can get into OFFLINE state if</p>
* <ul>
* <li>An RS fails to open a region, so it reverts the state back to OFFLINE
* <li>The master is assigning the region to an RS but has not yet sent the
* open RPC
* </ul>
* <p>We will mock the scenarios</p>
* <ul>
* <li>Master has assigned an enabled region but RS failed so a region is
* not assigned anywhere and is sitting in ZK as OFFLINE</li>
* <li>This single mock should cover both of the above cases</li>
* </ul>
* <p>
* <b>ZK State: CLOSING</b>
* <p>A node can get into CLOSING state if</p>
* <ul>
* <li>An RS has begun to close a region
* </ul>
* <p>We will mock the scenarios</p>
* <ul>
* <li>Region of enabled table was being closed but did not complete
* <li>Region of disabled table was being closed but did not complete
* </ul>
* <p>
* <b>ZK State: CLOSED</b>
* <p>A node can get into CLOSED state if</p>
* <ul>
* <li>An RS has finished closing a region, but the master has not yet
* acknowledged it
* </ul>
* <p>We will mock the scenarios</p>
* <ul>
* <li>Region of a table that should be enabled was closed on an RS
* <li>Region of a table that should be disabled was closed on an RS
* </ul>
* <p>
* <b>ZK State: OPENING</b>
* <p>A node can get into OPENING state if</p>
* <ul>
* <li>An RS has begun to open a region
* </ul>
* <p>We will mock the scenarios</p>
* <ul>
* <li>RS was opening a region of enabled table but never finishes
* </ul>
* <p>
* <b>ZK State: OPENED</b>
* <p>A node can get into OPENED state if</p>
* <ul>
* <li>An RS has finished opening a region, but the master has not yet
* acknowledged it
* </ul>
* <p>We will mock the scenarios</p>
* <ul>
* <li>Region of a table that should be enabled was opened on an RS
* <li>Region of a table that should be disabled was opened on an RS
* </ul>
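* <p>For reference, these znode states are mocked in the test body with
* {@link ZKAssign}; for example, a CLOSED node is produced like so:</p>
* <pre>
* int version = ZKAssign.createNodeClosing(zkw, region, serverName);
* ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
* </pre>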
* @throws Exception
*/
@Test (timeout=180000)
public void testMasterFailoverWithMockedRIT() throws Exception {
final int NUM_MASTERS = 1;
final int NUM_RS = 3;
// Create config to use for this cluster
Configuration conf = HBaseConfiguration.create();
// Drop the timeout monitor period and timeout well below the defaults so
// the mocked regions in transition are picked up and processed quickly
conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
// Start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
log("Cluster started");
// Create a ZKW to use in the test
ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
"unittest", new Abortable() {
@Override
public void abort(String why, Throwable e) {
throw new RuntimeException("Fatal ZK error, why=" + why, e);
}
});
// get all the master threads
List<MasterThread> masterThreads = cluster.getMasterThreads();
assertEquals(1, masterThreads.size());
// only one master thread, let's wait for it to be initialized
assertTrue(cluster.waitForActiveAndReadyMaster());
HMaster master = masterThreads.get(0).getMaster();
assertTrue(master.isActiveMaster());
assertTrue(master.isInitialized());
// disable load balancing on this master so it cannot move the regions we
// are about to place by hand
master.balanceSwitch(false);
// create two tables in META, each with one region per split key
byte [] FAMILY = Bytes.toBytes("family");
byte [][] SPLIT_KEYS = new byte [][] {
new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
Bytes.toBytes("iii"), Bytes.toBytes("jjj")
};
byte [] enabledTable = Bytes.toBytes("enabledTable");
HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
byte [] disabledTable = Bytes.toBytes("disabledTable");
HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
log("Regions in META have been created");
// at this point we expect only the two catalog regions (-ROOT- and .META.)
// to be assigned out
assertEquals(2, cluster.countServedRegions());
// Let's just assign everything to first RS
HRegionServer hrs = cluster.getRegionServer(0);
String serverName = hrs.getServerName();
HServerInfo hsiAlive = hrs.getServerInfo();
// we'll need some regions to already be assigned out properly on live RS
List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
enabledAndAssignedRegions.add(enabledRegions.remove(0));
enabledAndAssignedRegions.add(enabledRegions.remove(0));
List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
disabledAndAssignedRegions.add(disabledRegions.remove(0));
disabledAndAssignedRegions.add(disabledRegions.remove(0));
// now actually assign them
for (HRegionInfo hri : enabledAndAssignedRegions) {
master.assignmentManager.regionPlans.put(hri.getEncodedName(),
new RegionPlan(hri, null, hsiAlive));
master.assignRegion(hri);
}
for (HRegionInfo hri : disabledAndAssignedRegions) {
master.assignmentManager.regionPlans.put(hri.getEncodedName(),
new RegionPlan(hri, null, hsiAlive));
master.assignRegion(hri);
}
// wait for no more RIT
log("Waiting for assignment to finish");
ZKAssign.blockUntilNoRIT(zkw);
log("Assignment completed");
// Stop the master
log("Aborting master");
cluster.abortMaster(0);
cluster.waitOnMaster(0);
log("Master has aborted");
/*
* Now, let's start mocking up some weird states as described in the method
* javadoc.
*/
List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
log("Beginning to mock scenarios");
// Disable the disabledTable in ZK
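// so the failed-over master will expect its regions to stay unassigned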
ZKTable zktable = new ZKTable(zkw);
zktable.setDisabledTable(Bytes.toString(disabledTable));
/*
* ZK = OFFLINE
*/
// Region that should be assigned but is not and is in ZK as OFFLINE
HRegionInfo region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region);
ZKAssign.createNodeOffline(zkw, region, serverName);
/*
* ZK = CLOSING
*/
// Disabled test of CLOSING. This case is invalid after HBASE-3181.
// How can an RS stop a CLOSING w/o deleting the node? If it did ever fail
// and left the node in CLOSING, the RS would have aborted and we'd process
// these regions in server shutdown
//
// // Region of enabled table being closed but not complete
// // Region is already assigned, don't say anything to RS but set ZK closing
// region = enabledAndAssignedRegions.remove(0);
// regionsThatShouldBeOnline.add(region);
// ZKAssign.createNodeClosing(zkw, region, serverName);
//
// // Region of disabled table being closed but not complete
// // Region is already assigned, don't say anything to RS but set ZK closing
// region = disabledAndAssignedRegions.remove(0);
// regionsThatShouldBeOffline.add(region);
// ZKAssign.createNodeClosing(zkw, region, serverName);
/*
* ZK = CLOSED
*/
// Region of enabled table closed but not ack'd by master
region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region);
int version = ZKAssign.createNodeClosing(zkw, region, serverName);
ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
// Region of disabled table closed but not ack'd by master
region = disabledRegions.remove(0);
regionsThatShouldBeOffline.add(region);
version = ZKAssign.createNodeClosing(zkw, region, serverName);
ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
/*
* ZK = OPENING
*/
// RS was opening a region of enabled table but never finishes
region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region);
ZKAssign.createNodeOffline(zkw, region, serverName);
ZKAssign.transitionNodeOpening(zkw, region, serverName);
/*
* ZK = OPENED
*/
// Region of enabled table was opened on RS
region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region);
ZKAssign.createNodeOffline(zkw, region, serverName);
hrs.openRegion(region);
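// Poll ZK until the RS transitions the unassigned node to RS_ZK_REGION_OPENED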
while (true) {
RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
break;
}
Thread.sleep(100);
}
// Region of disabled table was opened on RS
region = disabledRegions.remove(0);
regionsThatShouldBeOffline.add(region);
ZKAssign.createNodeOffline(zkw, region, serverName);
hrs.openRegion(region);
while (true) {
RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
break;
}
Thread.sleep(100);
}
/*
* ZK = NONE
*/
/*
* DONE MOCKING
*/
log("Done mocking data up in ZK");
// Start up a new master
log("Starting up a new master");
master = cluster.startMaster().getMaster();
log("Waiting for master to be ready");
cluster.waitForActiveAndReadyMaster();
log("Master is ready");
// Failover should be completed, now wait for no RIT
log("Waiting for no more RIT");
ZKAssign.blockUntilNoRIT(zkw);
log("No more RIT in ZK, now doing final test verification");
// Grab all the regions that are online across RSs
Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
for (JVMClusterUtil.RegionServerThread rst :
cluster.getRegionServerThreads()) {
onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
}
// Now, everything that should be online should be online
for (HRegionInfo hri : regionsThatShouldBeOnline) {
assertTrue(onlineRegions.contains(hri));
}
// Everything that should be offline should not be online
for (HRegionInfo hri : regionsThatShouldBeOffline) {
assertFalse(onlineRegions.contains(hri));
}
log("Done with verification, all passed, shutting down cluster");
// Done, shutdown the cluster
TEST_UTIL.shutdownMiniCluster();
}
/**
* Complex test of master failover that exercises as many permutations as
* possible of the states that regions in transition can be in within ZK,
* where the znodes point to an RS that died while no master was around to
* process it.
* <p>
* This tests the proper handling of these states by the failed-over master
* and includes a thorough testing of the timeout code as well.
* <p>
* Starts with a single master and two regionservers.
* <p>
* Creates two tables, enabledTable and disabledTable, each with one region
* per split key. The disabledTable is then disabled.
* <p>
* After reaching steady-state, the master is killed. We then mock several
* states in ZK and hard-kill one of the regionservers.
* <p>
* After mocking them and killing an RS, we start up a new master, which
* should become the active master and detect that this is a failover. The
* primary passing condition is that all regions of the enabled table are
* assigned and all regions of the disabled table are not assigned.
* <p>
* The different scenarios to be tested are below:
* <p>
* <b>ZK State: CLOSING</b>
* <p>A node can get into CLOSING state if</p>
* <ul>
* <li>An RS has begun to close a region
* </ul>
* <p>We will mock the scenarios</p>
* <ul>
* <li>Region was being closed but the RS died before finishing the close
* </ul>
* <b>ZK State: OPENED</b>
* <p>A node can get into OPENED state if</p>
* <ul>
* <li>An RS has finished opening a region, but the master has not yet
* acknowledged it
* </ul>
* <p>We will mock the scenarios</p>
* <ul>
* <li>Region of a table that should be enabled was opened by a now-dead RS
* <li>Region of a table that should be disabled was opened by a now-dead RS
* </ul>
* <p>
* <b>ZK State: NONE</b>
* <p>A region may have no transition node at all if</p>
* <ul>
* <li>The server hosting the region died and no master processed it
* </ul>
* <p>We will mock the scenarios</p>
* <ul>
* <li>Region of enabled table was on a dead RS that was not yet processed
* <li>Region of disabled table was on a dead RS that was not yet processed
* </ul>
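* <p>For reference, the NONE case is mocked in the test body by opening the
* region on the doomed RS, waiting for the OPENED znode, and then deleting
* that znode:</p>
* <pre>
* ZKAssign.createNodeOffline(zkw, region, deadServerName);
* hrsDead.openRegion(region);
* // ... poll until RS_ZK_REGION_OPENED, then:
* ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
* </pre>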
* @throws Exception
*/
@Test (timeout=180000)
public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
final int NUM_MASTERS = 1;
final int NUM_RS = 2;
// Create config to use for this cluster
Configuration conf = HBaseConfiguration.create();
// Drop the timeout monitor period and timeout well below the defaults so
// the mocked regions in transition are picked up and processed quickly
conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1);
conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 2);
// Create and start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
log("Cluster started");
// Create a ZKW to use in the test
ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
"unittest", new Abortable() {
@Override
public void abort(String why, Throwable e) {
LOG.error("Fatal ZK Error: " + why, e);
org.junit.Assert.fail("Fatal ZK error");
}
});
// get all the master threads
List<MasterThread> masterThreads = cluster.getMasterThreads();
assertEquals(1, masterThreads.size());
// only one master thread, let's wait for it to be initialized
assertTrue(cluster.waitForActiveAndReadyMaster());
HMaster master = masterThreads.get(0).getMaster();
assertTrue(master.isActiveMaster());
assertTrue(master.isInitialized());
// disable load balancing on this master so it cannot move the regions we
// are about to place by hand
master.balanceSwitch(false);
// create two tables in META, each with one region per split key
byte [] FAMILY = Bytes.toBytes("family");
byte [][] SPLIT_KEYS = new byte [][] {
new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
Bytes.toBytes("iii"), Bytes.toBytes("jjj")
};
byte [] enabledTable = Bytes.toBytes("enabledTable");
HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
byte [] disabledTable = Bytes.toBytes("disabledTable");
HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
log("Regions in META have been created");
// at this point we expect only the two catalog regions (-ROOT- and .META.)
// to be assigned out
assertEquals(2, cluster.countServedRegions());
// The first RS will stay online
HRegionServer hrs = cluster.getRegionServer(0);
HServerInfo hsiAlive = hrs.getServerInfo();
// The second RS is going to be hard-killed
HRegionServer hrsDead = cluster.getRegionServer(1);
String deadServerName = hrsDead.getServerName();
HServerInfo hsiDead = hrsDead.getServerInfo();
// we'll need some regions to already be assigned out properly on live RS
List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
enabledAndAssignedRegions.add(enabledRegions.remove(0));
enabledAndAssignedRegions.add(enabledRegions.remove(0));
List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
disabledAndAssignedRegions.add(disabledRegions.remove(0));
disabledAndAssignedRegions.add(disabledRegions.remove(0));
// now actually assign them
for (HRegionInfo hri : enabledAndAssignedRegions) {
master.assignmentManager.regionPlans.put(hri.getEncodedName(),
new RegionPlan(hri, null, hsiAlive));
master.assignRegion(hri);
}
for (HRegionInfo hri : disabledAndAssignedRegions) {
master.assignmentManager.regionPlans.put(hri.getEncodedName(),
new RegionPlan(hri, null, hsiAlive));
master.assignRegion(hri);
}
// we also need regions assigned out on the dead server
List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
enabledAndOnDeadRegions.add(enabledRegions.remove(0));
enabledAndOnDeadRegions.add(enabledRegions.remove(0));
List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
disabledAndOnDeadRegions.add(disabledRegions.remove(0));
disabledAndOnDeadRegions.add(disabledRegions.remove(0));
// set region plan to server to be killed and trigger assign
for (HRegionInfo hri : enabledAndOnDeadRegions) {
master.assignmentManager.regionPlans.put(hri.getEncodedName(),
new RegionPlan(hri, null, hsiDead));
master.assignRegion(hri);
}
for (HRegionInfo hri : disabledAndOnDeadRegions) {
master.assignmentManager.regionPlans.put(hri.getEncodedName(),
new RegionPlan(hri, null, hsiDead));
master.assignRegion(hri);
}
// wait for no more RIT
log("Waiting for assignment to finish");
ZKAssign.blockUntilNoRIT(zkw);
log("Assignment completed");
// Stop the master
log("Aborting master");
cluster.abortMaster(0);
cluster.waitOnMaster(0);
log("Master has aborted");
/*
* Now, let's start mocking up some weird states as described in the method
* javadoc.
*/
List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
log("Beginning to mock scenarios");
// Disable the disabledTable in ZK
ZKTable zktable = new ZKTable(zkw);
zktable.setDisabledTable(Bytes.toString(disabledTable));
/*
* ZK = CLOSING
*/
// Region of enabled table being closed on dead RS but not finished
HRegionInfo region = enabledAndOnDeadRegions.remove(0);
regionsThatShouldBeOnline.add(region);
ZKAssign.createNodeClosing(zkw, region, deadServerName);
LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
region + "\n\n");
// Region of disabled table being closed on dead RS but not finished
region = disabledAndOnDeadRegions.remove(0);
regionsThatShouldBeOffline.add(region);
ZKAssign.createNodeClosing(zkw, region, deadServerName);
LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
region + "\n\n");
/*
* ZK = CLOSED
*/
// Region of enabled table on dead server gets closed but not ack'd by master
region = enabledAndOnDeadRegions.remove(0);
regionsThatShouldBeOnline.add(region);
int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
region + "\n\n");
// Region of disabled table on dead server gets closed but not ack'd by master
region = disabledAndOnDeadRegions.remove(0);
regionsThatShouldBeOffline.add(region);
version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
region + "\n\n");
/*
* ZK = OPENING
*/
// RS was opening a region of enabled table then died
region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region);
ZKAssign.createNodeOffline(zkw, region, deadServerName);
ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
region + "\n\n");
// RS was opening a region of disabled table then died
region = disabledRegions.remove(0);
regionsThatShouldBeOffline.add(region);
ZKAssign.createNodeOffline(zkw, region, deadServerName);
ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
region + "\n\n");
/*
* ZK = OPENED
*/
// Region of enabled table was opened on dead RS
region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region);
ZKAssign.createNodeOffline(zkw, region, deadServerName);
hrsDead.openRegion(region);
while (true) {
RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
break;
}
Thread.sleep(100);
}
LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
region + "\n\n");
// Region of disabled table was opened on dead RS
region = disabledRegions.remove(0);
regionsThatShouldBeOffline.add(region);
ZKAssign.createNodeOffline(zkw, region, deadServerName);
hrsDead.openRegion(region);
while (true) {
RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
break;
}
Thread.sleep(100);
}
LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
region + "\n\n");
/*
* ZK = NONE
*/
// Region of enabled table was open at steady-state on dead RS
region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region);
ZKAssign.createNodeOffline(zkw, region, deadServerName);
hrsDead.openRegion(region);
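// Poll until the znode reaches RS_ZK_REGION_OPENED, then delete it so the
// region looks like it was at steady state (no transition node) on the RS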
while (true) {
RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
break;
}
Thread.sleep(100);
}
LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
+ "\n" + region + "\n\n");
// Region of disabled table was open at steady-state on dead RS
region = disabledRegions.remove(0);
regionsThatShouldBeOffline.add(region);
ZKAssign.createNodeOffline(zkw, region, deadServerName);
hrsDead.openRegion(region);
while (true) {
RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
break;
}
Thread.sleep(100);
}
LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
+ "\n" + region + "\n\n");
/*
* DONE MOCKING
*/
log("Done mocking data up in ZK");
// Kill the RS that had a hard death
log("Killing RS " + deadServerName);
hrsDead.abort("Killing for unit test");
log("RS " + deadServerName + " killed");
// Start up a new master
log("Starting up a new master");
master = cluster.startMaster().getMaster();
log("Waiting for master to be ready");
cluster.waitForActiveAndReadyMaster();
log("Master is ready");
// Let's add some weird states to master in-memory state
// After HBASE-3181, we need to have some ZK state if we're PENDING_OPEN
// b/c it is impossible for us to get into this state w/o a zk node
// this is not true of PENDING_CLOSE
// PENDING_OPEN and enabled
region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region);
master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
new RegionState(region, RegionState.State.PENDING_OPEN, 0));
ZKAssign.createNodeOffline(zkw, region, master.getServerName());
// PENDING_OPEN and disabled
region = disabledRegions.remove(0);
regionsThatShouldBeOffline.add(region);
master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
new RegionState(region, RegionState.State.PENDING_OPEN, 0));
ZKAssign.createNodeOffline(zkw, region, master.getServerName());
// This test is bad. It puts up a PENDING_CLOSE but doesn't say what
// server we were PENDING_CLOSE against -- i.e. an entry in
// AssignmentManager#regions. W/o a server, we NPE trying to resend close.
// In past, there was wonky logic that had us reassign region if no server
// at tail of the unassign. This was removed. Commenting out for now.
// TODO: Remove completely.
/*
// PENDING_CLOSE and enabled
region = enabledRegions.remove(0);
LOG.info("Setting PENDING_CLOSE enabled " + region.getEncodedName());
regionsThatShouldBeOnline.add(region);
master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
new RegionState(region, RegionState.State.PENDING_CLOSE, 0));
// PENDING_CLOSE and disabled
region = disabledRegions.remove(0);
LOG.info("Setting PENDING_CLOSE disabled " + region.getEncodedName());
regionsThatShouldBeOffline.add(region);
master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
new RegionState(region, RegionState.State.PENDING_CLOSE, 0));
*/
// Failover should be completed, now wait for no RIT
log("Waiting for no more RIT");
ZKAssign.blockUntilNoRIT(zkw);
log("No more RIT in ZK");
long now = System.currentTimeMillis();
final long maxTime = 120000;
boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
if (!done) {
LOG.info("rit=" + master.assignmentManager.getRegionsInTransition());
}
long elapsed = System.currentTimeMillis() - now;
assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
elapsed < maxTime);
log("No more RIT in RIT map, doing final test verification");
// Grab all the regions that are online across RSs
Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
for (JVMClusterUtil.RegionServerThread rst :
cluster.getRegionServerThreads()) {
onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
}
// Now, everything that should be online should be online
for (HRegionInfo hri : regionsThatShouldBeOnline) {
assertTrue("region=" + hri.getRegionNameAsString(), onlineRegions.contains(hri));
}
// Everything that should be offline should not be online
for (HRegionInfo hri : regionsThatShouldBeOffline) {
assertFalse(onlineRegions.contains(hri));
}
log("Done with verification, all passed, shutting down cluster");
// Done, shutdown the cluster
TEST_UTIL.shutdownMiniCluster();
}
// TODO: Next test to add: permutations where the regions in transition, or
// the RS that is killed, are hosting the ROOT and META regions.
private void log(String string) {
LOG.info("\n\n" + string + " \n\n");
}
}