| /** |
| * Copyright 2010 The Apache Software Foundation |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.hadoop.hbase.master; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertFalse; |
| import static org.junit.Assert.assertTrue; |
| |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Set; |
| import java.util.TreeSet; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.hbase.Abortable; |
| import org.apache.hadoop.hbase.HBaseConfiguration; |
| import org.apache.hadoop.hbase.HBaseTestingUtility; |
| import org.apache.hadoop.hbase.HColumnDescriptor; |
| import org.apache.hadoop.hbase.HRegionInfo; |
| import org.apache.hadoop.hbase.HServerInfo; |
| import org.apache.hadoop.hbase.HTableDescriptor; |
| import org.apache.hadoop.hbase.MiniHBaseCluster; |
| import org.apache.hadoop.hbase.executor.RegionTransitionData; |
| import org.apache.hadoop.hbase.executor.EventHandler.EventType; |
| import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; |
| import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan; |
| import org.apache.hadoop.hbase.regionserver.HRegionServer; |
| import org.apache.hadoop.hbase.util.Bytes; |
| import org.apache.hadoop.hbase.util.JVMClusterUtil; |
| import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread; |
| import org.apache.hadoop.hbase.zookeeper.ZKAssign; |
| import org.apache.hadoop.hbase.zookeeper.ZKTable; |
| import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; |
| import org.junit.Test; |
| |
| public class TestMasterFailover { |
| private static final Log LOG = LogFactory.getLog(TestMasterFailover.class); |
| |
| /** |
| * Simple test of master failover. |
| * <p> |
| * Starts with three masters. Kills a backup master. Then kills the active |
| * master. Ensures the final master becomes active and we can still contact |
| * the cluster. |
| * @throws Exception |
| */ |
| @Test (timeout=180000) |
| public void testSimpleMasterFailover() throws Exception { |
| |
| final int NUM_MASTERS = 3; |
| final int NUM_RS = 3; |
| |
| // Create config to use for this cluster |
| Configuration conf = HBaseConfiguration.create(); |
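| // Have the master wait until all three regionservers have checked in |
| // before it finishes startup, so the test begins from a known cluster size |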
| conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3); |
| conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3); |
| |
| // Start the cluster |
| HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); |
| TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); |
| MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); |
| |
| // get all the master threads |
| List<MasterThread> masterThreads = cluster.getMasterThreads(); |
| |
| // wait for each to come online |
| for (MasterThread mt : masterThreads) { |
| assertTrue(mt.isAlive()); |
| } |
| |
| // verify only one is the active master and we have the right number |
| int numActive = 0; |
| int activeIndex = -1; |
| String activeName = null; |
| for (int i = 0; i < masterThreads.size(); i++) { |
| if (masterThreads.get(i).getMaster().isActiveMaster()) { |
| numActive++; |
| activeIndex = i; |
| activeName = masterThreads.get(i).getMaster().getServerName(); |
| } |
| } |
| assertEquals(1, numActive); |
| assertEquals(NUM_MASTERS, masterThreads.size()); |
| |
| // attempt to stop one of the inactive masters |
| LOG.debug("\n\nStopping a backup master\n"); |
| int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1); |
| cluster.stopMaster(backupIndex, false); |
| cluster.waitOnMaster(backupIndex); |
| |
| // verify there is still exactly one active master and it's the same one |
| numActive = 0; |
| for (int i = 0; i < masterThreads.size(); i++) { |
| if (masterThreads.get(i).getMaster().isActiveMaster()) { |
| numActive++; |
| assertTrue(activeName.equals( |
| masterThreads.get(i).getMaster().getServerName())); |
| activeIndex = i; |
| } |
| } |
| assertEquals(1, numActive); |
| assertEquals(2, masterThreads.size()); |
| |
| // kill the active master |
| LOG.debug("\n\nStopping the active master\n"); |
| cluster.stopMaster(activeIndex, false); |
| cluster.waitOnMaster(activeIndex); |
| |
| // wait for an active master to show up and be ready |
| assertTrue(cluster.waitForActiveAndReadyMaster()); |
| |
| LOG.debug("\n\nVerifying backup master is now active\n"); |
| // should only have one master now |
| assertEquals(1, masterThreads.size()); |
| // and it should be active |
| assertTrue(masterThreads.get(0).getMaster().isActiveMaster()); |
| |
| // Stop the cluster |
| TEST_UTIL.shutdownMiniCluster(); |
| } |
| |
| /** |
| * Complex test of master failover that exercises as many permutations as |
| * possible of the different states that regions in transition can be in |
| * within ZK. |
| * <p> |
| * This tests proper handling of these states by the failed-over master |
| * and also exercises the timeout code thoroughly. |
| * <p> |
| * Starts with a single master and three regionservers. |
| * <p> |
| * Creates two tables, enabledTable and disabledTable, each with multiple |
| * regions. The disabledTable is then disabled. |
| * <p> |
| * After reaching steady-state, the master is killed. We then mock several |
| * states in ZK. |
| * <p> |
| * After mocking them, we start up a new master, which should become the |
| * active master and detect that it is a failover. The primary passing |
| * condition is that all regions of the enabled table are assigned and |
| * none of the regions of the disabled table are assigned. |
| * <p> |
| * The different scenarios to be tested are below: |
| * <p> |
| * <b>ZK State: OFFLINE</b> |
| * <p>A node can get into OFFLINE state if</p> |
| * <ul> |
| * <li>An RS fails to open a region, so it reverts the state back to OFFLINE |
| * <li>The Master is assigning the region to an RS and has not yet sent |
| * the open RPC |
| * </ul> |
| * <p>We will mock the scenarios</p> |
| * <ul> |
| * <li>Master had assigned a region of the enabled table but the RS failed, |
| * so the region is not assigned anywhere and is sitting in ZK as OFFLINE |
| * (a single mock that covers both cases above)</li> |
| * </ul> |
| * <p> |
| * <b>ZK State: CLOSING</b> |
| * <p>A node can get into CLOSING state if</p> |
| * <ul> |
| * <li>An RS has begun to close a region |
| * </ul> |
| * <p>We will mock the scenarios</p> |
| * <ul> |
| * <li>Region of enabled table was being closed but did not complete |
| * <li>Region of disabled table was being closed but did not complete |
| * </ul> |
| * <p> |
| * <b>ZK State: CLOSED</b> |
| * <p>A node can get into CLOSED state if</p> |
| * <ul> |
| * <li>An RS has completed closing a region but the master has not yet |
| * acknowledged it |
| * </ul> |
| * <p>We will mock the scenarios</p> |
| * <ul> |
| * <li>Region of a table that should be enabled was closed on an RS |
| * <li>Region of a table that should be disabled was closed on an RS |
| * </ul> |
| * <p> |
| * <b>ZK State: OPENING</b> |
| * <p>A node can get into OPENING state if</p> |
| * <ul> |
| * <li>An RS has begun to open a region |
| * </ul> |
| * <p>We will mock the scenarios</p> |
| * <ul> |
| * <li>RS was opening a region of enabled table but never finished |
| * </ul> |
| * <p> |
| * <b>ZK State: OPENED</b> |
| * <p>A node can get into OPENED state if</p> |
| * <ul> |
| * <li>An RS has finished opening a region but the master has not yet |
| * acknowledged it |
| * </ul> |
| * <p>We will mock the scenarios</p> |
| * <ul> |
| * <li>Region of a table that should be enabled was opened on an RS |
| * <li>Region of a table that should be disabled was opened on an RS |
| * </ul> |
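| * <p> |
| * A rough sketch of how the CLOSED state is mocked below using the ZKAssign |
| * helpers (the other states are mocked with the same kind of calls): |
| * <pre> |
| *   int version = ZKAssign.createNodeClosing(zkw, region, serverName); |
| *   ZKAssign.transitionNodeClosed(zkw, region, serverName, version); |
| * </pre> |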
| * @throws Exception |
| */ |
| @Test (timeout=180000) |
| public void testMasterFailoverWithMockedRIT() throws Exception { |
| |
| final int NUM_MASTERS = 1; |
| final int NUM_RS = 3; |
| |
| // Create config to use for this cluster |
| Configuration conf = HBaseConfiguration.create(); |
| // Need to drop the timeout much lower |
| conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000); |
| conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000); |
| conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3); |
| conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3); |
| |
| // Start the cluster |
| HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); |
| TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); |
| MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); |
| log("Cluster started"); |
| |
| // Create a ZKW to use in the test |
| ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(), |
| "unittest", new Abortable() { |
| @Override |
| public void abort(String why, Throwable e) { |
| throw new RuntimeException("Fatal ZK error, why=" + why, e); |
| } |
| }); |
| |
| // get all the master threads |
| List<MasterThread> masterThreads = cluster.getMasterThreads(); |
| assertEquals(1, masterThreads.size()); |
| |
| // only one master thread, let's wait for it to be initialized |
| assertTrue(cluster.waitForActiveAndReadyMaster()); |
| HMaster master = masterThreads.get(0).getMaster(); |
| assertTrue(master.isActiveMaster()); |
| assertTrue(master.isInitialized()); |
| |
| // disable load balancing on this master |
| master.balanceSwitch(false); |
| |
| // create two tables in META, each with multiple regions (one per split key) |
| byte [] FAMILY = Bytes.toBytes("family"); |
| byte [][] SPLIT_KEYS = new byte [][] { |
| new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"), |
| Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"), |
| Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"), |
| Bytes.toBytes("iii"), Bytes.toBytes("jjj") |
| }; |
| |
| byte [] enabledTable = Bytes.toBytes("enabledTable"); |
| HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable); |
| htdEnabled.addFamily(new HColumnDescriptor(FAMILY)); |
| List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta( |
| TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS); |
| |
| byte [] disabledTable = Bytes.toBytes("disabledTable"); |
| HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable); |
| htdDisabled.addFamily(new HColumnDescriptor(FAMILY)); |
| List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta( |
| TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS); |
| |
| log("Regions in META have been created"); |
| |
| // at this point only the 2 catalog regions (-ROOT- and .META.) should be assigned out |
| assertEquals(2, cluster.countServedRegions()); |
| |
| // Let's just assign everything to the first RS |
| HRegionServer hrs = cluster.getRegionServer(0); |
| String serverName = hrs.getServerName(); |
| HServerInfo hsiAlive = hrs.getServerInfo(); |
| |
| // we'll need some regions to already be assigned out properly on live RS |
| List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>(); |
| enabledAndAssignedRegions.add(enabledRegions.remove(0)); |
| enabledAndAssignedRegions.add(enabledRegions.remove(0)); |
| List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>(); |
| disabledAndAssignedRegions.add(disabledRegions.remove(0)); |
| disabledAndAssignedRegions.add(disabledRegions.remove(0)); |
| |
| // now actually assign them |
| for (HRegionInfo hri : enabledAndAssignedRegions) { |
| master.assignmentManager.regionPlans.put(hri.getEncodedName(), |
| new RegionPlan(hri, null, hsiAlive)); |
| master.assignRegion(hri); |
| } |
| for (HRegionInfo hri : disabledAndAssignedRegions) { |
| master.assignmentManager.regionPlans.put(hri.getEncodedName(), |
| new RegionPlan(hri, null, hsiAlive)); |
| master.assignRegion(hri); |
| } |
| |
| // wait for no more RIT |
| log("Waiting for assignment to finish"); |
| ZKAssign.blockUntilNoRIT(zkw); |
| log("Assignment completed"); |
| |
| // Stop the master |
| log("Aborting master"); |
| cluster.abortMaster(0); |
| cluster.waitOnMaster(0); |
| log("Master has aborted"); |
| |
| /* |
| * Now, let's start mocking up some weird states as described in the method |
| * javadoc. |
| */ |
| |
| List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>(); |
| List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>(); |
| |
| log("Beginning to mock scenarios"); |
| |
| // Disable the disabledTable in ZK |
| ZKTable zktable = new ZKTable(zkw); |
| zktable.setDisabledTable(Bytes.toString(disabledTable)); |
| |
| /* |
| * ZK = OFFLINE |
| */ |
| |
| // Region that should be assigned but is not and is in ZK as OFFLINE |
| HRegionInfo region = enabledRegions.remove(0); |
| regionsThatShouldBeOnline.add(region); |
| ZKAssign.createNodeOffline(zkw, region, serverName); |
| |
| /* |
| * ZK = CLOSING |
| */ |
| |
| // Disabled test of CLOSING. This case is invalid after HBASE-3181. |
| // How can an RS stop a CLOSING w/o deleting the node? If it did ever fail |
| // and left the node in CLOSING, the RS would have aborted and we'd process |
| // these regions in server shutdown |
| // |
| // // Region of enabled table being closed but not complete |
| // // Region is already assigned, don't say anything to RS but set ZK closing |
| // region = enabledAndAssignedRegions.remove(0); |
| // regionsThatShouldBeOnline.add(region); |
| // ZKAssign.createNodeClosing(zkw, region, serverName); |
| // |
| // // Region of disabled table being closed but not complete |
| // // Region is already assigned, don't say anything to RS but set ZK closing |
| // region = disabledAndAssignedRegions.remove(0); |
| // regionsThatShouldBeOffline.add(region); |
| // ZKAssign.createNodeClosing(zkw, region, serverName); |
| |
| /* |
| * ZK = CLOSED |
| */ |
| |
| // Region of enabled table closed but not ack |
| region = enabledRegions.remove(0); |
| regionsThatShouldBeOnline.add(region); |
| int version = ZKAssign.createNodeClosing(zkw, region, serverName); |
| ZKAssign.transitionNodeClosed(zkw, region, serverName, version); |
| |
| // Region of disabled table closed but not ack |
| region = disabledRegions.remove(0); |
| regionsThatShouldBeOffline.add(region); |
| version = ZKAssign.createNodeClosing(zkw, region, serverName); |
| ZKAssign.transitionNodeClosed(zkw, region, serverName, version); |
| |
| /* |
| * ZK = OPENING |
| */ |
| |
| // RS was opening a region of enabled table but never finished |
| region = enabledRegions.remove(0); |
| regionsThatShouldBeOnline.add(region); |
| ZKAssign.createNodeOffline(zkw, region, serverName); |
| ZKAssign.transitionNodeOpening(zkw, region, serverName); |
| |
| /* |
| * ZK = OPENED |
| */ |
| |
| // Region of enabled table was opened on RS |
| region = enabledRegions.remove(0); |
| regionsThatShouldBeOnline.add(region); |
| ZKAssign.createNodeOffline(zkw, region, serverName); |
| hrs.openRegion(region); |
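| // Poll ZK until the RS transitions the unassigned node to OPENED |
| // (the same wait is used for the disabled-table region below) |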
| while (true) { |
| RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName()); |
| if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) { |
| break; |
| } |
| Thread.sleep(100); |
| } |
| |
| // Region of disabled table was opened on RS |
| region = disabledRegions.remove(0); |
| regionsThatShouldBeOffline.add(region); |
| ZKAssign.createNodeOffline(zkw, region, serverName); |
| hrs.openRegion(region); |
| while (true) { |
| RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName()); |
| if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) { |
| break; |
| } |
| Thread.sleep(100); |
| } |
| |
| /* |
| * ZK = NONE |
| */ |
| |
| /* |
| * DONE MOCKING |
| */ |
| |
| log("Done mocking data up in ZK"); |
| |
| // Start up a new master |
| log("Starting up a new master"); |
| master = cluster.startMaster().getMaster(); |
| log("Waiting for master to be ready"); |
| cluster.waitForActiveAndReadyMaster(); |
| log("Master is ready"); |
| |
| // Failover should be completed, now wait for no RIT |
| log("Waiting for no more RIT"); |
| ZKAssign.blockUntilNoRIT(zkw); |
| log("No more RIT in ZK, now doing final test verification"); |
| |
| // Grab all the regions that are online across RSs |
| Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>(); |
| for (JVMClusterUtil.RegionServerThread rst : |
| cluster.getRegionServerThreads()) { |
| onlineRegions.addAll(rst.getRegionServer().getOnlineRegions()); |
| } |
| |
| // Now, everything that should be online should be online |
| for (HRegionInfo hri : regionsThatShouldBeOnline) { |
| assertTrue(onlineRegions.contains(hri)); |
| } |
| |
| // Everything that should be offline should not be online |
| for (HRegionInfo hri : regionsThatShouldBeOffline) { |
| assertFalse(onlineRegions.contains(hri)); |
| } |
| |
| log("Done with verification, all passed, shutting down cluster"); |
| |
| // Done, shutdown the cluster |
| TEST_UTIL.shutdownMiniCluster(); |
| } |
| |
| |
| /** |
| * Complex test of master failover that exercises as many permutations as |
| * possible of the different states that regions in transition can be in |
| * within ZK while pointing to an RS that has died with no master around to |
| * process it. |
| * <p> |
| * This tests proper handling of these states by the failed-over master |
| * and also exercises the timeout code thoroughly. |
| * <p> |
| * Starts with a single master and two regionservers. |
| * <p> |
| * Creates two tables, enabledTable and disabledTable, each with multiple |
| * regions. The disabledTable is then disabled. |
| * <p> |
| * After reaching steady-state, the master is killed. We then mock several |
| * states in ZK and kill one of the regionservers. |
| * <p> |
| * After mocking them and killing the RS, we start up a new master, which |
| * should become the active master and detect that it is a failover. The |
| * primary passing condition is that all regions of the enabled table are |
| * assigned and none of the regions of the disabled table are assigned. |
| * <p> |
| * The different scenarios to be tested are below: |
| * <p> |
| * <b>ZK State: CLOSING</b> |
| * <p>A node can get into CLOSING state if</p> |
| * <ul> |
| * <li>An RS has begun to close a region |
| * </ul> |
| * <p>We will mock the scenarios</p> |
| * <ul> |
| * <li>Region was being closed but the RS died before finishing the close |
| * </ul> |
| * <b>ZK State: OPENED</b> |
| * <p>A node can get into OPENED state if</p> |
| * <ul> |
| * <li>An RS has finished opening a region but the master has not yet |
| * acknowledged it |
| * </ul> |
| * <p>We will mock the scenarios</p> |
| * <ul> |
| * <li>Region of a table that should be enabled was opened by a now-dead RS |
| * <li>Region of a table that should be disabled was opened by a now-dead RS |
| * </ul> |
| * <p> |
| * <b>ZK State: NONE</b> |
| * <p>A region can be left with no transition node if</p> |
| * <ul> |
| * <li>The server hosting the region died and no master processed it |
| * </ul> |
| * <p>We will mock the scenarios</p> |
| * <ul> |
| * <li>Region of enabled table was on a dead RS that was not yet processed |
| * <li>Region of disabled table was on a dead RS that was not yet processed |
| * </ul> |
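| * <p> |
| * A rough sketch of how the NONE state is mocked below: the region is |
| * opened on the soon-to-be-killed RS and its transition node is deleted |
| * once the RS marks it OPENED: |
| * <pre> |
| *   ZKAssign.createNodeOffline(zkw, region, deadServerName); |
| *   hrsDead.openRegion(region); |
| *   // ... wait for RS_ZK_REGION_OPENED, then: |
| *   ZKAssign.deleteOpenedNode(zkw, region.getEncodedName()); |
| * </pre> |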
| * @throws Exception |
| */ |
| @Test (timeout=180000) |
| public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception { |
| |
| final int NUM_MASTERS = 1; |
| final int NUM_RS = 2; |
| |
| // Create config to use for this cluster |
| Configuration conf = HBaseConfiguration.create(); |
| // Need to drop the timeout much lower |
| conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000); |
| conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000); |
| conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1); |
| conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 2); |
| |
| // Create and start the cluster |
| HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); |
| TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); |
| MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); |
| log("Cluster started"); |
| |
| // Create a ZKW to use in the test |
| ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(), |
| "unittest", new Abortable() { |
| @Override |
| public void abort(String why, Throwable e) { |
| LOG.error("Fatal ZK Error: " + why, e); |
| org.junit.Assert.fail("Fatal ZK error"); |
| } |
| }); |
| |
| // get all the master threads |
| List<MasterThread> masterThreads = cluster.getMasterThreads(); |
| assertEquals(1, masterThreads.size()); |
| |
| // only one master thread, let's wait for it to be initialized |
| assertTrue(cluster.waitForActiveAndReadyMaster()); |
| HMaster master = masterThreads.get(0).getMaster(); |
| assertTrue(master.isActiveMaster()); |
| assertTrue(master.isInitialized()); |
| |
| // disable load balancing on this master |
| master.balanceSwitch(false); |
| |
| // create two tables in META, each with multiple regions (one per split key) |
| byte [] FAMILY = Bytes.toBytes("family"); |
| byte [][] SPLIT_KEYS = new byte [][] { |
| new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"), |
| Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"), |
| Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"), |
| Bytes.toBytes("iii"), Bytes.toBytes("jjj") |
| }; |
| |
| byte [] enabledTable = Bytes.toBytes("enabledTable"); |
| HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable); |
| htdEnabled.addFamily(new HColumnDescriptor(FAMILY)); |
| List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta( |
| TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS); |
| |
| byte [] disabledTable = Bytes.toBytes("disabledTable"); |
| HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable); |
| htdDisabled.addFamily(new HColumnDescriptor(FAMILY)); |
| List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta( |
| TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS); |
| |
| log("Regions in META have been created"); |
| |
| // at this point only the 2 catalog regions (-ROOT- and .META.) should be assigned out |
| assertEquals(2, cluster.countServedRegions()); |
| |
| // The first RS will stay online |
| HRegionServer hrs = cluster.getRegionServer(0); |
| HServerInfo hsiAlive = hrs.getServerInfo(); |
| |
| // The second RS is going to be hard-killed |
| HRegionServer hrsDead = cluster.getRegionServer(1); |
| String deadServerName = hrsDead.getServerName(); |
| HServerInfo hsiDead = hrsDead.getServerInfo(); |
| |
| // we'll need some regions to already be assigned out properly on live RS |
| List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>(); |
| enabledAndAssignedRegions.add(enabledRegions.remove(0)); |
| enabledAndAssignedRegions.add(enabledRegions.remove(0)); |
| List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>(); |
| disabledAndAssignedRegions.add(disabledRegions.remove(0)); |
| disabledAndAssignedRegions.add(disabledRegions.remove(0)); |
| |
| // now actually assign them |
| for (HRegionInfo hri : enabledAndAssignedRegions) { |
| master.assignmentManager.regionPlans.put(hri.getEncodedName(), |
| new RegionPlan(hri, null, hsiAlive)); |
| master.assignRegion(hri); |
| } |
| for (HRegionInfo hri : disabledAndAssignedRegions) { |
| master.assignmentManager.regionPlans.put(hri.getEncodedName(), |
| new RegionPlan(hri, null, hsiAlive)); |
| master.assignRegion(hri); |
| } |
| |
| // we also need regions assigned out on the dead server |
| List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>(); |
| enabledAndOnDeadRegions.add(enabledRegions.remove(0)); |
| enabledAndOnDeadRegions.add(enabledRegions.remove(0)); |
| List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>(); |
| disabledAndOnDeadRegions.add(disabledRegions.remove(0)); |
| disabledAndOnDeadRegions.add(disabledRegions.remove(0)); |
| |
| // set region plan to server to be killed and trigger assign |
| for (HRegionInfo hri : enabledAndOnDeadRegions) { |
| master.assignmentManager.regionPlans.put(hri.getEncodedName(), |
| new RegionPlan(hri, null, hsiDead)); |
| master.assignRegion(hri); |
| } |
| for (HRegionInfo hri : disabledAndOnDeadRegions) { |
| master.assignmentManager.regionPlans.put(hri.getEncodedName(), |
| new RegionPlan(hri, null, hsiDead)); |
| master.assignRegion(hri); |
| } |
| |
| // wait for no more RIT |
| log("Waiting for assignment to finish"); |
| ZKAssign.blockUntilNoRIT(zkw); |
| log("Assignment completed"); |
| |
| // Stop the master |
| log("Aborting master"); |
| cluster.abortMaster(0); |
| cluster.waitOnMaster(0); |
| log("Master has aborted"); |
| |
| /* |
| * Now, let's start mocking up some weird states as described in the method |
| * javadoc. |
| */ |
| |
| List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>(); |
| List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>(); |
| |
| log("Beginning to mock scenarios"); |
| |
| // Disable the disabledTable in ZK |
| ZKTable zktable = new ZKTable(zkw); |
| zktable.setDisabledTable(Bytes.toString(disabledTable)); |
| |
| /* |
| * ZK = CLOSING |
| */ |
| |
| // Region of enabled table being closed on dead RS but not finished |
| HRegionInfo region = enabledAndOnDeadRegions.remove(0); |
| regionsThatShouldBeOnline.add(region); |
| ZKAssign.createNodeClosing(zkw, region, deadServerName); |
| LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" + |
| region + "\n\n"); |
| |
| // Region of disabled table being closed on dead RS but not finished |
| region = disabledAndOnDeadRegions.remove(0); |
| regionsThatShouldBeOffline.add(region); |
| ZKAssign.createNodeClosing(zkw, region, deadServerName); |
| LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" + |
| region + "\n\n"); |
| |
| /* |
| * ZK = CLOSED |
| */ |
| |
| // Region of enabled table on dead server gets closed but not ack'd by master |
| region = enabledAndOnDeadRegions.remove(0); |
| regionsThatShouldBeOnline.add(region); |
| int version = ZKAssign.createNodeClosing(zkw, region, deadServerName); |
| ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version); |
| LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" + |
| region + "\n\n"); |
| |
| // Region of disabled table on dead server gets closed but not ack'd by master |
| region = disabledAndOnDeadRegions.remove(0); |
| regionsThatShouldBeOffline.add(region); |
| version = ZKAssign.createNodeClosing(zkw, region, deadServerName); |
| ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version); |
| LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" + |
| region + "\n\n"); |
| |
| /* |
| * ZK = OPENING |
| */ |
| |
| // RS was opening a region of enabled table then died |
| region = enabledRegions.remove(0); |
| regionsThatShouldBeOnline.add(region); |
| ZKAssign.createNodeOffline(zkw, region, deadServerName); |
| ZKAssign.transitionNodeOpening(zkw, region, deadServerName); |
| LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" + |
| region + "\n\n"); |
| |
| // RS was opening a region of disabled table then died |
| region = disabledRegions.remove(0); |
| regionsThatShouldBeOffline.add(region); |
| ZKAssign.createNodeOffline(zkw, region, deadServerName); |
| ZKAssign.transitionNodeOpening(zkw, region, deadServerName); |
| LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" + |
| region + "\n\n"); |
| |
| /* |
| * ZK = OPENED |
| */ |
| |
| // Region of enabled table was opened on dead RS |
| region = enabledRegions.remove(0); |
| regionsThatShouldBeOnline.add(region); |
| ZKAssign.createNodeOffline(zkw, region, deadServerName); |
| hrsDead.openRegion(region); |
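| // Poll ZK until the soon-to-be-killed RS transitions the node to OPENED |
| // (the same wait is repeated for the other mocked opens below) |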
| while (true) { |
| RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName()); |
| if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) { |
| break; |
| } |
| Thread.sleep(100); |
| } |
| LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" + |
| region + "\n\n"); |
| |
| // Region of disabled table was opened on dead RS |
| region = disabledRegions.remove(0); |
| regionsThatShouldBeOffline.add(region); |
| ZKAssign.createNodeOffline(zkw, region, deadServerName); |
| hrsDead.openRegion(region); |
| while (true) { |
| RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName()); |
| if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) { |
| break; |
| } |
| Thread.sleep(100); |
| } |
| LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" + |
| region + "\n\n"); |
| |
| /* |
| * ZK = NONE |
| */ |
| |
| // Region of enabled table was open at steady-state on dead RS |
| region = enabledRegions.remove(0); |
| regionsThatShouldBeOnline.add(region); |
| ZKAssign.createNodeOffline(zkw, region, deadServerName); |
| hrsDead.openRegion(region); |
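| // Wait for the RS to mark the node OPENED, then delete the node so the |
| // region ends up open on the dead RS with no transition node left in ZK |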
| while (true) { |
| RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName()); |
| if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) { |
| ZKAssign.deleteOpenedNode(zkw, region.getEncodedName()); |
| break; |
| } |
| Thread.sleep(100); |
| } |
| LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS" |
| + "\n" + region + "\n\n"); |
| |
| // Region of disabled table was open at steady-state on dead RS |
| region = disabledRegions.remove(0); |
| regionsThatShouldBeOffline.add(region); |
| ZKAssign.createNodeOffline(zkw, region, deadServerName); |
| hrsDead.openRegion(region); |
| while (true) { |
| RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName()); |
| if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) { |
| ZKAssign.deleteOpenedNode(zkw, region.getEncodedName()); |
| break; |
| } |
| Thread.sleep(100); |
| } |
| LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS" |
| + "\n" + region + "\n\n"); |
| |
| /* |
| * DONE MOCKING |
| */ |
| |
| log("Done mocking data up in ZK"); |
| |
| // Kill the RS that had a hard death |
| log("Killing RS " + deadServerName); |
| hrsDead.abort("Killing for unit test"); |
| log("RS " + deadServerName + " killed"); |
| |
| // Start up a new master |
| log("Starting up a new master"); |
| master = cluster.startMaster().getMaster(); |
| log("Waiting for master to be ready"); |
| cluster.waitForActiveAndReadyMaster(); |
| log("Master is ready"); |
| |
| // Let's add some weird states to the master's in-memory state |
| |
| // After HBASE-3181, we need to have some ZK state if we're PENDING_OPEN |
| // b/c it is impossible for us to get into this state w/o a zk node |
| // this is not true of PENDING_CLOSE |
| |
| // PENDING_OPEN and enabled |
| region = enabledRegions.remove(0); |
| regionsThatShouldBeOnline.add(region); |
| master.assignmentManager.regionsInTransition.put(region.getEncodedName(), |
| new RegionState(region, RegionState.State.PENDING_OPEN, 0)); |
| ZKAssign.createNodeOffline(zkw, region, master.getServerName()); |
| // PENDING_OPEN and disabled |
| region = disabledRegions.remove(0); |
| regionsThatShouldBeOffline.add(region); |
| master.assignmentManager.regionsInTransition.put(region.getEncodedName(), |
| new RegionState(region, RegionState.State.PENDING_OPEN, 0)); |
| ZKAssign.createNodeOffline(zkw, region, master.getServerName()); |
| // This test is bad. It puts up a PENDING_CLOSE but doesn't say what |
| // server we were PENDING_CLOSE against -- i.e. an entry in |
| // AssignmentManager#regions. W/o a server, we NPE trying to resend the close. |
| // In the past, there was wonky logic that had us reassign the region if no |
| // server was found at the tail of the unassign. This was removed. |
| // Commenting out for now. |
| // TODO: Remove completely. |
| /* |
| // PENDING_CLOSE and enabled |
| region = enabledRegions.remove(0); |
| LOG.info("Setting PENDING_CLOSE enabled " + region.getEncodedName()); |
| regionsThatShouldBeOnline.add(region); |
| master.assignmentManager.regionsInTransition.put(region.getEncodedName(), |
| new RegionState(region, RegionState.State.PENDING_CLOSE, 0)); |
| // PENDING_CLOSE and disabled |
| region = disabledRegions.remove(0); |
| LOG.info("Setting PENDING_CLOSE disabled " + region.getEncodedName()); |
| regionsThatShouldBeOffline.add(region); |
| master.assignmentManager.regionsInTransition.put(region.getEncodedName(), |
| new RegionState(region, RegionState.State.PENDING_CLOSE, 0)); |
| */ |
| |
| // Failover should be completed, now wait for no RIT |
| log("Waiting for no more RIT"); |
| ZKAssign.blockUntilNoRIT(zkw); |
| log("No more RIT in ZK"); |
| long now = System.currentTimeMillis(); |
| final long maxTime = 120000; |
| boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime); |
| if (!done) { |
| LOG.info("rit=" + master.assignmentManager.getRegionsInTransition()); |
| } |
| long elapsed = System.currentTimeMillis() - now; |
| assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done, |
| elapsed < maxTime); |
| log("No more RIT in RIT map, doing final test verification"); |
| |
| // Grab all the regions that are online across RSs |
| Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>(); |
| for (JVMClusterUtil.RegionServerThread rst : |
| cluster.getRegionServerThreads()) { |
| onlineRegions.addAll(rst.getRegionServer().getOnlineRegions()); |
| } |
| |
| // Now, everything that should be online should be online |
| for (HRegionInfo hri : regionsThatShouldBeOnline) { |
| assertTrue("region=" + hri.getRegionNameAsString(), onlineRegions.contains(hri)); |
| } |
| |
| // Everything that should be offline should not be online |
| for (HRegionInfo hri : regionsThatShouldBeOffline) { |
| assertFalse(onlineRegions.contains(hri)); |
| } |
| |
| log("Done with verification, all passed, shutting down cluster"); |
| |
| // Done, shutdown the cluster |
| TEST_UTIL.shutdownMiniCluster(); |
| } |
| |
| // TODO: Next test to add should cover permutations where the regions in |
| // transition, or the RS killed, are hosting the ROOT and META regions. |
| |
| private void log(String string) { |
| LOG.info("\n\n" + string + " \n\n"); |
| } |
| } |