hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java - hbase - Git at Google

 /**
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.hadoop.hbase.master;

 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;

 import java.io.IOException;
 import java.util.List;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.ClusterStatus;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.protobuf.RequestConverter;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.regionserver.Region;
 import org.apache.hadoop.hbase.testclassification.FlakeyTests;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;

 @Category({FlakeyTests.class, LargeTests.class})
 public class TestMasterFailover {
   private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);

   /**
    * Creates a region together with its WAL through {@link HBaseTestingUtility} and
    * closes both again before returning.
    *
    * @param hri region to create
    * @param rootdir root directory handed to the creation helper
    * @param c configuration handed to the creation helper
    * @param htd descriptor of the table the region belongs to
    * @return the region object, after it and its WAL have been closed
    * @throws IOException propagated from the {@link HBaseTestingUtility} calls
    */
   HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
       final HTableDescriptor htd) throws IOException {
     final HRegion region = HBaseTestingUtility.createRegionAndWAL(hri, rootdir, c, htd);
     // Creating the region also creates a WAL file, and every WAL comes with a
     // running sync thread.  Close it right away; otherwise that thread keeps
     // trying to sync against the file system, which is ugly once dfs is taken
     // away at the end of the test.
     HBaseTestingUtility.closeRegionAndWAL(region);
     return region;
   }

   // TODO: The next tests to add should cover the permutations in which the regions in
   //       transition, or the regions on the killed RS, are the ROOT and hbase:meta regions.

   /**
    * Writes a message to the test log at INFO level, surrounded by blank lines.
    *
    * @param string the message to log
    */
   private void log(String string) {
     final String padded = "\n\n" + string + " \n\n";
     LOG.info(padded);
   }

   /**
    * Simple test of master failover.
    * <p>
    * Starts with three masters.  Stops a backup master, then stops the active
    * master.  Ensures the remaining master becomes active and that its
    * ClusterStatus still reports the expected masters and servers.
    * @throws Exception on any failure while driving the mini cluster
    */
   @Test (timeout=240000)
   public void testSimpleMasterFailover() throws Exception {
     final int NUM_MASTERS = 3;
     final int NUM_RS = 3;

     // Start the cluster
     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
     try {
       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();

       // get all the master threads
       // NOTE(review): the size assertions below expect this list to shrink as masters
       // are stopped, i.e. presumably it is a live view of the cluster -- confirm.
       List<MasterThread> masterThreads = cluster.getMasterThreads();

       // every master thread must be running
       for (MasterThread mt : masterThreads) {
         assertTrue(mt.isAlive());
       }

       // verify only one is the active master and we have right number
       int numActive = 0;
       int activeIndex = -1;
       ServerName activeName = null;
       HMaster active = null;
       for (int i = 0; i < masterThreads.size(); i++) {
         HMaster candidate = masterThreads.get(i).getMaster();
         if (candidate.isActiveMaster()) {
           numActive++;
           activeIndex = i;
           active = candidate;
           activeName = active.getServerName();
         }
       }
       assertEquals(1, numActive);
       assertEquals(NUM_MASTERS, masterThreads.size());
       LOG.info("Active master " + activeName);

       // Check that ClusterStatus reports the correct active and backup masters
       assertNotNull(active);
       ClusterStatus status = active.getClusterStatus();
       assertEquals(activeName, status.getMaster());
       assertEquals(2, status.getBackupMastersSize());
       assertEquals(2, status.getBackupMasters().size());

       // attempt to stop one of the inactive masters
       int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
       HMaster master = cluster.getMaster(backupIndex);
       LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
       cluster.stopMaster(backupIndex, false);
       cluster.waitOnMaster(backupIndex);

       // Verify still one active master and it's the same.  Count from scratch so the
       // assertion reflects the cluster as it is now, not the count taken before the stop.
       numActive = 0;
       for (int i = 0; i < masterThreads.size(); i++) {
         HMaster candidate = masterThreads.get(i).getMaster();
         if (candidate.isActiveMaster()) {
           numActive++;
           assertEquals(activeName, candidate.getServerName());
           // the index may have moved now that a thread is gone from the list
           activeIndex = i;
           active = candidate;
         }
       }
       assertEquals(1, numActive);
       assertEquals(2, masterThreads.size());
       assertNotNull(active);
       int rsCount = active.getClusterStatus().getServersSize();
       LOG.info("Active master " + active.getServerName() + " managing " + rsCount
         + " regions servers");
       // NOTE(review): 4 although NUM_RS is 3 -- presumably the active master is counted
       // as a server as well; confirm.
       assertEquals(4, rsCount);

       // Check that ClusterStatus reports the correct active and backup masters
       status = active.getClusterStatus();
       assertEquals(activeName, status.getMaster());
       assertEquals(1, status.getBackupMastersSize());
       assertEquals(1, status.getBackupMasters().size());

       // kill the active master
       LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
       cluster.stopMaster(activeIndex, false);
       cluster.waitOnMaster(activeIndex);

       // wait for an active master to show up and be ready
       assertTrue(cluster.waitForActiveAndReadyMaster());

       LOG.debug("\n\nVerifying backup master is now active\n");
       // should only have one master now
       assertEquals(1, masterThreads.size());

       // and it should be active
       active = masterThreads.get(0).getMaster();
       assertNotNull(active);
       status = active.getClusterStatus();
       ServerName mastername = status.getMaster();
       assertEquals(active.getServerName(), mastername);
       assertTrue(active.isActiveMaster());
       assertEquals(0, status.getBackupMastersSize());
       assertEquals(0, status.getBackupMasters().size());
       int rss = status.getServersSize();
       LOG.info("Active master " + mastername.getServerName() + " managing " +
         rss +  " region servers");
       assertEquals(4, rss);
     } finally {
       // Stop the cluster even when an assertion above failed, so it is not leaked
       TEST_UTIL.shutdownMiniCluster();
     }
   }

   /**
    * Test region in pending_open/close when master failover.
    * <p>
    * Writes PENDING_CLOSE, PENDING_OPEN, FAILED_CLOSE and FAILED_OPEN states (the last
    * one both with and without a server) through the region state store, aborts the
    * only master, starts a new one and expects every one of those regions to be online
    * once no region is in transition any more.
    * @throws Exception on any failure while driving the mini cluster
    */
   @Test (timeout=180000)
   public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
     final int NUM_MASTERS = 1;
     final int NUM_RS = 1;

     // Create config to use for this cluster
     Configuration conf = HBaseConfiguration.create();

     // Start the cluster
     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
     try {
       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
       log("Cluster started");

       // get all the master threads
       List<MasterThread> masterThreads = cluster.getMasterThreads();
       assertEquals(1, masterThreads.size());

       // only one master thread, let's wait for it to be initialized
       assertTrue(cluster.waitForActiveAndReadyMaster());
       HMaster master = masterThreads.get(0).getMaster();
       assertTrue(master.isActiveMaster());
       assertTrue(master.isInitialized());

       // Create a table with a region online
       Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
       onlineTable.close();
       // Create a table in META, so it has a region offline
       HTableDescriptor offlineTable = new HTableDescriptor(
         TableName.valueOf(Bytes.toBytes("offlineTable")));
       offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));

       FileSystem filesystem = FileSystem.get(conf);
       Path rootdir = FSUtils.getRootDir(conf);
       FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
       fstd.createTableDescriptor(offlineTable);

       HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
       createRegion(hriOffline, rootdir, conf, offlineTable);
       MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);

       log("Regions in hbase:meta and namespace have been created");

       // at this point we only expect 3 regions to be assigned out
       // (catalogs and namespace, + 1 online region)
       assertEquals(3, cluster.countServedRegions());
       HRegionInfo hriOnline = null;
       try (RegionLocator locator =
           TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
         hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
       }
       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
       RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();

       // Put the online region in pending_close. It is actually already opened.
       // This is to simulate that the region close RPC is not sent out before failover
       RegionState oldState = regionStates.getRegionState(hriOnline);
       // Every state written below (except the explicit null-server case) names the
       // server that currently hosts the online region.
       ServerName hostingServer = oldState.getServerName();
       RegionState newState = new RegionState(hriOnline, State.PENDING_CLOSE, hostingServer);
       stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

       // Put the offline region in pending_open. It is actually not opened yet.
       // This is to simulate that the region open RPC is not sent out before failover
       oldState = new RegionState(hriOffline, State.OFFLINE);
       newState = new RegionState(hriOffline, State.PENDING_OPEN, hostingServer);
       stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

       // Record a region whose close is marked as failed
       HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
       createRegion(failedClose, rootdir, conf, offlineTable);
       MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);

       oldState = new RegionState(failedClose, State.PENDING_CLOSE);
       newState = new RegionState(failedClose, State.FAILED_CLOSE, hostingServer);
       stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

       HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
       createRegion(failedOpen, rootdir, conf, offlineTable);
       MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);

       // Simulate a region transitioning to failed open when the region server reports the
       // transition as FAILED_OPEN
       oldState = new RegionState(failedOpen, State.PENDING_OPEN);
       newState = new RegionState(failedOpen, State.FAILED_OPEN, hostingServer);
       stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

       HRegionInfo failedOpenNullServer =
         new HRegionInfo(offlineTable.getTableName(), null, null);
       LOG.info("Failed open NUll server " + failedOpenNullServer.getEncodedName());
       createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
       MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);

       // Simulate a region transitioning to failed open when the master couldn't find a plan
       // for the region
       oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
       newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
       stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

       // Stop the master
       log("Aborting master");
       cluster.abortMaster(0);
       cluster.waitOnMaster(0);
       log("Master has aborted");

       // Start up a new master
       log("Starting up a new master");
       master = cluster.startMaster().getMaster();
       log("Waiting for master to be ready");
       // fail here rather than on a later assertion if no master became ready
       assertTrue(cluster.waitForActiveAndReadyMaster());
       log("Master is ready");

       // Wait till no region in transition any more
       TEST_UTIL.waitUntilNoRegionsInTransition(60000);

       // Get new region states since master restarted
       regionStates = master.getAssignmentManager().getRegionStates();

       // Every region whose state was manipulated above should be online again
       assertTrue(regionStates.isRegionOnline(hriOffline));
       assertTrue(regionStates.isRegionOnline(hriOnline));
       assertTrue(regionStates.isRegionOnline(failedClose));
       assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
       assertTrue(regionStates.isRegionOnline(failedOpen));

       log("Done with verification, shutting down cluster");
     } finally {
       // Done (or failed): shut the cluster down either way so it is not leaked
       TEST_UTIL.shutdownMiniCluster();
     }
   }

   /**
    * Test meta in transition when master failover.
    * <p>
    * Moves hbase:meta onto the region server, then restarts the master three times:
    * with meta untouched, with meta marked PENDING_OPEN in ZooKeeper, and with meta
    * marked PENDING_CLOSE in ZooKeeper.
    * @throws Exception on any failure while driving the mini cluster
    */
   @Test(timeout = 180000)
   public void testMetaInTransitionWhenMasterFailover() throws Exception {
     final int NUM_MASTERS = 1;
     final int NUM_RS = 1;

     // Start the cluster
     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
     try {
       MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
       log("Cluster started");

       log("Moving meta off the master");
       HMaster activeMaster = cluster.getMaster();
       HRegionServer rs = cluster.getRegionServer(0);
       ServerName metaServerName = cluster.getLiveRegionServerThreads()
         .get(0).getRegionServer().getServerName();
       activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
         Bytes.toBytes(metaServerName.getServerName()));
       TEST_UTIL.waitUntilNoRegionsInTransition(60000);
       assertEquals("Meta should be assigned on expected regionserver",
         metaServerName, activeMaster.getMetaTableLocator()
           .getMetaRegionLocation(activeMaster.getZooKeeper()));

       // Now kill master, meta should remain on rs, where we placed it before.
       abortMasterAndWait(cluster, activeMaster);

       // meta should remain where it was
       RegionState metaState = MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
       assertEquals("hbase:meta should be onlined on RS",
         rs.getServerName(), metaState.getServerName());
       assertEquals("hbase:meta should be onlined on RS",
         State.OPEN, metaState.getState());

       // Start up a new master
       activeMaster = startMasterAndWait(cluster);

       // ensure meta is still deployed on RS
       metaState = MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
       assertEquals("hbase:meta should be onlined on RS",
         rs.getServerName(), metaState.getServerName());
       assertEquals("hbase:meta should be onlined on RS",
         State.OPEN, metaState.getState());

       // Mark meta as PENDING_OPEN on the RS in ZooKeeper and take the region offline
       // on the RS itself, then kill the master.  The new master has to bring meta
       // back online.
       // region server should expire (how it can be verified?)
       MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
         rs.getServerName(), State.PENDING_OPEN);
       Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
       rs.removeFromOnlineRegions(meta, null);
       ((HRegion)meta).close();

       abortMasterAndWait(cluster, activeMaster);

       // Start up a new master
       activeMaster = startMasterAndWait(cluster);

       TEST_UTIL.waitUntilNoRegionsInTransition(60000);
       log("Meta was assigned");

       metaState = MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
       assertEquals("hbase:meta should be onlined on RS",
         rs.getServerName(), metaState.getServerName());
       assertEquals("hbase:meta should be onlined on RS",
         State.OPEN, metaState.getState());

       // Mark meta as PENDING_CLOSE on the RS in ZooKeeper, kill the master, and only
       // then ask the RS to close the region, i.e. the close happens while no master
       // is running.
       // region server should expire (how it can be verified?)
       MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
         rs.getServerName(), State.PENDING_CLOSE);

       abortMasterAndWait(cluster, activeMaster);

       rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
         rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()));

       // Start up a new master
       activeMaster = startMasterAndWait(cluster);

       TEST_UTIL.waitUntilNoRegionsInTransition(60000);
       log("Meta was assigned");
     } finally {
       // Done (or failed): shut the cluster down either way so it is not leaked
       TEST_UTIL.shutdownMiniCluster();
     }
   }

   /**
    * Aborts the given master and waits for it to stop (timeout argument 30000).
    */
   private void abortMasterAndWait(MiniHBaseCluster cluster, HMaster master) throws Exception {
     log("Aborting master");
     master.abort("test-kill");
     cluster.waitForMasterToStop(master.getServerName(), 30000);
     log("Master has aborted");
   }

   /**
    * Starts a new master and asserts that an active, ready master shows up.
    * @return the master that was just started
    */
   private HMaster startMasterAndWait(MiniHBaseCluster cluster) throws Exception {
     log("Starting up a new master");
     HMaster started = cluster.startMaster().getMaster();
     log("Waiting for master to be ready");
     assertTrue(cluster.waitForActiveAndReadyMaster());
     log("Master is ready");
     return started;
   }
 }
	/**
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.hadoop.hbase.master;

	import static org.junit.Assert.assertEquals;
	import static org.junit.Assert.assertNotNull;
	import static org.junit.Assert.assertTrue;

	import java.io.IOException;
	import java.util.List;

	import org.apache.commons.logging.Log;
	import org.apache.commons.logging.LogFactory;
	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.FileSystem;
	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.hbase.ClusterStatus;
	import org.apache.hadoop.hbase.HBaseConfiguration;
	import org.apache.hadoop.hbase.HBaseTestingUtility;
	import org.apache.hadoop.hbase.HColumnDescriptor;
	import org.apache.hadoop.hbase.HConstants;
	import org.apache.hadoop.hbase.HRegionInfo;
	import org.apache.hadoop.hbase.HTableDescriptor;
	import org.apache.hadoop.hbase.MetaTableAccessor;
	import org.apache.hadoop.hbase.MiniHBaseCluster;
	import org.apache.hadoop.hbase.ServerName;
	import org.apache.hadoop.hbase.TableName;
	import org.apache.hadoop.hbase.client.RegionLocator;
	import org.apache.hadoop.hbase.client.Table;
	import org.apache.hadoop.hbase.master.RegionState.State;
	import org.apache.hadoop.hbase.protobuf.RequestConverter;
	import org.apache.hadoop.hbase.regionserver.HRegion;
	import org.apache.hadoop.hbase.regionserver.HRegionServer;
	import org.apache.hadoop.hbase.regionserver.Region;
	import org.apache.hadoop.hbase.testclassification.FlakeyTests;
	import org.apache.hadoop.hbase.testclassification.LargeTests;
	import org.apache.hadoop.hbase.util.Bytes;
	import org.apache.hadoop.hbase.util.FSTableDescriptors;
	import org.apache.hadoop.hbase.util.FSUtils;
	import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
	import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
	import org.junit.Test;
	import org.junit.experimental.categories.Category;

	@Category({FlakeyTests.class, LargeTests.class})
	public class TestMasterFailover {
	private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);

	/**
	 * Creates a region together with its WAL through {@link HBaseTestingUtility} and
	 * closes both again before returning.
	 *
	 * @param hri region to create
	 * @param rootdir root directory handed to the creation helper
	 * @param c configuration handed to the creation helper
	 * @param htd descriptor of the table the region belongs to
	 * @return the region object, after it and its WAL have been closed
	 * @throws IOException propagated from the {@link HBaseTestingUtility} calls
	 */
	HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
	    final HTableDescriptor htd) throws IOException {
	  final HRegion region = HBaseTestingUtility.createRegionAndWAL(hri, rootdir, c, htd);
	  // Creating the region also creates a WAL file, and every WAL comes with a
	  // running sync thread.  Close it right away; otherwise that thread keeps
	  // trying to sync against the file system, which is ugly once dfs is taken
	  // away at the end of the test.
	  HBaseTestingUtility.closeRegionAndWAL(region);
	  return region;
	}

	// TODO: The next tests to add should cover the permutations in which the regions in
	// transition, or the regions on the killed RS, are the ROOT and hbase:meta regions.

	/**
	 * Writes a message to the test log at INFO level, surrounded by blank lines.
	 *
	 * @param string the message to log
	 */
	private void log(String string) {
	  final String padded = "\n\n" + string + " \n\n";
	  LOG.info(padded);
	}

	/**
	 * Simple test of master failover.
	 * <p>
	 * Starts with three masters. Stops a backup master, then stops the active
	 * master. Ensures the remaining master becomes active and that its
	 * ClusterStatus still reports the expected masters and servers.
	 * @throws Exception on any failure while driving the mini cluster
	 */
	@Test (timeout=240000)
	public void testSimpleMasterFailover() throws Exception {
	  final int NUM_MASTERS = 3;
	  final int NUM_RS = 3;

	  // Start the cluster
	  HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
	  TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
	  try {
	    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();

	    // get all the master threads
	    // NOTE(review): the size assertions below expect this list to shrink as masters
	    // are stopped, i.e. presumably it is a live view of the cluster -- confirm.
	    List<MasterThread> masterThreads = cluster.getMasterThreads();

	    // every master thread must be running
	    for (MasterThread mt : masterThreads) {
	      assertTrue(mt.isAlive());
	    }

	    // verify only one is the active master and we have right number
	    int numActive = 0;
	    int activeIndex = -1;
	    ServerName activeName = null;
	    HMaster active = null;
	    for (int i = 0; i < masterThreads.size(); i++) {
	      HMaster candidate = masterThreads.get(i).getMaster();
	      if (candidate.isActiveMaster()) {
	        numActive++;
	        activeIndex = i;
	        active = candidate;
	        activeName = active.getServerName();
	      }
	    }
	    assertEquals(1, numActive);
	    assertEquals(NUM_MASTERS, masterThreads.size());
	    LOG.info("Active master " + activeName);

	    // Check that ClusterStatus reports the correct active and backup masters
	    assertNotNull(active);
	    ClusterStatus status = active.getClusterStatus();
	    assertEquals(activeName, status.getMaster());
	    assertEquals(2, status.getBackupMastersSize());
	    assertEquals(2, status.getBackupMasters().size());

	    // attempt to stop one of the inactive masters
	    int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
	    HMaster master = cluster.getMaster(backupIndex);
	    LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
	    cluster.stopMaster(backupIndex, false);
	    cluster.waitOnMaster(backupIndex);

	    // Verify still one active master and it's the same. Count from scratch so the
	    // assertion reflects the cluster as it is now, not the count taken before the stop.
	    numActive = 0;
	    for (int i = 0; i < masterThreads.size(); i++) {
	      HMaster candidate = masterThreads.get(i).getMaster();
	      if (candidate.isActiveMaster()) {
	        numActive++;
	        assertEquals(activeName, candidate.getServerName());
	        // the index may have moved now that a thread is gone from the list
	        activeIndex = i;
	        active = candidate;
	      }
	    }
	    assertEquals(1, numActive);
	    assertEquals(2, masterThreads.size());
	    assertNotNull(active);
	    int rsCount = active.getClusterStatus().getServersSize();
	    LOG.info("Active master " + active.getServerName() + " managing " + rsCount
	      + " regions servers");
	    // NOTE(review): 4 although NUM_RS is 3 -- presumably the active master is counted
	    // as a server as well; confirm.
	    assertEquals(4, rsCount);

	    // Check that ClusterStatus reports the correct active and backup masters
	    status = active.getClusterStatus();
	    assertEquals(activeName, status.getMaster());
	    assertEquals(1, status.getBackupMastersSize());
	    assertEquals(1, status.getBackupMasters().size());

	    // kill the active master
	    LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
	    cluster.stopMaster(activeIndex, false);
	    cluster.waitOnMaster(activeIndex);

	    // wait for an active master to show up and be ready
	    assertTrue(cluster.waitForActiveAndReadyMaster());

	    LOG.debug("\n\nVerifying backup master is now active\n");
	    // should only have one master now
	    assertEquals(1, masterThreads.size());

	    // and it should be active
	    active = masterThreads.get(0).getMaster();
	    assertNotNull(active);
	    status = active.getClusterStatus();
	    ServerName mastername = status.getMaster();
	    assertEquals(active.getServerName(), mastername);
	    assertTrue(active.isActiveMaster());
	    assertEquals(0, status.getBackupMastersSize());
	    assertEquals(0, status.getBackupMasters().size());
	    int rss = status.getServersSize();
	    LOG.info("Active master " + mastername.getServerName() + " managing " +
	      rss + " region servers");
	    assertEquals(4, rss);
	  } finally {
	    // Stop the cluster even when an assertion above failed, so it is not leaked
	    TEST_UTIL.shutdownMiniCluster();
	  }
	}

	/**
	 * Test region in pending_open/close when master failover.
	 * <p>
	 * Writes PENDING_CLOSE, PENDING_OPEN, FAILED_CLOSE and FAILED_OPEN states (the last
	 * one both with and without a server) through the region state store, aborts the
	 * only master, starts a new one and expects every one of those regions to be online
	 * once no region is in transition any more.
	 * @throws Exception on any failure while driving the mini cluster
	 */
	@Test (timeout=180000)
	public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
	  final int NUM_MASTERS = 1;
	  final int NUM_RS = 1;

	  // Create config to use for this cluster
	  Configuration conf = HBaseConfiguration.create();

	  // Start the cluster
	  HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
	  TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
	  try {
	    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
	    log("Cluster started");

	    // get all the master threads
	    List<MasterThread> masterThreads = cluster.getMasterThreads();
	    assertEquals(1, masterThreads.size());

	    // only one master thread, let's wait for it to be initialized
	    assertTrue(cluster.waitForActiveAndReadyMaster());
	    HMaster master = masterThreads.get(0).getMaster();
	    assertTrue(master.isActiveMaster());
	    assertTrue(master.isInitialized());

	    // Create a table with a region online
	    Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
	    onlineTable.close();
	    // Create a table in META, so it has a region offline
	    HTableDescriptor offlineTable = new HTableDescriptor(
	      TableName.valueOf(Bytes.toBytes("offlineTable")));
	    offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));

	    FileSystem filesystem = FileSystem.get(conf);
	    Path rootdir = FSUtils.getRootDir(conf);
	    FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
	    fstd.createTableDescriptor(offlineTable);

	    HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
	    createRegion(hriOffline, rootdir, conf, offlineTable);
	    MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);

	    log("Regions in hbase:meta and namespace have been created");

	    // at this point we only expect 3 regions to be assigned out
	    // (catalogs and namespace, + 1 online region)
	    assertEquals(3, cluster.countServedRegions());
	    HRegionInfo hriOnline = null;
	    try (RegionLocator locator =
	        TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
	      hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
	    }
	    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
	    RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();

	    // Put the online region in pending_close. It is actually already opened.
	    // This is to simulate that the region close RPC is not sent out before failover
	    RegionState oldState = regionStates.getRegionState(hriOnline);
	    // Every state written below (except the explicit null-server case) names the
	    // server that currently hosts the online region.
	    ServerName hostingServer = oldState.getServerName();
	    RegionState newState = new RegionState(hriOnline, State.PENDING_CLOSE, hostingServer);
	    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

	    // Put the offline region in pending_open. It is actually not opened yet.
	    // This is to simulate that the region open RPC is not sent out before failover
	    oldState = new RegionState(hriOffline, State.OFFLINE);
	    newState = new RegionState(hriOffline, State.PENDING_OPEN, hostingServer);
	    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

	    // Record a region whose close is marked as failed
	    HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
	    createRegion(failedClose, rootdir, conf, offlineTable);
	    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);

	    oldState = new RegionState(failedClose, State.PENDING_CLOSE);
	    newState = new RegionState(failedClose, State.FAILED_CLOSE, hostingServer);
	    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

	    HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
	    createRegion(failedOpen, rootdir, conf, offlineTable);
	    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);

	    // Simulate a region transitioning to failed open when the region server reports the
	    // transition as FAILED_OPEN
	    oldState = new RegionState(failedOpen, State.PENDING_OPEN);
	    newState = new RegionState(failedOpen, State.FAILED_OPEN, hostingServer);
	    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

	    HRegionInfo failedOpenNullServer =
	      new HRegionInfo(offlineTable.getTableName(), null, null);
	    LOG.info("Failed open NUll server " + failedOpenNullServer.getEncodedName());
	    createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
	    MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);

	    // Simulate a region transitioning to failed open when the master couldn't find a plan
	    // for the region
	    oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
	    newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
	    stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);

	    // Stop the master
	    log("Aborting master");
	    cluster.abortMaster(0);
	    cluster.waitOnMaster(0);
	    log("Master has aborted");

	    // Start up a new master
	    log("Starting up a new master");
	    master = cluster.startMaster().getMaster();
	    log("Waiting for master to be ready");
	    // fail here rather than on a later assertion if no master became ready
	    assertTrue(cluster.waitForActiveAndReadyMaster());
	    log("Master is ready");

	    // Wait till no region in transition any more
	    TEST_UTIL.waitUntilNoRegionsInTransition(60000);

	    // Get new region states since master restarted
	    regionStates = master.getAssignmentManager().getRegionStates();

	    // Every region whose state was manipulated above should be online again
	    assertTrue(regionStates.isRegionOnline(hriOffline));
	    assertTrue(regionStates.isRegionOnline(hriOnline));
	    assertTrue(regionStates.isRegionOnline(failedClose));
	    assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
	    assertTrue(regionStates.isRegionOnline(failedOpen));

	    log("Done with verification, shutting down cluster");
	  } finally {
	    // Done (or failed): shut the cluster down either way so it is not leaked
	    TEST_UTIL.shutdownMiniCluster();
	  }
	}

	/**
	 * Tests handling of hbase:meta across repeated master failovers.
	 *
	 * <p>The scenario, in order:
	 * <ol>
	 *   <li>Start a mini cluster with one master and one region server, and move meta onto the
	 *       region server.</li>
	 *   <li>Abort the master and check that the meta state recorded in ZooKeeper is still
	 *       {@code OPEN} on that region server, both before and after a new master is started.</li>
	 *   <li>Record meta as {@code PENDING_OPEN} in ZooKeeper, close the meta region on the region
	 *       server, fail the master over, and check that meta ends up {@code OPEN} on the region
	 *       server again.</li>
	 *   <li>Record meta as {@code PENDING_CLOSE} in ZooKeeper, abort the master, ask the region
	 *       server to close meta, start a new master and wait until no region is in transition.</li>
	 * </ol>
	 *
	 * <p>The mini cluster is shut down in a {@code finally} block so that a failed assertion does
	 * not leave it running.
	 *
	 * @throws Exception if any cluster operation fails
	 */
	@Test(timeout = 180000)
	public void testMetaInTransitionWhenMasterFailover() throws Exception {
	  final int NUM_MASTERS = 1;
	  final int NUM_RS = 1;

	  // Start the cluster
	  HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
	  TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
	  try {
	    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
	    log("Cluster started");

	    log("Moving meta off the master");
	    HMaster activeMaster = cluster.getMaster();
	    HRegionServer rs = cluster.getRegionServer(0);
	    ServerName metaServerName = cluster.getLiveRegionServerThreads()
	        .get(0).getRegionServer().getServerName();
	    activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
	        Bytes.toBytes(metaServerName.getServerName()));
	    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
	    assertEquals("Meta should be assigned on expected regionserver",
	        metaServerName, activeMaster.getMetaTableLocator()
	            .getMetaRegionLocation(activeMaster.getZooKeeper()));

	    // Now kill master, meta should remain on rs, where we placed it before.
	    log("Aborting master");
	    activeMaster.abort("test-kill");
	    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
	    log("Master has aborted");

	    // meta should remain where it was; read the state through the region server's
	    // ZooKeeper watcher since there is no active master at this point
	    RegionState metaState = MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
	    assertEquals("hbase:meta should be onlined on RS",
	        rs.getServerName(), metaState.getServerName());
	    assertEquals("hbase:meta should be onlined on RS",
	        State.OPEN, metaState.getState());

	    // Start up a new master
	    log("Starting up a new master");
	    activeMaster = cluster.startMaster().getMaster();
	    log("Waiting for master to be ready");
	    cluster.waitForActiveAndReadyMaster();
	    log("Master is ready");

	    // ensure meta is still deployed on RS
	    metaState = MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
	    assertEquals("hbase:meta should be onlined on RS",
	        rs.getServerName(), metaState.getServerName());
	    assertEquals("hbase:meta should be onlined on RS",
	        State.OPEN, metaState.getState());

	    // Record meta as PENDING_OPEN on the RS in ZooKeeper and take the meta region offline
	    // on the RS itself, then kill the master. The next master therefore finds meta in
	    // transition and has to bring it back online.
	    // NOTE(review): the original comment asked whether the region server should expire
	    // here and how that could be verified; this test does not check it.
	    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
	        rs.getServerName(), State.PENDING_OPEN);
	    Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
	    rs.removeFromOnlineRegions(meta, null);
	    ((HRegion)meta).close();

	    log("Aborting master");
	    activeMaster.abort("test-kill");
	    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
	    log("Master has aborted");

	    // Start up a new master
	    log("Starting up a new master");
	    activeMaster = cluster.startMaster().getMaster();
	    log("Waiting for master to be ready");
	    cluster.waitForActiveAndReadyMaster();
	    log("Master is ready");

	    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
	    log("Meta was assigned");

	    metaState = MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
	    assertEquals("hbase:meta should be onlined on RS",
	        rs.getServerName(), metaState.getServerName());
	    assertEquals("hbase:meta should be onlined on RS",
	        State.OPEN, metaState.getState());

	    // Record meta as PENDING_CLOSE on the RS in ZooKeeper, then kill the master. While no
	    // master is running, the close request is delivered to the RS directly, so the next
	    // master starts with meta recorded as closing.
	    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
	        rs.getServerName(), State.PENDING_CLOSE);

	    log("Aborting master");
	    activeMaster.abort("test-kill");
	    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
	    log("Master has aborted");

	    rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
	        rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()));

	    // Start up a new master
	    log("Starting up a new master");
	    activeMaster = cluster.startMaster().getMaster();
	    log("Waiting for master to be ready");
	    cluster.waitForActiveAndReadyMaster();
	    log("Master is ready");

	    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
	    log("Meta was assigned");
	  } finally {
	    // Done (or failed): always shut the cluster down so it cannot leak into other tests
	    TEST_UTIL.shutdownMiniCluster();
	  }
	}
	}