/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;

/**
 * Used by the HMaster on startup to split meta logs and assign the meta table.
 */
@InterfaceAudience.Private
public class MasterMetaBootstrap {
private static final Log LOG = LogFactory.getLog(MasterMetaBootstrap.class);
private final MonitoredTask status;
private final HMaster master;
  /** Servers that failed before this master started and need their WALs split. */
  private Set<ServerName> previouslyFailedServers;
  /** Servers recorded in ZK as having carried hbase:meta when they failed. */
  private Set<ServerName> previouslyFailedMetaRSs;

  public MasterMetaBootstrap(final HMaster master, final MonitoredTask status) {
this.master = master;
this.status = status;
}
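
  /**
   * Find the region servers that failed before this master came up, and split the
   * hbase:meta WAL of the server that was carrying hbase:meta so meta can be assigned.
   */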
public void splitMetaLogsBeforeAssignment() throws IOException, KeeperException {
    // Get a list of previously failed RSs which need log-splitting work. We recover the
    // hbase:meta region server during master initialization and handle the other failed
    // servers in SSH in order to start up the master node ASAP.
previouslyFailedServers = master.getMasterWalManager().getFailedServersFromLogFolders();
// log splitting for hbase:meta server
ServerName oldMetaServerLocation = master.getMetaTableLocator()
.getMetaRegionLocation(master.getZooKeeper());
if (oldMetaServerLocation != null && previouslyFailedServers.contains(oldMetaServerLocation)) {
splitMetaLogBeforeAssignment(oldMetaServerLocation);
      // Note: we can't remove oldMetaServerLocation from previouslyFailedServers because it
      // may also host user regions
}
    previouslyFailedMetaRSs = getPreviouslyFailedMetaServersFromZK();
    // We need the union of previouslyFailedMetaRSs recorded in ZK and previouslyFailedServers,
    // rather than previouslyFailedMetaRSs alone, to address two situations:
    // 1) chained failures, where recovery failed multiple times in a row;
    // 2) the master got killed right before it could delete the recovering hbase:meta znode,
    //    while the same server still had non-meta WALs to be replayed, so
    //    removeStaleRecoveringRegionsFromZK could not delete the stale hbase:meta entry.
    // Passing extra servers into splitMetaLog is fine: for a server without an hbase:meta WAL,
    // the split is a no-op.
previouslyFailedMetaRSs.addAll(previouslyFailedServers);
}
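
  /**
   * Assign the default replica of hbase:meta, replaying the WALs of previously failed
   * meta servers if log-replay recovery mode is in use.
   */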
public void assignMeta() throws InterruptedException, IOException, KeeperException {
assignMeta(previouslyFailedMetaRSs, HRegionInfo.DEFAULT_REPLICA_ID);
}
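
  /**
   * Hand the remaining failed servers to the server shutdown handler for regular
   * dead-server processing.
   */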
public void processDeadServers() throws IOException {
    // The master has already recovered the hbase:meta region server; queue the other
    // failed region servers to be handled later by SSH.
for (ServerName tmpServer : previouslyFailedServers) {
master.getServerManager().processDeadServer(tmpServer, true);
}
}
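
  /**
   * Assign the non-default hbase:meta replicas (ids 1 and up, per the configured replica
   * count) and unassign any excess replicas left over from a larger previous configuration.
   */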
public void assignMetaReplicas()
throws IOException, InterruptedException, KeeperException {
int numReplicas = master.getConfiguration().getInt(HConstants.META_REPLICAS_NUM,
HConstants.DEFAULT_META_REPLICA_NUM);
final Set<ServerName> EMPTY_SET = new HashSet<ServerName>();
for (int i = 1; i < numReplicas; i++) {
assignMeta(EMPTY_SET, i);
}
unassignExcessMetaReplica(numReplicas);
}
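
  /**
   * Recover the hbase:meta WAL of the given server: mark the region as recovering for
   * distributed log replay, or split the WAL into recovered.edits files otherwise.
   */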
private void splitMetaLogBeforeAssignment(ServerName currentMetaServer) throws IOException {
if (RecoveryMode.LOG_REPLAY == master.getMasterWalManager().getLogRecoveryMode()) {
      // In log-replay mode, mark the hbase:meta region as recovering in ZK
master.getMasterWalManager().prepareLogReplay(currentMetaServer,
Collections.<HRegionInfo>singleton(HRegionInfo.FIRST_META_REGIONINFO));
} else {
      // In recovered.edits mode, create recovered-edits files for the hbase:meta server
master.getMasterWalManager().splitMetaLog(currentMetaServer);
}
}
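
  /**
   * Close and delete the znodes of any meta replicas whose ids are at or above the
   * currently configured replica count.
   */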
private void unassignExcessMetaReplica(int numMetaReplicasConfigured) {
final ZooKeeperWatcher zooKeeper = master.getZooKeeper();
    // Unassign the unneeded replicas (e.g., if the previous master was configured with
    // 3 replicas and now it is 2, we need to unassign the one unneeded replica).
try {
List<String> metaReplicaZnodes = zooKeeper.getMetaReplicaNodes();
for (String metaReplicaZnode : metaReplicaZnodes) {
int replicaId = zooKeeper.getMetaReplicaIdFromZnode(metaReplicaZnode);
if (replicaId >= numMetaReplicasConfigured) {
RegionState r = MetaTableLocator.getMetaRegionState(zooKeeper, replicaId);
LOG.info("Closing excess replica of meta region " + r.getRegion());
// send a close and wait for a max of 30 seconds
ServerManager.closeRegionSilentlyAndWait(master.getClusterConnection(),
r.getServerName(), r.getRegion(), 30000);
ZKUtil.deleteNode(zooKeeper, zooKeeper.getZNodeForReplica(replicaId));
}
}
} catch (Exception ex) {
// ignore the exception since we don't want the master to be wedged due to potential
// issues in the cleanup of the extra regions. We can do that cleanup via hbck or manually
LOG.warn("Ignoring exception " + ex);
}
  }

  /**
   * Check that <code>hbase:meta</code> is assigned; if not, assign it.
   * @param previouslyFailedMetaRSs servers whose hbase:meta WALs may still need replaying
   * @param replicaId id of the meta replica to check and assign
   */
protected void assignMeta(Set<ServerName> previouslyFailedMetaRSs, int replicaId)
throws InterruptedException, IOException, KeeperException {
final AssignmentManager assignmentManager = master.getAssignmentManager();
// Work on meta region
int assigned = 0;
long timeout = master.getConfiguration().getLong("hbase.catalog.verification.timeout", 1000);
if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
status.setStatus("Assigning hbase:meta region");
} else {
status.setStatus("Assigning hbase:meta region, replicaId " + replicaId);
}
// Get current meta state from zk.
RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper(), replicaId);
HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO,
replicaId);
RegionStates regionStates = assignmentManager.getRegionStates();
regionStates.createRegionState(hri, metaState.getState(),
metaState.getServerName(), null);
if (!metaState.isOpened() || !master.getMetaTableLocator().verifyMetaRegionLocation(
master.getClusterConnection(), master.getZooKeeper(), timeout, replicaId)) {
ServerName currentMetaServer = metaState.getServerName();
if (master.getServerManager().isServerOnline(currentMetaServer)) {
if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
LOG.info("Meta was in transition on " + currentMetaServer);
} else {
LOG.info("Meta with replicaId " + replicaId + " was in transition on " +
currentMetaServer);
}
assignmentManager.processRegionsInTransition(Collections.singletonList(metaState));
} else {
if (currentMetaServer != null) {
if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
splitMetaLogBeforeAssignment(currentMetaServer);
regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO);
previouslyFailedMetaRSs.add(currentMetaServer);
}
}
LOG.info("Re-assigning hbase:meta with replicaId, " + replicaId +
" it was on " + currentMetaServer);
assignmentManager.assignMeta(hri);
}
assigned++;
}
if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
// TODO: should we prevent from using state manager before meta was initialized?
// tableStateManager.start();
master.getTableStateManager()
.setTableState(TableName.META_TABLE_NAME, TableState.State.ENABLED);
}
if ((RecoveryMode.LOG_REPLAY == master.getMasterWalManager().getLogRecoveryMode())
&& (!previouslyFailedMetaRSs.isEmpty())) {
      // Log-replay mode requires that a new hbase:meta RS be assigned first
status.setStatus("replaying log for Meta Region");
master.getMasterWalManager().splitMetaLog(previouslyFailedMetaRSs);
}
assignmentManager.setEnabledTable(TableName.META_TABLE_NAME);
master.getTableStateManager().start();
    // Make sure an hbase:meta location is set. We need to enable SSH here since
    // if the meta region server dies at this point, we need it to be re-assigned
    // by SSH so that system tables can be assigned.
    // No need to wait for meta when assigned == 0, i.e. meta was only verified in place.
if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableCrashedServerProcessing(assigned != 0);
LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assigned + ", location="
+ master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper(), replicaId));
status.setStatus("META assigned.");
}
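
  /**
   * Enable crashed-server processing so dead servers can be expired and re-processed,
   * and optionally wait for the hbase:meta location to be set.
   */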
private void enableCrashedServerProcessing(final boolean waitForMeta)
throws IOException, InterruptedException {
    // If crashed server processing is disabled, enable it and expire those servers that are
    // dead but not yet expired. This is required so that if meta is being assigned to a server
    // which dies after assignMeta starts the assignment, ServerCrashProcedure can re-assign it.
    // Otherwise we would be stuck here waiting forever when waitForMeta is specified.
if (!master.isServerCrashProcessingEnabled()) {
master.setServerCrashProcessingEnabled(true);
master.getServerManager().processQueuedDeadServers();
}
if (waitForMeta) {
master.getMetaTableLocator().waitMetaRegionLocation(master.getZooKeeper());
}
  }

  /**
   * Returns the set of region server names recorded under the hbase:meta recovering-region
   * znode in ZooKeeper.
   * @return the set of meta server names recorded in ZK
   */
  private Set<ServerName> getPreviouslyFailedMetaServersFromZK() throws KeeperException {
final ZooKeeperWatcher zooKeeper = master.getZooKeeper();
Set<ServerName> result = new HashSet<ServerName>();
String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.recoveringRegionsZNode,
HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
if (regionFailedServers == null) return result;
for (String failedServer : regionFailedServers) {
ServerName server = ServerName.parseServerName(failedServer);
result.add(server);
}
return result;
}
}