blob: efb1cfd6107aba525c91f36570a8c5e8e86fa28f [file] [log] [blame]
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.ZNodeClearer;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
/**
* Handles everything on master-side related to master election.
*
* <p>Listens and responds to ZooKeeper notifications on the master znode,
* both <code>nodeCreated</code> and <code>nodeDeleted</code>.
*
* <p>Contains blocking methods which will hold up backup masters, waiting
* for the active master to fail.
*
* <p>This class is instantiated in the HMaster constructor and the method
* #blockUntilBecomingActiveMaster() is called to wait until becoming
* the active master of the cluster.
*/
@InterfaceAudience.Private
public class ActiveMasterManager extends ZooKeeperListener {
private static final Log LOG = LogFactory.getLog(ActiveMasterManager.class);
final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false);
final AtomicBoolean clusterShutDown = new AtomicBoolean(false);
private final ServerName sn;
private int infoPort;
private final Server master;
/**
* @param watcher
* @param sn ServerName
* @param master In an instance of a Master.
*/
ActiveMasterManager(ZooKeeperWatcher watcher, ServerName sn, Server master) {
super(watcher);
watcher.registerListener(this);
this.sn = sn;
this.master = master;
}
// will be set after jetty server is started
public void setInfoPort(int infoPort) {
this.infoPort = infoPort;
}
@Override
public void nodeCreated(String path) {
handle(path);
}
@Override
public void nodeDeleted(String path) {
// We need to keep track of the cluster's shutdown status while
// we wait on the current master. We consider that, if the cluster
// was already in a "shutdown" state when we started, that this master
// is part of a new cluster that was started shortly after the old cluster
// shut down, so that state is now irrelevant. This means that the shutdown
// state must be set while we wait on the active master in order
// to shutdown this master. See HBASE-8519.
if(path.equals(watcher.clusterStateZNode) && !master.isStopped()) {
clusterShutDown.set(true);
}
handle(path);
}
void handle(final String path) {
if (path.equals(watcher.getMasterAddressZNode()) && !master.isStopped()) {
handleMasterNodeChange();
}
}
/**
* Handle a change in the master node. Doesn't matter whether this was called
* from a nodeCreated or nodeDeleted event because there are no guarantees
* that the current state of the master node matches the event at the time of
* our next ZK request.
*
* <p>Uses the watchAndCheckExists method which watches the master address node
* regardless of whether it exists or not. If it does exist (there is an
* active master), it returns true. Otherwise it returns false.
*
* <p>A watcher is set which guarantees that this method will get called again if
* there is another change in the master node.
*/
private void handleMasterNodeChange() {
// Watch the node and check if it exists.
try {
synchronized(clusterHasActiveMaster) {
if (ZKUtil.watchAndCheckExists(watcher, watcher.getMasterAddressZNode())) {
// A master node exists, there is an active master
LOG.debug("A master is now available");
clusterHasActiveMaster.set(true);
} else {
// Node is no longer there, cluster does not have an active master
LOG.debug("No master available. Notifying waiting threads");
clusterHasActiveMaster.set(false);
// Notify any thread waiting to become the active master
clusterHasActiveMaster.notifyAll();
}
}
} catch (KeeperException ke) {
master.abort("Received an unexpected KeeperException, aborting", ke);
}
}
/**
* Block until becoming the active master.
*
* Method blocks until there is not another active master and our attempt
* to become the new active master is successful.
*
* This also makes sure that we are watching the master znode so will be
* notified if another master dies.
* @param checkInterval the interval to check if the master is stopped
* @param startupStatus the monitor status to track the progress
* @return True if no issue becoming active master else false if another
* master was running or if some other problem (zookeeper, stop flag has been
* set on this Master)
*/
boolean blockUntilBecomingActiveMaster(
int checkInterval, MonitoredTask startupStatus) {
String backupZNode = ZKUtil.joinZNode(
this.watcher.backupMasterAddressesZNode, this.sn.toString());
while (!(master.isAborted() || master.isStopped())) {
startupStatus.setStatus("Trying to register in ZK as active master");
// Try to become the active master, watch if there is another master.
// Write out our ServerName as versioned bytes.
try {
if (MasterAddressTracker.setMasterAddress(this.watcher,
this.watcher.getMasterAddressZNode(), this.sn, infoPort)) {
// If we were a backup master before, delete our ZNode from the backup
// master directory since we are the active now)
if (ZKUtil.checkExists(this.watcher, backupZNode) != -1) {
LOG.info("Deleting ZNode for " + backupZNode + " from backup master directory");
ZKUtil.deleteNodeFailSilent(this.watcher, backupZNode);
}
// Save the znode in a file, this will allow to check if we crash in the launch scripts
ZNodeClearer.writeMyEphemeralNodeOnDisk(this.sn.toString());
// We are the master, return
startupStatus.setStatus("Successfully registered as active master.");
this.clusterHasActiveMaster.set(true);
LOG.info("Registered Active Master=" + this.sn);
return true;
}
// There is another active master running elsewhere or this is a restart
// and the master ephemeral node has not expired yet.
this.clusterHasActiveMaster.set(true);
String msg;
byte[] bytes =
ZKUtil.getDataAndWatch(this.watcher, this.watcher.getMasterAddressZNode());
if (bytes == null) {
msg = ("A master was detected, but went down before its address " +
"could be read. Attempting to become the next active master");
} else {
ServerName currentMaster;
try {
currentMaster = ServerName.parseFrom(bytes);
} catch (DeserializationException e) {
LOG.warn("Failed parse", e);
// Hopefully next time around we won't fail the parse. Dangerous.
continue;
}
if (ServerName.isSameHostnameAndPort(currentMaster, this.sn)) {
msg = ("Current master has this master's address, " +
currentMaster + "; master was restarted? Deleting node.");
// Hurry along the expiration of the znode.
ZKUtil.deleteNode(this.watcher, this.watcher.getMasterAddressZNode());
// We may have failed to delete the znode at the previous step, but
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
ZNodeClearer.deleteMyEphemeralNodeOnDisk();
} else {
msg = "Another master is the active master, " + currentMaster +
"; waiting to become the next active master";
}
}
LOG.info(msg);
startupStatus.setStatus(msg);
} catch (KeeperException ke) {
master.abort("Received an unexpected KeeperException, aborting", ke);
return false;
}
synchronized (this.clusterHasActiveMaster) {
while (clusterHasActiveMaster.get() && !master.isStopped()) {
try {
clusterHasActiveMaster.wait(checkInterval);
} catch (InterruptedException e) {
// We expect to be interrupted when a master dies,
// will fall out if so
LOG.debug("Interrupted waiting for master to die", e);
}
}
if (clusterShutDown.get()) {
this.master.stop(
"Cluster went down before this master became active");
}
}
}
return false;
}
/**
* @return True if cluster has an active master.
*/
boolean hasActiveMaster() {
try {
if (ZKUtil.checkExists(watcher, watcher.getMasterAddressZNode()) >= 0) {
return true;
}
}
catch (KeeperException ke) {
LOG.info("Received an unexpected KeeperException when checking " +
"isActiveMaster : "+ ke);
}
return false;
}
public void stop() {
try {
synchronized (clusterHasActiveMaster) {
// Master is already stopped, wake up the manager
// thread so that it can shutdown soon.
clusterHasActiveMaster.notifyAll();
}
// If our address is in ZK, delete it on our way out
ServerName activeMaster = null;
try {
activeMaster = MasterAddressTracker.getMasterAddress(this.watcher);
} catch (IOException e) {
LOG.warn("Failed get of master address: " + e.toString());
}
if (activeMaster != null && activeMaster.equals(this.sn)) {
ZKUtil.deleteNode(watcher, watcher.getMasterAddressZNode());
// We may have failed to delete the znode at the previous step, but
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
ZNodeClearer.deleteMyEphemeralNodeOnDisk();
}
} catch (KeeperException e) {
LOG.error(this.watcher.prefix("Error deleting our own master address node"), e);
}
}
}