blob: 652cbc0eef97fed272a4bfc7b4a691acb98e3fa0 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
/**
* <p>Contains a set of methods for the collaboration between the start/stop scripts and the
* servers. It allows the znode to be deleted immediately when the master or the region server
* crashes. The region server / master writes a specific file when it starts / becomes main
* master. When they end properly, they delete the file.</p>
* <p>In the script, we check for the existence of these files when the program ends. If they still
* exist we conclude that the server crashed, likely without deleting its znode. To have a faster
* recovery we delete the znode immediately.</p>
* <p>The strategy depends on the server type. For a region server we store the znode path in the
* file, and use it to delete the znode. For a master, as the znode path is constant whatever the
* server, we check its content to make sure that the backup server is not now in charge.</p>
*/
public class ZNodeClearer {
  private static final Log LOG = LogFactory.getLog(ZNodeClearer.class);

  /** Static utility class; not meant to be instantiated. */
  private ZNodeClearer() {}

  /**
   * Writes the given content, followed by a newline, to the file named by the
   * HBASE_ZNODE_FILE environment variable.
   * Logs the errors without failing on exception.
   * @param fileContent text to store in the znode file
   */
  public static void writeMyEphemeralNodeOnDisk(String fileContent) {
    String fileName = ZNodeClearer.getMyEphemeralNodeFileName();
    if (fileName == null) {
      LOG.warn("Environment variable HBASE_ZNODE_FILE not set; znodes will not be cleared " +
        "on crash by start scripts (Longer MTTR!)");
      return;
    }

    FileWriter fstream;
    try {
      fstream = new FileWriter(fileName);
    } catch (IOException e) {
      LOG.warn("Can't write znode file "+fileName, e);
      return;
    }

    BufferedWriter out = new BufferedWriter(fstream);
    try {
      try {
        out.write(fileContent + "\n");
      } finally {
        // Close the buffered writer first so pending data is flushed; the underlying
        // writer is closed explicitly as well in case the flush/close above throws.
        try {
          out.close();
        } finally {
          fstream.close();
        }
      }
    } catch (IOException e) {
      LOG.warn("Can't write znode file "+fileName, e);
    }
  }

  /**
   * Reads the content of the znode file; expects a single line.
   * @return the first line of the file, or null if the file is empty
   * @throws FileNotFoundException if HBASE_ZNODE_FILE is not set or the file cannot be opened
   * @throws IOException if reading the file fails
   */
  public static String readMyEphemeralNodeOnDisk() throws IOException {
    String fileName = getMyEphemeralNodeFileName();
    if (fileName == null){
      throw new FileNotFoundException("No filename; set environment variable HBASE_ZNODE_FILE");
    }
    BufferedReader br = new BufferedReader(new FileReader(fileName));
    try {
      // Only the first line is meaningful; readLine() yields null for an empty file.
      return br.readLine();
    } finally {
      // Closing the buffered reader also closes the wrapped FileReader.
      br.close();
    }
  }

  /**
   * Gets the name of the file used to store the znode contents.
   * @return the value of the HBASE_ZNODE_FILE environment variable, or null if it is not set
   */
  public static String getMyEphemeralNodeFileName() {
    return System.getenv("HBASE_ZNODE_FILE");
  }

  /**
   * Deletes the znode file, if HBASE_ZNODE_FILE is set. A failed deletion is logged
   * rather than raised.
   */
  public static void deleteMyEphemeralNodeOnDisk() {
    String fileName = getMyEphemeralNodeFileName();
    if (fileName == null) {
      return;
    }
    File znodeFile = new File(fileName);
    // delete() also returns false when the file was never there; only a file that
    // is still present afterwards is worth reporting.
    if (!znodeFile.delete() && znodeFile.exists()) {
      LOG.warn("Can't delete znode file " + fileName);
    }
  }

  /**
   * See HBASE-14861. We are extracting master ServerName from rsZnodePath
   * example: "/hbase/rs/server.example.com,16020,1448266496481"
   * @param rsZnodePath from HBASE_ZNODE_FILE
   * @return String representation of ServerName (the last path component) or null if fails
   */
  public static String parseMasterServerName(String rsZnodePath) {
    if (rsZnodePath == null) {
      LOG.warn("String " + rsZnodePath + " has wrong format");
      return null;
    }
    // split() drops trailing empty strings, so a path made only of separators
    // (e.g. "/") yields an empty array and has no last component.
    String[] rsZnodeParts = rsZnodePath.split("/");
    if (rsZnodeParts.length == 0) {
      LOG.warn("String " + rsZnodePath + " has wrong format");
      return null;
    }
    return rsZnodeParts[rsZnodeParts.length - 1];
  }

  /**
   * Checks the {@link BaseLoadBalancer#TABLES_ON_MASTER} setting.
   * @param conf configuration to read the setting from
   * @return true if cluster is configured with master-rs collocation, i.e. unless the
   *         setting is present and equals "none" (case-insensitive)
   */
  private static boolean tablesOnMaster(Configuration conf) {
    String confValue = conf.get(BaseLoadBalancer.TABLES_ON_MASTER);
    return !"none".equalsIgnoreCase(confValue);
  }

  /**
   * Delete the master znode if its content (ServerName string) is the same
   * as the one in the znode file. (env: HBASE_ZNODE_FILE). In case of master-rs
   * collocation we extract ServerName string from rsZnode path.(HBASE-14861)
   * @param conf configuration used to connect to ZooKeeper; it is copied, not modified
   * @return true on successful deletion (or when there is no znode file at all),
   *         false otherwise, including when the znode file is empty or malformed.
   */
  public static boolean clear(Configuration conf) {
    // Work on a copy so that the retry override does not leak into the caller's configuration.
    Configuration tempConf = new Configuration(conf);
    tempConf.setInt("zookeeper.recovery.retry", 0);

    ZooKeeperWatcher zkw;
    try {
      zkw = new ZooKeeperWatcher(tempConf, "clean znode for master",
          new Abortable() {
            @Override public void abort(String why, Throwable e) {}
            @Override public boolean isAborted() { return false; }
          });
    } catch (IOException e) {
      LOG.warn("Can't connect to zookeeper to read the master znode", e);
      return false;
    }

    try {
      String znodeFileContent = ZNodeClearer.readMyEphemeralNodeOnDisk();
      if (znodeFileContent == null) {
        // An empty file gives us nothing to compare or delete; report failure instead of
        // handing a null to the ZooKeeper helpers.
        LOG.warn("The znode file is empty; no znode to delete");
        return false;
      }
      if (ZNodeClearer.tablesOnMaster(conf)) {
        //In case of master crash also remove rsZnode since master is also regionserver
        ZKUtil.deleteNodeFailSilent(zkw, znodeFileContent);
        String masterServerName = ZNodeClearer.parseMasterServerName(znodeFileContent);
        if (masterServerName == null) {
          // parseMasterServerName has already logged why the content is unusable.
          return false;
        }
        return MasterAddressTracker.deleteIfEquals(zkw, masterServerName);
      }
      return MasterAddressTracker.deleteIfEquals(zkw, znodeFileContent);
    } catch (FileNotFoundException fnfe) {
      // If no file, just keep going -- return success.
      LOG.warn("Can't find the znode file; presume non-fatal", fnfe);
      return true;
    } catch (IOException e) {
      LOG.warn("Can't read the content of the znode file", e);
      return false;
    } catch (KeeperException e) {
      LOG.warn("ZooKeeper exception deleting znode", e);
      return false;
    } finally {
      zkw.close();
    }
  }
}