blob: dc113949d5859c60e8d8b83ebdb8722e9bcbc760 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.*;
import org.apache.log4j.PropertyConfigurator;
/**********************************************************
 * This class provides various static methods for interaction
 * with the FailMon configuration and the operating system
 * environment. It also provides some helper methods (running
 * shell commands, GCD computation, logging) for use by other
 * classes in the package.
 *
 * All state is kept in static fields and none of the methods
 * perform any synchronization.
 **********************************************************/
public class Environment {

  /** Interval used for the log-parsing jobs when none is configured. */
  public static final int DEFAULT_LOG_INTERVAL = 3600;

  /** Interval used for the polling jobs (NICs, CPU, disks, sensors) when none is configured. */
  public static final int DEFAULT_POLL_INTERVAL = 360;

  /** Lower bound for the value returned by {@link #getInterval(ArrayList)}. */
  public static int MIN_INTERVAL = 5;

  /** Initial capacity of the buffer that collects the output of a shell command. */
  public static final int MAX_OUTPUT_LENGTH = 51200;

  /** Package-wide logger; assigned by {@link #prepare(String)}. */
  public static Log LOG;

  /** In-memory copy of the FailMon configuration. */
  static Properties fmProperties = new Properties();

  /** True if the sudo probe in {@link #prepare(String)} succeeded without a password. */
  static boolean superuser = false;

  /** True once {@link #prepare(String)} has loaded the configuration. */
  static boolean ready = false;

  /**
   * Initializes structures needed by other methods: configures log4j,
   * reads the persistent parsing state and loads the configuration
   * file. Also determines whether the executing user has superuser
   * privileges, by checking whether <code>sudo</code> can run a
   * command without asking for a password.
   *
   * Exits the JVM if the operating system is not Linux.
   *
   * @param fname the path of the properties file to load
   */
  public static void prepare(String fname) {

    if (!"Linux".equalsIgnoreCase(System.getProperty("os.name"))) {
      System.err.println("Linux system required for FailMon. Exiting...");
      System.exit(0);
    }

    System.setProperty("log4j.configuration", "conf/log4j.properties");
    PropertyConfigurator.configure("conf/log4j.properties");
    LOG = LogFactory.getLog("org.apache.hadoop.contrib.failmon");
    logInfo("********** FailMon started ***********");

    // read parseState file
    PersistentState.readState("conf/parsing.state");

    loadProperties(fname);
    ready = true;

    superuser = detectSuperuser();
    logInfo(superuser ? "Superuser privileges found!"
                      : "Superuser privileges not found.");
  }

  /**
   * Loads the given properties file into {@link #fmProperties}. Failures
   * are logged and otherwise ignored, leaving the properties untouched.
   */
  private static void loadProperties(String fname) {
    FileInputStream propFile = null;
    try {
      propFile = new FileInputStream(fname);
      fmProperties.load(propFile);
    } catch (FileNotFoundException e) {
      logError("Configuration file " + fname + " not found", e);
    } catch (IOException e) {
      logError("Could not read configuration file " + fname, e);
    } finally {
      // close even when load() fails, so the descriptor is not leaked
      closeQuietly(propFile);
    }
  }

  /**
   * Runs <code>sudo -S -p passwd_needed: echo access_ok</code> and reports
   * whether the echoed text appeared on the standard output, i.e. whether
   * sudo executed the command without prompting for a password.
   */
  private static boolean detectSuperuser() {
    final String sudoPrompt = "passwd_needed:";
    final String echoTxt = "access_ok";
    Process p = null;
    try {
      p = Runtime.getRuntime().exec(
          "sudo -S -p " + sudoPrompt + " echo " + echoTxt);
      InputStream inps = p.getInputStream();
      InputStream errs = p.getErrorStream();

      // Wait for either the echoed text (stdout) or the password prompt
      // (stderr). Also stop once sudo has terminated: no further output
      // can arrive then, and waiting on would never end.
      while (inps.available() < echoTxt.length()
          && errs.available() < sudoPrompt.length()
          && isRunning(p)) {
        Thread.sleep(100);
      }

      if (inps.available() >= echoTxt.length()) {
        byte[] buf = new byte[inps.available()];
        int n = inps.read(buf);
        return n > 0 && new String(buf, 0, n).startsWith(echoTxt);
      }
      // the prompt showed up (or sudo died): no need to read errs
      return false;
    } catch (IOException e) {
      logError("Could not run sudo to check for superuser privileges", e);
      return false;
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      return false;
    } finally {
      // sudo may still be blocked on the password prompt; do not leave it behind
      if (p != null)
        p.destroy();
    }
  }

  /** Returns true while the given process has not terminated yet. */
  private static boolean isRunning(Process p) {
    try {
      p.exitValue();
      return false;
    } catch (IllegalThreadStateException stillRunning) {
      return true;
    }
  }

  /**
   * Fetches the value of a property from the configuration file.
   * Loads <code>conf/failmon.properties</code> first if the
   * configuration has not been loaded yet.
   *
   * @param key the name of the property
   *
   * @return the value of the property, if it exists and
   *         null otherwise
   */
  public static String getProperty(String key) {
    if (!ready)
      prepare("conf/failmon.properties");
    return fmProperties.getProperty(key);
  }

  /**
   * Sets the value of a property in the in-memory configuration.
   * The configuration file itself is not modified.
   *
   * @param key the name of the property
   * @param value the new value for the property
   */
  public static void setProperty(String key, String value) {
    fmProperties.setProperty(key, value);
  }

  /**
   * Scans the configuration to determine which monitoring
   * utilities are enabled and usable in the system. For each one
   * of them, a job is created. All such jobs are scheduled and
   * executed by Executor.
   *
   * As a side effect, the <code>disks.list</code> property is
   * rewritten to contain only the disks that were found usable
   * (if there is at least one).
   *
   * @return an ArrayList that contains jobs to be executed by the Executor.
   */
  public static ArrayList<MonitorJob> getJobs() {

    ArrayList<MonitorJob> monitors = new ArrayList<MonitorJob>();
    int timeInt = 0;
    String tmp;
    String[] fnames;

    // for Hadoop Log parsing
    fnames = expandDirs(splitList(getProperty("log.hadoop.filenames")),
                        ".*(.log).*");
    tmp = getProperty("log.hadoop.enabled");
    timeInt = setValue("log.hadoop.interval", DEFAULT_LOG_INTERVAL);

    if ("true".equalsIgnoreCase(tmp)) {
      for (String fname : fnames) {
        File f = new File(fname);
        if (f.exists() && f.canRead()) {
          monitors.add(new MonitorJob(new HadoopLogParser(fname), "hadoopLog", timeInt));
          logInfo("Created Monitor for Hadoop log file: " + f.getAbsolutePath());
        } else if (!f.exists()) {
          logInfo("Skipping Hadoop log file " + fname + " (file not found)");
        } else {
          logInfo("Skipping Hadoop log file " + fname + " (permission denied)");
        }
      }
    }

    // for System Log parsing
    fnames = expandDirs(splitList(getProperty("log.system.filenames")),
                        ".*(messages).*");
    tmp = getProperty("log.system.enabled");
    timeInt = setValue("log.system.interval", DEFAULT_LOG_INTERVAL);

    if ("true".equalsIgnoreCase(tmp)) {
      for (String fname : fnames) {
        File f = new File(fname);
        if (f.exists() && f.canRead()) {
          monitors.add(new MonitorJob(new SystemLogParser(fname), "systemLog", timeInt));
          logInfo("Created Monitor for System log file: " + f.getAbsolutePath());
        } else if (!f.exists()) {
          logInfo("Skipping system log file " + fname + " (file not found)");
        } else {
          logInfo("Skipping system log file " + fname + " (permission denied)");
        }
      }
    }

    // for network interfaces
    // NOTE(review): the switch is "nic.enabled" but the interval key is
    // "nics.interval" -- confirm against the shipped failmon.properties.
    tmp = getProperty("nic.enabled");
    timeInt = setValue("nics.interval", DEFAULT_POLL_INTERVAL);

    if ("true".equalsIgnoreCase(tmp)) {
      monitors.add(new MonitorJob(new NICParser(), "nics", timeInt));
      logInfo("Created Monitor for NICs");
    }

    // for cpu
    tmp = getProperty("cpu.enabled");
    timeInt = setValue("cpu.interval", DEFAULT_POLL_INTERVAL);

    if ("true".equalsIgnoreCase(tmp)) {
      monitors.add(new MonitorJob(new CPUParser(), "cpu", timeInt));
      logInfo("Created Monitor for CPUs");
    }

    // for disks
    tmp = getProperty("disks.enabled");
    timeInt = setValue("disks.interval", DEFAULT_POLL_INTERVAL);

    if ("true".equalsIgnoreCase(tmp)) {
      // check privileges if a disk with no disks./dev/xxx/.source is found
      boolean smartPresent = checkExistence("smartctl");
      int disksOk = 0;
      // an absent disks.list simply yields no devices
      String[] devices = splitList(getProperty("disks.list"));
      // smartctl output containing either phrase means the disk is unusable
      Pattern failure = Pattern.compile("failed|device not supported");

      for (int i = 0; i < devices.length; i++) {
        // NOTE(review): the .source value is compared against "true" here;
        // confirm whether any non-null value was meant to count.
        String fileloc = getProperty("disks." + devices[i] + ".source");
        boolean filePresent = fileloc != null && fileloc.equalsIgnoreCase("true");

        boolean diskPresent = false;
        if (!filePresent && superuser) {
          StringBuffer sb = runCommand("sudo smartctl -i " + devices[i]);
          Matcher matcher = failure.matcher(sb.toString());
          diskPresent = !matcher.find();
        }

        if (filePresent || (diskPresent && smartPresent)) {
          disksOk++;
        } else {
          devices[i] = null;
        }
      }

      // now remove disks that dont exist
      StringBuffer resetSB = new StringBuffer();
      for (String device : devices) {
        if (device == null)
          continue;
        resetSB.append(device).append(", ");
        logInfo("Found S.M.A.R.T. attributes for disk " + device);
      }

      // fix the property (dropping the trailing ", ")
      if (resetSB.length() >= 2)
        setProperty("disks.list", resetSB.substring(0, resetSB.length() - 2));

      if (disksOk > 0) {
        monitors.add(new MonitorJob(new SMARTParser(), "disks", timeInt));
        logInfo("Created Monitor for S.M.A.R.T disk attributes");
      }
    }

    // for lm-sensors
    tmp = getProperty("sensors.enabled");
    timeInt = setValue("sensors.interval", DEFAULT_POLL_INTERVAL);

    if ("true".equalsIgnoreCase(tmp) && checkExistence("sensors")) {
      monitors.add(new MonitorJob(new SensorsParser(), "sensors", timeInt));
      logInfo("Created Monitor for lm-sensors output");
    }

    return monitors;
  }

  /**
   * Determines the minimum interval at which the executor thread
   * needs to wake up to execute jobs. Essentially, this interval
   * equals the GCD of the intervals of all scheduled jobs, but it is
   * never smaller than {@link #MIN_INTERVAL}.
   *
   * If the <code>executor.interval.min</code> property is set,
   * {@link #MIN_INTERVAL} is updated from it first.
   *
   * @param monitors the list of scheduled jobs
   *
   * @return the minimum interval between two scheduled jobs
   */
  public static int getInterval(ArrayList<MonitorJob> monitors) {
    MIN_INTERVAL = setValue("executor.interval.min", MIN_INTERVAL);

    int[] monIntervals = new int[monitors.size()];
    for (int i = 0; i < monIntervals.length; i++)
      monIntervals[i] = monitors.get(i).interval;

    // gcd of an empty list is 0, so MIN_INTERVAL wins when there are no jobs
    return Math.max(MIN_INTERVAL, gcd(monIntervals));
  }

  /**
   * Checks whether a specific shell command is available
   * in the system, by running <code>which</code> on it.
   *
   * @param cmd the command to check against
   *
   * @return true, if the command is available, false otherwise
   */
  public static boolean checkExistence(String cmd) {
    StringBuffer sb = runCommand("which " + cmd);
    return sb.length() > 1;
  }

  /**
   * Runs a shell command in the system and provides a StringBuffer
   * with the standard output of the command. The standard error of
   * the command is not captured. Failures to start or read from the
   * command are logged; whatever output was read so far is returned.
   *
   * @param cmd an array of strings that form the command to run
   *
   * @return a StringBuffer that contains the output of the command
   */
  public static StringBuffer runCommand(String[] cmd) {
    StringBuffer retval = new StringBuffer(MAX_OUTPUT_LENGTH);
    Process p = null;
    InputStream out = null;
    try {
      p = Runtime.getRuntime().exec(cmd);
      out = p.getInputStream();
      // Drain the output BEFORE waiting for termination: a child that
      // fills the pipe buffer blocks until somebody reads from it.
      int c;
      while ((c = out.read()) != -1)
        retval.append((char) c);
      p.waitFor();
    } catch (IOException e) {
      logError("Failed to run command", e);
    } catch (InterruptedException e) {
      // keep the interruption visible to the caller
      Thread.currentThread().interrupt();
    } finally {
      closeQuietly(out);
      if (p != null) {
        closeQuietly(p.getErrorStream());
        try {
          p.getOutputStream().close();
        } catch (IOException ignored) {
          // nothing useful can be done about a failed close
        }
      }
    }
    return retval;
  }

  /**
   * Runs a shell command in the system and provides a StringBuffer
   * with the output of the command. The command line is split on
   * whitespace; quoting is not interpreted.
   *
   * @param cmd the command to run
   *
   * @return a StringBuffer that contains the output of the command
   */
  public static StringBuffer runCommand(String cmd) {
    // trim first: leading whitespace would yield an empty first token
    return runCommand(cmd.trim().split("\\s+"));
  }

  /**
   * Determines the greatest common divisor (GCD) of two integers.
   * The sign of the arguments is ignored and gcd(x, 0) is |x|, so
   * in particular gcd(0, 0) is 0.
   *
   * @param m the first integer
   * @param n the second integer
   *
   * @return the greatest common divisor of m and n
   */
  public static int gcd(int m, int n) {
    int a = Math.abs(m);
    int b = Math.abs(n);
    // iterative Euclid; terminates with b == 0 and the GCD in a
    while (b != 0) {
      int r = a % b;
      a = b;
      b = r;
    }
    return a;
  }

  /**
   * Determines the greatest common divisor (GCD) of a list
   * of integers.
   *
   * @param numbers the list of integers to process
   *
   * @return the greatest common divisor of all numbers, or 0 if
   *         the list is empty
   */
  public static int gcd(int[] numbers) {
    int g = 0;
    for (int number : numbers)
      g = gcd(g, number);
    return g;
  }

  /**
   * Expands a list of paths into a list of absolute file names.
   * Paths that do not exist are dropped, plain files are kept and
   * each directory is replaced by the regular files directly inside
   * it whose name contains a match for the given pattern.
   *
   * @param input the paths to expand
   * @param patternStr regular expression applied to the file names
   *                   found inside directories
   *
   * @return the expanded file names; empty if nothing was found
   */
  private static String [] expandDirs(String [] input, String patternStr) {
    ArrayList<String> fnames = new ArrayList<String>();
    Pattern pattern = Pattern.compile(patternStr);

    for (String fname : input) {
      if (fname == null)
        continue;
      File f = new File(fname);
      if (!f.exists())
        continue;

      if (f.isDirectory()) {
        // add all matching files
        File [] fcs = f.listFiles();
        if (fcs == null)
          continue; // directory could not be listed
        for (File fc : fcs) {
          if (pattern.matcher(fc.getName()).find() && fc.isFile())
            fnames.add(fc.getAbsolutePath());
        }
      } else {
        // normal file, just add to output
        fnames.add(f.getAbsolutePath());
      }
    }
    // Use a fresh, exactly-sized array: reusing 'input' would leave a null
    // and stale unexpanded entries behind whenever fewer files were found.
    return fnames.toArray(new String[fnames.size()]);
  }

  /**
   * Splits a comma-separated property value into its elements.
   *
   * @return the elements, or an empty array if the value is null or blank
   */
  private static String [] splitList(String value) {
    if (value == null || value.trim().length() == 0)
      return new String[0];
    return value.trim().split(",\\s*");
  }

  /**
   * Reads an integer property.
   *
   * @param propname the name of the property
   * @param defaultValue the value to use if the property is missing
   *                     or is not a valid integer
   *
   * @return the configured value or the default
   */
  private static int setValue(String propname, int defaultValue) {
    String v = getProperty(propname);
    if (v == null)
      return defaultValue;
    try {
      return Integer.parseInt(v.trim());
    } catch (NumberFormatException e) {
      logError("Invalid integer '" + v + "' for property " + propname
          + ", using " + defaultValue, e);
      return defaultValue;
    }
  }

  /** Closes the stream if it is not null, ignoring any failure to do so. */
  private static void closeQuietly(InputStream in) {
    if (in == null)
      return;
    try {
      in.close();
    } catch (IOException ignored) {
      // nothing useful can be done about a failed close
    }
  }

  /**
   * Reports an error through the logger, or on the standard error
   * stream if the logger has not been set up yet.
   */
  private static void logError(String msg, Throwable t) {
    if (LOG != null) {
      LOG.error(msg, t);
    } else {
      System.err.println(msg);
      t.printStackTrace();
    }
  }

  /**
   * Writes a message to the package log at INFO level.
   * {@link #prepare(String)} must have been called before.
   *
   * @param str the message to log
   */
  public static void logInfo(String str) {
    LOG.info(str);
  }
}