| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.contrib.failmon; |
| |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.util.ArrayList; |
| import java.util.Properties; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import org.apache.commons.logging.*; |
| import org.apache.log4j.PropertyConfigurator; |
| |
| /********************************************************** |
| * This class provides various methods for interaction with |
| * the configuration and the operating system environment. Also |
| * provides some helper methods for use by other classes in |
| * the package. |
| **********************************************************/ |
| |
| public class Environment { |
| |
| public static final int DEFAULT_LOG_INTERVAL = 3600; |
| |
| public static final int DEFAULT_POLL_INTERVAL = 360; |
| |
| public static int MIN_INTERVAL = 5; |
| |
| public static final int MAX_OUTPUT_LENGTH = 51200; |
| |
| public static Log LOG; |
| |
| static Properties fmProperties = new Properties(); |
| |
| static boolean superuser = false; |
| |
| static boolean ready = false; |
| |
| /** |
| * Initializes structures needed by other methods. Also determines |
| * whether the executing user has superuser privileges. |
| * |
| */ |
| public static void prepare(String fname) { |
| |
| if (!"Linux".equalsIgnoreCase(System.getProperty("os.name"))) { |
| System.err.println("Linux system required for FailMon. Exiting..."); |
| System.exit(0); |
| } |
| |
| System.setProperty("log4j.configuration", "conf/log4j.properties"); |
| PropertyConfigurator.configure("conf/log4j.properties"); |
| LOG = LogFactory.getLog("org.apache.hadoop.contrib.failmon"); |
| logInfo("********** FailMon started ***********"); |
| |
| // read parseState file |
| PersistentState.readState("conf/parsing.state"); |
| |
| try { |
| FileInputStream propFile = new FileInputStream(fname); |
| fmProperties.load(propFile); |
| propFile.close(); |
| } catch (FileNotFoundException e1) { |
| e1.printStackTrace(); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| ready = true; |
| |
| try { |
| String sudo_prompt = "passwd_needed:"; |
| String echo_txt = "access_ok"; |
| |
| Process p = Runtime.getRuntime().exec("sudo -S -p " + sudo_prompt + " echo " + echo_txt ); |
| InputStream inps = p.getInputStream(); |
| InputStream errs = p.getErrorStream(); |
| |
| while (inps.available() < echo_txt.length() && errs.available() < sudo_prompt.length()) |
| Thread.sleep(100); |
| |
| byte [] buf; |
| String s; |
| |
| if (inps.available() >= echo_txt.length()) { |
| buf = new byte[inps.available()]; |
| inps.read(buf); |
| s = new String(buf); |
| if (s.startsWith(echo_txt)) { |
| superuser = true; |
| logInfo("Superuser privileges found!"); |
| } else { |
| // no need to read errs |
| superuser = false; |
| logInfo("Superuser privileges not found."); |
| } |
| } |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } catch (InterruptedException e) { |
| e.printStackTrace(); |
| } |
| } |
| |
| /** |
| * Fetches the value of a property from the configuration file. |
| * |
| * @param key the name of the property |
| * |
| * @return the value of the property, if it exists and |
| * null otherwise |
| */ |
| public static String getProperty(String key) { |
| if (!ready) |
| prepare("conf/failmon.properties"); |
| return fmProperties.getProperty(key); |
| } |
| |
| /** |
| * Sets the value of a property inthe configuration file. |
| * |
| * @param key the name of the property |
| * @param value the new value for the property |
| * |
| */ |
| |
| public static void setProperty(String key, String value) { |
| fmProperties.setProperty(key, value); |
| } |
| |
| /** |
| * Scans the configuration file to determine which monitoring |
| * utilities are available in the system. For each one of them, a |
| * job is created. All such jobs are scheduled and executed by |
| * Executor. |
| * |
| * @return an ArrayList that contains jobs to be executed by theExecutor. |
| */ |
| public static ArrayList<MonitorJob> getJobs() { |
| |
| ArrayList<MonitorJob> monitors = new ArrayList<MonitorJob>(); |
| int timeInt = 0; |
| |
| // for Hadoop Log parsing |
| String [] fnames_r = getProperty("log.hadoop.filenames").split(",\\s*"); |
| String tmp = getProperty("log.hadoop.enabled"); |
| |
| String [] fnames = expandDirs(fnames_r, ".*(.log).*"); |
| |
| timeInt = setValue("log.hadoop.interval", DEFAULT_LOG_INTERVAL); |
| |
| if ("true".equalsIgnoreCase(tmp) && fnames[0] != null) |
| for (String fname : fnames) { |
| File f = new File(fname); |
| if (f.exists() && f.canRead()) { |
| monitors.add(new MonitorJob(new HadoopLogParser(fname), "hadoopLog", timeInt)); |
| logInfo("Created Monitor for Hadoop log file: " + f.getAbsolutePath()); |
| } else if (!f.exists()) |
| logInfo("Skipping Hadoop log file " + fname + " (file not found)"); |
| else |
| logInfo("Skipping Hadoop log file " + fname + " (permission denied)"); |
| } |
| |
| |
| // for System Log parsing |
| fnames_r = getProperty("log.system.filenames").split(",\\s*"); |
| tmp = getProperty("log.system.enabled"); |
| |
| fnames = expandDirs(fnames_r, ".*(messages).*"); |
| |
| timeInt = setValue("log.system.interval", DEFAULT_LOG_INTERVAL); |
| |
| if ("true".equalsIgnoreCase(tmp)) |
| for (String fname : fnames) { |
| File f = new File(fname); |
| if (f.exists() && f.canRead()) { |
| monitors.add(new MonitorJob(new SystemLogParser(fname), "systemLog", timeInt)); |
| logInfo("Created Monitor for System log file: " + f.getAbsolutePath()); |
| } else if (!f.exists()) |
| logInfo("Skipping system log file " + fname + " (file not found)"); |
| else |
| logInfo("Skipping system log file " + fname + " (permission denied)"); |
| } |
| |
| |
| // for network interfaces |
| tmp = getProperty("nic.enabled"); |
| |
| timeInt = setValue("nics.interval", DEFAULT_POLL_INTERVAL); |
| |
| if ("true".equalsIgnoreCase(tmp)) { |
| monitors.add(new MonitorJob(new NICParser(), "nics", timeInt)); |
| logInfo("Created Monitor for NICs"); |
| } |
| |
| // for cpu |
| tmp = getProperty("cpu.enabled"); |
| |
| timeInt = setValue("cpu.interval", DEFAULT_POLL_INTERVAL); |
| |
| if ("true".equalsIgnoreCase(tmp)) { |
| monitors.add(new MonitorJob(new CPUParser(), "cpu", timeInt)); |
| logInfo("Created Monitor for CPUs"); |
| } |
| |
| // for disks |
| tmp = getProperty("disks.enabled"); |
| |
| timeInt = setValue("disks.interval", DEFAULT_POLL_INTERVAL); |
| |
| if ("true".equalsIgnoreCase(tmp)) { |
| // check privileges if a disk with no disks./dev/xxx/.source is found |
| boolean smart_present = checkExistence("smartctl"); |
| int disks_ok = 0; |
| String devicesStr = getProperty("disks.list"); |
| String[] devices = null; |
| |
| if (devicesStr != null) |
| devices = devicesStr.split(",\\s*"); |
| |
| for (int i = 0; i< devices.length; i++) { |
| boolean file_present = false; |
| boolean disk_present = false; |
| |
| String fileloc = getProperty("disks." + devices[i] + ".source"); |
| if (fileloc != null && fileloc.equalsIgnoreCase("true")) |
| file_present = true; |
| |
| if (!file_present) |
| if (superuser) { |
| StringBuffer sb = runCommand("sudo smartctl -i " + devices[i]); |
| String patternStr = "[(failed)(device not supported)]"; |
| Pattern pattern = Pattern.compile(patternStr); |
| Matcher matcher = pattern.matcher(sb.toString()); |
| if (matcher.find(0)) |
| disk_present = false; |
| else |
| disk_present = true; |
| } |
| if (file_present || (disk_present && smart_present)) { |
| disks_ok++; |
| } else |
| devices[i] = null; |
| } |
| |
| // now remove disks that dont exist |
| StringBuffer resetSB = new StringBuffer(); |
| for (int j = 0; j < devices.length; j++) { |
| resetSB.append(devices[j] == null ? "" : devices[j] + ", "); |
| if (devices[j] != null) |
| logInfo("Found S.M.A.R.T. attributes for disk " + devices[j]); |
| } |
| // fix the property |
| if (resetSB.length() >= 2) |
| setProperty("disks.list", resetSB.substring(0, resetSB.length() - 2)); |
| |
| if (disks_ok > 0) { |
| monitors.add(new MonitorJob(new SMARTParser(), "disks", timeInt)); |
| logInfo("Created Monitor for S.M.A.R.T disk attributes"); |
| } |
| } |
| |
| // for lm-sensors |
| tmp = getProperty("sensors.enabled"); |
| |
| timeInt = setValue("sensors.interval", DEFAULT_POLL_INTERVAL); |
| |
| if ("true".equalsIgnoreCase(tmp) && checkExistence("sensors")) { |
| monitors.add(new MonitorJob(new SensorsParser(), "sensors", timeInt)); |
| logInfo("Created Monitor for lm-sensors output"); |
| } |
| |
| return monitors; |
| } |
| |
| /** |
| * Determines the minimum interval at which the executor thread |
| * needs to wake upto execute jobs. Essentially, this is interval |
| * equals the GCD of intervals of all scheduled jobs. |
| * |
| * @param monitors the list of scheduled jobs |
| * |
| * @return the minimum interval between two scheduled jobs |
| */ |
| public static int getInterval(ArrayList<MonitorJob> monitors) { |
| String tmp = getProperty("executor.interval.min"); |
| if (tmp != null) |
| MIN_INTERVAL = Integer.parseInt(tmp); |
| |
| int[] monIntervals = new int[monitors.size()]; |
| |
| for (int i = 0; i < monitors.size(); i++) |
| monIntervals[i] = monitors.get(i).interval; |
| |
| return Math.max(MIN_INTERVAL, gcd(monIntervals)); |
| } |
| |
| /** |
| * Checks whether a specific shell command is available |
| * in the system. |
| * |
| * @param cmd the command to check against |
| * |
| * @return true, if the command is availble, false otherwise |
| */ |
| public static boolean checkExistence(String cmd) { |
| StringBuffer sb = runCommand("which " + cmd); |
| if (sb.length() > 1) |
| return true; |
| |
| return false; |
| } |
| |
| /** |
| * Runs a shell command in the system and provides a StringBuffer |
| * with the output of the command. |
| * |
| * @param cmd an array of string that form the command to run |
| * |
| * @return a StringBuffer that contains the output of the command |
| */ |
| public static StringBuffer runCommand(String[] cmd) { |
| StringBuffer retval = new StringBuffer(MAX_OUTPUT_LENGTH); |
| Process p; |
| try { |
| p = Runtime.getRuntime().exec(cmd); |
| InputStream tmp = p.getInputStream(); |
| p.waitFor(); |
| int c; |
| while ((c = tmp.read()) != -1) |
| retval.append((char) c); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } catch (InterruptedException e) { |
| e.printStackTrace(); |
| } |
| |
| return retval; |
| } |
| |
| /** |
| * Runs a shell command in the system and provides a StringBuffer |
| * with the output of the command. |
| * |
| * @param cmd the command to run |
| * |
| * @return a StringBuffer that contains the output of the command |
| */ |
| public static StringBuffer runCommand(String cmd) { |
| return runCommand(cmd.split("\\s+")); |
| } |
| |
| /** |
| * Determines the greatest common divisor (GCD) of two integers. |
| * |
| * @param m the first integer |
| * @param n the second integer |
| * |
| * @return the greatest common divisor of m and n |
| */ |
| public static int gcd(int m, int n) { |
| if (m == 0 && n == 0) |
| return 0; |
| if (m < n) { |
| int t = m; |
| m = n; |
| n = t; |
| } |
| int r = m % n; |
| if (r == 0) { |
| return n; |
| } else { |
| return gcd(n, r); |
| } |
| } |
| |
| /** |
| * Determines the greatest common divisor (GCD) of a list |
| * of integers. |
| * |
| * @param numbers the list of integers to process |
| * |
| * @return the greatest common divisor of all numbers |
| */ |
| public static int gcd(int[] numbers) { |
| |
| if (numbers.length == 1) |
| return numbers[0]; |
| |
| int g = gcd(numbers[0], numbers[1]); |
| |
| for (int i = 2; i < numbers.length; i++) |
| g = gcd(g, numbers[i]); |
| |
| return g; |
| } |
| |
| private static String [] expandDirs(String [] input, String patternStr) { |
| |
| ArrayList<String> fnames = new ArrayList<String>(); |
| Pattern pattern = Pattern.compile(patternStr); |
| Matcher matcher; |
| File f; |
| |
| for (String fname : input) { |
| f = new File(fname); |
| if (f.exists()) { |
| if (f.isDirectory()) { |
| // add all matching files |
| File [] fcs = f.listFiles(); |
| for (File fc : fcs) { |
| matcher = pattern.matcher(fc.getName()); |
| if (matcher.find() && fc.isFile()) |
| fnames.add(fc.getAbsolutePath()); |
| } |
| } else { |
| // normal file, just add to output |
| fnames.add(f.getAbsolutePath()); |
| } |
| } |
| } |
| return fnames.toArray(input); |
| } |
| |
| private static int setValue(String propname, int defaultValue) { |
| |
| String v = getProperty(propname); |
| |
| if (v != null) |
| return Integer.parseInt(v); |
| else |
| return defaultValue; |
| } |
| |
| |
| public static void logInfo(String str) { |
| LOG.info(str); |
| } |
| } |