blob: f50ea1da55b40123e8df609ba6b2b861dbf3cb3c [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**********************************************************
* Objects of this class parse the output of smartmontools to
* gather information about the state of disks in the system. The
* smartmontools utility reads the S.M.A.R.T. attributes from
* the disk devices and reports them to the user. Note that since
* running smartctl requires superuser provileges, one should
* grand sudo privileges to the running user for the command smartctl
* (without a password). Alternatively, one can set up a cron job that
* periodically dumps the output of smartctl into a user-readable file.
* See the configuration file for details.
*
**********************************************************/
public class SMARTParser extends ShellParser {
String[] devices;
/**
* Constructs a SMARTParser and reads the list of disk
* devices to query
*/
public SMARTParser() {
super();
String devicesStr = Environment.getProperty("disks.list");
System.out.println("skato " + devicesStr);
if (devicesStr != null)
devices = devicesStr.split(",\\s*");
}
/**
* Reads and parses the output of smartctl for a specified disk and
* creates an appropriate EventRecord that holds the desirable
* information for it. Since the output of smartctl is different for
* different kinds of disks, we try to identify as many attributes as
* posssible for all known output formats.
*
* @param device the disk device name to query
*
* @return the EventRecord created
*/
public EventRecord query(String device) throws Exception {
String conf = Environment.getProperty("disks." + device + ".source");
StringBuffer sb;
if (conf == null)
sb = Environment.runCommand("sudo smartctl --all " + device);
else
sb = Environment.runCommand("cat " + conf);
EventRecord retval = new EventRecord(InetAddress.getLocalHost()
.getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
.getHostName()), Calendar.getInstance(), "SMART", "Unknown",
(conf == null ? "sudo smartctl --all " + device : "file " + conf), "-");
// IBM SCSI disks
retval.set("model", findPattern("Device\\s*:\\s*(.*)", sb.toString(), 1));
retval.set("serial", findPattern("Serial\\s+Number\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("firmware", findPattern("Firmware\\s+Version\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("capacity", findPattern("User\\s+Capacity\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("status", findPattern("SMART\\s*Health\\s*Status:\\s*(.*)", sb
.toString(), 1));
retval.set("current_temperature", findPattern(
"Current\\s+Drive\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1));
retval.set("trip_temperature", findPattern(
"Drive\\s+Trip\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1));
retval.set("start_stop_count", findPattern(
"start\\s+stop\\s+count\\s*:\\s*(\\d*)", sb.toString(), 1));
String[] var = { "read", "write", "verify" };
for (String s : var) {
retval.set(s + "_ecc_fast", findPattern(s + "\\s*:\\s*(\\d*)", sb
.toString(), 1));
retval.set(s + "_ecc_delayed", findPattern(s
+ "\\s*:\\s*(\\d+\\s+){1}(\\d+)", sb.toString(), 2));
retval.set(s + "_rereads", findPattern(
s + "\\s*:\\s*(\\d+\\s+){2}(\\d+)", sb.toString(), 2));
retval.set(s + "_GBs", findPattern(s
+ "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*)", sb.toString(), 2));
retval.set(s + "_uncorrected",
findPattern(s + "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*){1}\\s+(\\d+)", sb
.toString(), 3));
}
// Hitachi IDE, SATA
retval.set("model", findPattern("Device\\s*Model\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("serial", findPattern("Serial\\s+number\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("protocol", findPattern("Transport\\s+protocol\\s*:\\s*(.*)", sb
.toString(), 1));
retval.set("status", "PASSED".equalsIgnoreCase(findPattern(
"test\\s*result\\s*:\\s*(.*)", sb.toString(), 1)) ? "OK" : "FAILED");
readColumns(retval, sb);
return retval;
}
/**
* Reads attributes in the following format:
*
* ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE
* 3 Spin_Up_Time 0x0027 180 177 063 Pre-fail Always - 10265
* 4 Start_Stop_Count 0x0032 253 253 000 Old_age Always - 34
* 5 Reallocated_Sector_Ct 0x0033 253 253 063 Pre-fail Always - 0
* 6 Read_Channel_Margin 0x0001 253 253 100 Pre-fail Offline - 0
* 7 Seek_Error_Rate 0x000a 253 252 000 Old_age Always - 0
* 8 Seek_Time_Performance 0x0027 250 224 187 Pre-fail Always - 53894
* 9 Power_On_Minutes 0x0032 210 210 000 Old_age Always - 878h+00m
* 10 Spin_Retry_Count 0x002b 253 252 157 Pre-fail Always - 0
* 11 Calibration_Retry_Count 0x002b 253 252 223 Pre-fail Always - 0
* 12 Power_Cycle_Count 0x0032 253 253 000 Old_age Always - 49
* 192 PowerOff_Retract_Count 0x0032 253 253 000 Old_age Always - 0
* 193 Load_Cycle_Count 0x0032 253 253 000 Old_age Always - 0
* 194 Temperature_Celsius 0x0032 037 253 000 Old_age Always - 37
* 195 Hardware_ECC_Recovered 0x000a 253 252 000 Old_age Always - 2645
*
* This format is mostly found in IDE and SATA disks.
*
* @param er the EventRecord in which to store attributes found
* @param sb the StringBuffer with the text to parse
*
* @return the EventRecord in which new attributes are stored.
*/
private EventRecord readColumns(EventRecord er, StringBuffer sb) {
Pattern pattern = Pattern.compile("^\\s{0,2}(\\d{1,3}\\s+.*)$",
Pattern.MULTILINE);
Matcher matcher = pattern.matcher(sb);
while (matcher.find()) {
String[] tokens = matcher.group(1).split("\\s+");
boolean failed = false;
// check if this attribute is a failed one
if (!tokens[8].equals("-"))
failed = true;
er.set(tokens[1].toLowerCase(), (failed ? "FAILED:" : "") + tokens[9]);
}
return er;
}
/**
* Invokes query() to do the parsing and handles parsing errors for
* each one of the disks specified in the configuration.
*
* @return an array of EventRecords that holds one element that represents
* the current state of the disk devices.
*/
public EventRecord[] monitor() {
ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
for (String device : devices) {
try {
recs.add(query(device));
} catch (Exception e) {
e.printStackTrace();
}
}
EventRecord[] T = new EventRecord[recs.size()];
return recs.toArray(T);
}
/**
* Return a String with information about this class
*
* @return A String describing this class
*/
public String getInfo() {
String retval = "S.M.A.R.T. disk attributes parser for disks ";
for (String device : devices)
retval += device + " ";
return retval;
}
}