blob: a0c024754bbdde4c764847a9f1e5a36caa55cec9 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.failmon;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**********************************************************
 * This class provides anonymization to SerializedRecord objects. It
 * anonymizes all hostnames, ip addresses and file names/paths
 * that appear in EventRecords gathered from the logs
 * and other system utilities. Each such value is replaced by a
 * short tag followed by the hexadecimal MD5 digest of the value
 * itself, so equal values map to equal tokens while distinct
 * values remain distinguishable. Note that MD5 is a one-way hash
 * but is no longer regarded as collision resistant.
 *
 **********************************************************/
public class Anonymizer {

  /** Regular expression matching a dotted-quad IPv4 address. */
  private static final String IP_PATTERN = "(\\d{1,3}\\.){3}\\d{1,3}";

  /** Lookup table used to render a 4-bit value as a lowercase hex digit. */
  private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

  /**
   * Anonymize hostnames, ip addresses and file names/paths
   * that appear in fields of a SerializedRecord. Which kinds of
   * values are hashed is controlled by the properties
   * <code>anonymizer.hash.hostnames</code>,
   * <code>anonymizer.hash.ips</code> and
   * <code>anonymizer.hash.filenames</code>; a kind is processed only
   * when its property equals "true" (case-insensitive).
   * The record is modified in place.
   *
   * @param sr the input SerializedRecord
   *
   * @return the anonymized SerializedRecord (the same object as sr)
   *
   * @throws Exception if the record has no "hostname" field
   */
  public static SerializedRecord anonymize(SerializedRecord sr)
      throws Exception {
    String hostname = sr.get("hostname");
    if (hostname == null)
      throw new Exception("Malformed SerializedRecord: no hostname found");

    if (isEnabled("anonymizer.hash.hostnames")) {
      // hash the node's hostname; it is quoted so that characters such
      // as '.' are matched literally instead of as regex metacharacters
      String hostPattern = Pattern.quote(hostname);
      anonymizeField(sr, "message", hostPattern, "_hn_");
      anonymizeField(sr, "hostname", hostPattern, "_hn_");

      // hash all other hostnames that end with the configured suffix
      String suffix = Environment.getProperty("anonymizer.hostname.suffix");
      if (suffix != null)
        anonymizeField(sr, "message", "(\\S+\\.)*" + suffix, "_hn_");
    }

    if (isEnabled("anonymizer.hash.ips")) {
      // hash all ip addresses
      anonymizeField(sr, "message", IP_PATTERN, "_ip_");
      anonymizeField(sr, "ips", IP_PATTERN, "_ip_");

      // if multiple ips are present for a node they are stored in
      // consecutively numbered fields ips#0, ips#1, ...
      for (int i = 0; sr.get("ips#" + i) != null; i++)
        anonymizeField(sr, "ips#" + i, IP_PATTERN, "_ip_");

      if ("NIC".equalsIgnoreCase(sr.get("type")))
        anonymizeField(sr, "ipAddress", IP_PATTERN, "_ip_");
    }

    if (isEnabled("anonymizer.hash.filenames")) {
      // hash every filename present in messages; both patterns consume
      // the whitespace in front of the path, hence the leading blank in
      // the replacement tags
      anonymizeField(sr, "message", "\\s+/(\\S+/)*[^:\\s]*", " _fn_");
      anonymizeField(sr, "message", "\\s+hdfs://(\\S+/)*[^:\\s]*",
          " hdfs://_fn_");
    }

    return sr;
  }

  /**
   * Anonymize hostnames, ip addresses and file names/paths
   * that appear in fields of an EventRecord, after it gets
   * serialized into a SerializedRecord.
   *
   * @param er the input EventRecord
   *
   * @return the anonymized SerializedRecord
   *
   * @throws Exception if the serialized record has no "hostname" field
   */
  public static SerializedRecord anonymize(EventRecord er) throws Exception {
    return anonymize(new SerializedRecord(er));
  }

  /**
   * Tell whether the given configuration property is set to "true"
   * (compared case-insensitively).
   *
   * @param property the name of the property to look up
   *
   * @return true if the property value equals "true", false otherwise
   */
  private static boolean isEnabled(String property) {
    return "true".equalsIgnoreCase(Environment.getProperty(property));
  }

  /**
   * Replace every match of a regular expression inside one field of a
   * record by a tag followed by the MD5 digest of the matched text.
   * The digest is computed per match, so different values produce
   * different tokens and the same value always produces the same token.
   *
   * @param sr the record whose field is rewritten in place
   * @param fieldName the name of the field to process
   * @param pattern the regular expression describing the values to hash
   * @param prefix literal text placed in front of each digest;
   *        null is treated as the empty string
   *
   * @return the original content of the field, or null if the
   *         field is not present (the record is then left untouched)
   */
  private static String anonymizeField(SerializedRecord sr, String fieldName,
      String pattern, String prefix) {
    String txt = sr.get(fieldName);
    if (txt == null)
      return null;

    String tag = (prefix == null) ? "" : prefix;
    Matcher m = Pattern.compile(pattern).matcher(txt);
    StringBuffer out = new StringBuffer();
    while (m.find()) {
      // surrounding whitespace captured by the pattern is not part of
      // the value, so it must not influence the digest
      String value = m.group().trim();
      // a match without any content carries nothing to hide: keep it
      String replacement =
          (value.length() == 0) ? m.group() : tag + getMD5Hash(value);
      // quote so that '$' and '\' are never read as group references
      m.appendReplacement(out, Matcher.quoteReplacement(replacement));
    }
    m.appendTail(out);

    sr.set(fieldName, out.toString());
    return txt;
  }

  /**
   * Create the MD5 digest of an input text. The text is encoded as
   * iso-8859-1 before it is digested.
   *
   * @param text the input text
   *
   * @return the hexadecimal representation of the MD5 digest
   *
   * @throws IllegalStateException if the platform provides neither the
   *         MD5 algorithm nor the iso-8859-1 charset
   */
  public static String getMD5Hash(String text) {
    try {
      MessageDigest md = MessageDigest.getInstance("MD5");
      // digest the whole encoded array: its length can be smaller than
      // text.length() when the text contains surrogate pairs
      md.update(text.getBytes("iso-8859-1"));
      return convertToHex(md.digest());
    } catch (NoSuchAlgorithmException e) {
      throw new IllegalStateException("MD5 digest is not available", e);
    } catch (UnsupportedEncodingException e) {
      throw new IllegalStateException("iso-8859-1 charset is not available",
          e);
    }
  }

  /**
   * Render a byte array as a lowercase hexadecimal string,
   * two digits per byte, most significant nibble first.
   *
   * @param data the bytes to convert
   *
   * @return the hexadecimal representation of data
   */
  private static String convertToHex(byte[] data) {
    StringBuilder buf = new StringBuilder(data.length * 2);
    for (int i = 0; i < data.length; i++) {
      buf.append(HEX_DIGITS[(data[i] >>> 4) & 0x0F]);
      buf.append(HEX_DIGITS[data[i] & 0x0F]);
    }
    return buf.toString();
  }
}