| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.hadoop.blockforensics; |
| |
| import java.io.IOException; |
| import java.util.Iterator; |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.StringTokenizer; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.conf.Configured; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.io.IntWritable; |
| import org.apache.hadoop.io.LongWritable; |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapreduce.Job; |
| import org.apache.hadoop.mapreduce.Mapper; |
| import org.apache.hadoop.mapreduce.Reducer; |
| import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; |
| import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; |
| import org.apache.hadoop.util.Tool; |
| import org.apache.hadoop.util.ToolRunner; |
| |
| |
/**
 * BlockSearch is a MapReduce job designed to search its input for
 * appearances of strings.
 *
 * The syntax is:
 *
 * bin/hadoop jar [jar location] [hdfs input path] [hdfs output dir]
 *     [comma delimited list of block ids]
 *
 * All arguments are required.
 *
 * This tool is designed to be used to search for one or more block ids in log
 * files but can be used for general text search, assuming the search strings
 * don't contain commas (the comma is the list delimiter). It assumes only one
 * search string will appear per line.
 */
| public class BlockSearch extends Configured implements Tool { |
| public static class Map extends Mapper<LongWritable, Text, Text, Text> { |
| private Text blockIdText = new Text(); |
| private Text valText = new Text(); |
| private List<String> blockIds = null; |
| |
| protected void setup(Context context) |
| throws IOException, InterruptedException { |
| Configuration conf = context.getConfiguration(); |
| StringTokenizer st = new StringTokenizer(conf.get("blockIds"), ","); |
| blockIds = new LinkedList<String>(); |
| while (st.hasMoreTokens()) { |
| String blockId = st.nextToken(); |
| blockIds.add(blockId); |
| } |
| } |
| |
| |
| public void map(LongWritable key, Text value, Context context) |
| throws IOException, InterruptedException { |
| if (blockIds == null) { |
| System.err.println("Error: No block ids specified"); |
| } else { |
| String valStr = value.toString(); |
| |
| for(String blockId: blockIds) { |
| if (valStr.indexOf(blockId) != -1) { |
| blockIdText.set(blockId); |
| valText.set(valStr); |
| context.write(blockIdText, valText); |
| break; // assume only one block id appears per line |
| } |
| } |
| } |
| |
| } |
| |
| } |
| |
| |
| public static class Reduce extends Reducer<Text, Text, Text, Text> { |
| private Text val = new Text(); |
| public void reduce(Text key, Iterator<Text> values, Context context) |
| throws IOException, InterruptedException { |
| while (values.hasNext()) { |
| context.write(key, values.next()); |
| } |
| } |
| } |
| |
| public int run(String[] args) throws Exception { |
| if (args.length < 3) { |
| System.out.println("BlockSearch <inLogs> <outDir> <comma delimited list of blocks>"); |
| ToolRunner.printGenericCommandUsage(System.out); |
| return 2; |
| } |
| |
| Configuration conf = getConf(); |
| conf.set("blockIds", args[2]); |
| |
| Job job = new Job(conf); |
| |
| job.setCombinerClass(Reduce.class); |
| job.setJarByClass(BlockSearch.class); |
| job.setJobName("BlockSearch"); |
| job.setMapperClass(Map.class); |
| job.setOutputKeyClass(Text.class); |
| job.setOutputValueClass(Text.class); |
| job.setReducerClass(Reduce.class); |
| |
| FileInputFormat.setInputPaths(job, new Path(args[0])); |
| FileOutputFormat.setOutputPath(job, new Path(args[1])); |
| |
| return job.waitForCompletion(true) ? 0 : 1; |
| } |
| |
| public static void main(String[] args) throws Exception { |
| int res = ToolRunner.run(new Configuration(), new BlockSearch(), args); |
| System.exit(res); |
| } |
| } |