/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.slive;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.slive.ArgumentParser.ParsedOutput;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Slive test entry point + main program
*
* This program outputs a help message when given -help, which can be used to
* determine the program options and configuration that affect the program at
* runtime. The program takes these options from the configuration and/or the
* command line, merges them, and then establishes a job that runs a set of
* mappers & reducers; the output of the reduction is then reported on.
*
* The number of maps is specified by "slive.maps".
* The number of reduces is specified by "slive.reduces".
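*
* A typical invocation might look like the sketch below; the option names and
* values are illustrative only (run with -help for the authoritative option
* list), and this assumes the slive test classes are on the classpath:
*
* <pre>
* hadoop org.apache.hadoop.fs.slive.SliveTest -maps 10 -reduces 5
* </pre>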
*/
@SuppressWarnings("deprecation")
public class SliveTest implements Tool {
private static final Log LOG = LogFactory.getLog(SliveTest.class);
// ensures the hdfs configurations are loaded if they exist
static {
Configuration.addDefaultResource("hdfs-default.xml");
Configuration.addDefaultResource("hdfs-site.xml");
}
private Configuration base;
public SliveTest(Configuration base) {
this.base = base;
}
public int run(String[] args) {
ParsedOutput parsedOpts = null;
try {
ArgumentParser argHolder = new ArgumentParser(args);
parsedOpts = argHolder.parse();
if (parsedOpts.shouldOutputHelp()) {
parsedOpts.outputHelp();
return 1;
}
} catch (Exception e) {
LOG.error("Unable to parse arguments due to error: ", e);
return 1;
}
LOG.info("Running with option list " + Helper.stringifyArray(args, " "));
ConfigExtractor config = null;
try {
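// merge the parsed command line options into a copy of the base configuration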
ConfigMerger cfgMerger = new ConfigMerger();
Configuration cfg = cfgMerger.getMerged(parsedOpts,
new Configuration(base));
if (cfg != null) {
config = new ConfigExtractor(cfg);
}
} catch (Exception e) {
LOG.error("Unable to merge config due to error: ", e);
return 1;
}
if (config == null) {
LOG.error("Unable to merge config & options!");
return 1;
}
try {
LOG.info("Options are:");
ConfigExtractor.dumpOptions(config);
} catch (Exception e) {
LOG.error("Unable to dump options due to error: ", e);
return 1;
}
boolean jobOk = false;
try {
LOG.info("Running job:");
runJob(config);
jobOk = true;
} catch (Exception e) {
LOG.error("Unable to run job due to error: ", e);
}
if (jobOk) {
try {
LOG.info("Reporting on job:");
writeReport(config);
} catch (Exception e) {
LOG.error("Unable to report on job due to error: ", e);
}
}
// attempt cleanup (not critical)
boolean cleanUp = getBool(parsedOpts
.getValue(ConfigOption.CLEANUP.getOpt()));
if (cleanUp) {
try {
LOG.info("Cleaning up job:");
cleanup(config);
} catch (Exception e) {
LOG.error("Unable to cleanup job due to error: ", e);
}
}
// the job itself succeeded; failures in reporting or cleanup are not fatal
if (jobOk) {
return 0;
}
// the job failed
return 1;
}
/**
* Checks whether a string represents a true boolean value
*
* @param val
* the string to check (may be null)
* @return true if the string is "true" or "1" (case insensitive), false
* otherwise
*/
private boolean getBool(String val) {
if (val == null) {
return false;
}
String cleanupOpt = val.toLowerCase().trim();
return cleanupOpt.equals("true") || cleanupOpt.equals("1");
}
/**
* Sets up a job conf for the given job using the given config object. Ensures
* that the correct input format is set, as well as the mapper, partitioner
* and reducer classes, the input and output key and value classes, and any
* other job configuration.
*
* @param config
* the config to pull the job settings from
* @return JobConf representing the job to be run
* @throws IOException
*/
private JobConf getJob(ConfigExtractor config) throws IOException {
JobConf job = new JobConf(config.getConfig(), SliveTest.class);
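// input is synthetic: DummyInputFormat supplies placeholder splits so each
// mapper can generate its operations from the slive configuration rather
// than from real input data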
job.setInputFormat(DummyInputFormat.class);
FileOutputFormat.setOutputPath(job, config.getOutputPath());
job.setMapperClass(SliveMapper.class);
job.setPartitionerClass(SlivePartitioner.class);
job.setReducerClass(SliveReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setOutputFormat(TextOutputFormat.class);
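// keep the reducer output uncompressed so writeReport() can read it back
// as plain text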
TextOutputFormat.setCompressOutput(job, false);
job.setNumReduceTasks(config.getReducerAmount());
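// in the classic mapred API the map task count is only a hint; the input
// format ultimately determines the actual number of splits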
job.setNumMapTasks(config.getMapAmount());
return job;
}
/**
* Runs the job given the provided config
*
* @param config
* the config to run the job with
*
* @throws IOException
* if the given job can not be run
*/
private void runJob(ConfigExtractor config) throws IOException {
JobClient.runJob(getJob(config));
}
/**
* Attempts to write the report to the given output using the specified
* config. It opens the expected reducer output files, reads in their
* contents, splits the lines up by operation output, sorts them by operation
* type, and then generates a report for each operation type to the specified
* result file and the console.
*
* @param cfg
* the config specifying the files and output
*
* @throws Exception
* if files can not be opened, closed or read, or are in an invalid format
*/
private void writeReport(ConfigExtractor cfg) throws Exception {
Path dn = cfg.getOutputPath();
LOG.info("Writing report using contents of " + dn);
FileSystem fs = FileSystem.get(cfg.getConfig());
FileStatus[] reduceFiles = fs.listStatus(dn);
BufferedReader fileReader = null;
PrintWriter reportWriter = null;
try {
List<OperationOutput> noOperations = new ArrayList<OperationOutput>();
Map<String, List<OperationOutput>> splitTypes = new TreeMap<String, List<OperationOutput>>();
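// only reducer output files (conventionally named part-*) contain operation
// data; skip anything else in the output directory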
for (FileStatus fn : reduceFiles) {
if (!fn.getPath().getName().startsWith("part")) continue;
fileReader = new BufferedReader(new InputStreamReader(
new DataInputStream(fs.open(fn.getPath()))));
String line;
while ((line = fileReader.readLine()) != null) {
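// each reducer output line is a key and a value separated by a tab
// (TextOutputFormat's default separator)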
String[] pieces = line.split("\t", 2);
if (pieces.length == 2) {
OperationOutput data = new OperationOutput(pieces[0], pieces[1]);
String op = data.getOperationType();
if (op != null) {
List<OperationOutput> opList = splitTypes.get(op);
if (opList == null) {
opList = new ArrayList<OperationOutput>();
}
opList.add(data);
splitTypes.put(op, opList);
} else {
noOperations.add(data);
}
} else {
throw new IOException("Unparseable line " + line);
}
}
fileReader.close();
fileReader = null;
}
File resFile = null;
if (cfg.getResultFile() != null) {
resFile = new File(cfg.getResultFile());
}
if (resFile != null) {
LOG.info("Report results being placed to logging output and to file "
+ resFile.getCanonicalPath());
reportWriter = new PrintWriter(new FileOutputStream(resFile));
} else {
LOG.info("Report results being placed to logging output");
}
ReportWriter reporter = new ReportWriter();
if (!noOperations.isEmpty()) {
reporter.basicReport(noOperations, reportWriter);
}
for (Map.Entry<String, List<OperationOutput>> entry : splitTypes.entrySet()) {
reporter.opReport(entry.getKey(), entry.getValue(), reportWriter);
}
} finally {
if (fileReader != null) {
fileReader.close();
}
if (reportWriter != null) {
reportWriter.close();
}
}
}
/**
* Cleans up the base directory by removing it
*
* @param cfg
* ConfigExtractor which has location of base directory
*
* @throws IOException
*/
private void cleanup(ConfigExtractor cfg) throws IOException {
FileSystem fs = FileSystem.get(cfg.getConfig());
Path base = cfg.getBaseDirectory();
if (base != null) {
LOG.info("Attempting to recursively delete " + base);
fs.delete(base, true);
}
}
/**
* The main program entry point. Sets up and parses the command line options,
* merges those options, dumps the merged options, runs the corresponding
* map/reduce job that those options describe, and then writes the report for
* the output of the run that occurred.
*
* @param args
* command line options
*/
public static void main(String[] args) throws Exception {
Configuration startCfg = new Configuration(true);
SliveTest runner = new SliveTest(startCfg);
int ec = ToolRunner.run(runner, args);
System.exit(ec);
}
@Override // Configurable
public Configuration getConf() {
return this.base;
}
@Override // Configurable
public void setConf(Configuration conf) {
this.base = conf;
}
}