/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;

import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Runs a job multiple times and takes the average of all runs.
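 *
 * <p>For reference, a typical invocation looks like the following. The test
 * jar that registers the <code>mrbench</code> driver alias varies by Hadoop
 * release, so the jar name below is illustrative:
 * <pre>
 * hadoop jar hadoop-test.jar mrbench -numRuns 50 -maps 10 -reduces 5 -inputLines 1000
 * </pre>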
 */
public class MRBench extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(MRBench.class);
  private static Path BASE_DIR =
      new Path(System.getProperty("test.build.data", "/benchmarks/MRBench"));
  private static Path INPUT_DIR = new Path(BASE_DIR, "mr_input");
  private static Path OUTPUT_DIR = new Path(BASE_DIR, "mr_output");

  public enum Order { RANDOM, ASCENDING, DESCENDING }

  /**
   * Takes input as lines of text, applies some processing to each line,
   * and writes the data out as text again.
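   * The default {@link #process(String)} is the identity, so out of the box
   * the benchmark simply copies every input line through the shuffle unchanged.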
   */
  public static class Map extends MapReduceBase
      implements Mapper<WritableComparable, Text, UTF8, UTF8> {
    public void map(WritableComparable key, Text value,
                    OutputCollector<UTF8, UTF8> output,
                    Reporter reporter) throws IOException {
      String line = value.toString();
      output.collect(new UTF8(process(line)), new UTF8(""));
    }

    public String process(String line) {
      return line;
    }
  }

  /**
   * Writes each key, together with all of its values, straight through
   * to the output.
   */
  public static class Reduce extends MapReduceBase
      implements Reducer<UTF8, UTF8, UTF8, UTF8> {
    public void reduce(UTF8 key, Iterator<UTF8> values,
                       OutputCollector<UTF8, UTF8> output,
                       Reporter reporter) throws IOException {
      while (values.hasNext()) {
        output.collect(key, new UTF8(values.next().toString()));
      }
    }
  }

  /**
   * Generate a text file on the given filesystem with the given path name.
   * The text file will contain the given number of lines of generated data.
   * The generated data are string representations of numbers. Each number
   * is padded with leading '0' (zero) characters, so for the ascending and
   * descending orders every line has the same length. The order of the
   * generated data is one of ascending, descending, or random.
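   *
   * For example, with numLines = 1000 the padding width is
   * String.valueOf(1000).length() == 4, so the ASCENDING output starts
   * "0000", "0001", "0002", ...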
   */
  public void generateTextFile(FileSystem fs, Path inputFile,
                               long numLines, Order sortOrder) throws IOException {
    LOG.info("creating control file: " + numLines + " numLines, " + sortOrder + " sortOrder");
    PrintStream output = null;
    try {
      output = new PrintStream(fs.create(inputFile));
      int padding = String.valueOf(numLines).length();
      // reuse a single Random; constructing a new instance per line is
      // wasteful and, with time-based seeding, can repeat values
      Random rand = new Random();
      switch (sortOrder) {
      case RANDOM:
        for (long l = 0; l < numLines; l++) {
          output.println(pad(rand.nextLong(), padding));
        }
        break;
      case ASCENDING:
        for (long l = 0; l < numLines; l++) {
          output.println(pad(l, padding));
        }
        break;
      case DESCENDING:
        for (long l = numLines; l > 0; l--) {
          output.println(pad(l, padding));
        }
        break;
      }
    } finally {
      if (output != null) {
        output.close();
      }
    }
    LOG.info("created control file: " + inputFile);
  }

  /**
   * Convert the given number to a string and pad the number with
   * leading '0' (zero) characters so that the string is exactly
   * the given length.
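   *
   * For example, pad(42, 5) returns "00042". A number whose string form is
   * already at least the given length is returned unpadded.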
   */
  private static String pad(long number, int length) {
    String str = String.valueOf(number);
    StringBuilder value = new StringBuilder();
    for (int i = str.length(); i < length; i++) {
      value.append('0');
    }
    value.append(str);
    return value.toString();
  }

  /**
   * Create the job configuration.
   */
  private JobConf setupJob(int numMaps, int numReduces, String jarFile) {
    JobConf jobConf = new JobConf(getConf());
    jobConf.setJarByClass(MRBench.class);
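
    // All runs share the same generated input under INPUT_DIR. TextInputFormat
    // hands the mapper <byte offset, line text> records, which is why Map is
    // declared with a WritableComparable key rather than a UTF8 one.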
    FileInputFormat.addInputPath(jobConf, INPUT_DIR);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setOutputValueClass(UTF8.class);
    jobConf.setMapOutputKeyClass(UTF8.class);
    jobConf.setMapOutputValueClass(UTF8.class);
    if (null != jarFile) {
      jobConf.setJar(jarFile);
    }
    jobConf.setMapperClass(Map.class);
    jobConf.setReducerClass(Reduce.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setNumReduceTasks(numReduces);
    return jobConf;
  }

  /**
   * Runs a MapReduce job the given number of times. The input to each run
   * is the same file.
   */
  private ArrayList<Long> runJobInSequence(JobConf masterJobConf, int numRuns) throws IOException {
    Random rand = new Random();
    ArrayList<Long> execTimes = new ArrayList<Long>();

    for (int i = 0; i < numRuns; i++) {
      // create a new job conf every time; reusing the same object does not work
      JobConf jobConf = new JobConf(masterJobConf);
      // reset the job jar because the copy constructor doesn't
      jobConf.setJar(masterJobConf.getJar());
      // give a new random name to the output of the mapred tasks
      FileOutputFormat.setOutputPath(jobConf,
                                     new Path(OUTPUT_DIR, "output_" + rand.nextInt()));

      LOG.info("Running job " + i + ":" +
               " input=" + FileInputFormat.getInputPaths(jobConf)[0] +
               " output=" + FileOutputFormat.getOutputPath(jobConf));

      // run the mapred task now
      long curTime = System.currentTimeMillis();
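      // JobClient.runJob blocks until the job completes, so the elapsed time
      // recorded below is the end-to-end wall-clock time for the run,
      // including submission and scheduling overhead.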
      JobClient.runJob(jobConf);
      execTimes.add(Long.valueOf(System.currentTimeMillis() - curTime));
    }
    return execTimes;
  }

  /**
   * <pre>
   * Usage: mrbench
   *   [-baseDir <base DFS path for output/input, default is /benchmarks/MRBench>]
   *   [-jar <local path to job jar file containing Mapper and Reducer implementations, default is current jar file>]
   *   [-numRuns <number of times to run the job, default is 1>]
   *   [-maps <number of maps for each run, default is 2>]
   *   [-reduces <number of reduces for each run, default is 1>]
   *   [-inputLines <number of input lines to generate, default is 1>]
   *   [-inputType <type of input to generate, one of ascending (default), descending, random>]
   *   [-verbose]
   * </pre>
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new MRBench(), args);
    System.exit(res);
  }

  @Override
  public int run(String[] args) throws Exception {
    String version = "MRBenchmark.0.0.2";
    System.out.println(version);

    String usage =
      "Usage: mrbench " +
      "[-baseDir <base DFS path for output/input, default is /benchmarks/MRBench>] " +
      "[-jar <local path to job jar file containing Mapper and Reducer implementations, default is current jar file>] " +
      "[-numRuns <number of times to run the job, default is 1>] " +
      "[-maps <number of maps for each run, default is 2>] " +
      "[-reduces <number of reduces for each run, default is 1>] " +
      "[-inputLines <number of input lines to generate, default is 1>] " +
      "[-inputType <type of input to generate, one of ascending (default), descending, random>] " +
      "[-verbose]";

    String jarFile = null;
    int inputLines = 1;
    int numRuns = 1;
    int numMaps = 2;
    int numReduces = 1;
    boolean verbose = false;
    Order inputSortOrder = Order.ASCENDING;
    for (int i = 0; i < args.length; i++) { // parse command line
      if (args[i].equals("-jar")) {
        jarFile = args[++i];
      } else if (args[i].equals("-numRuns")) {
        numRuns = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-baseDir")) {
        BASE_DIR = new Path(args[++i]);
        // -baseDir previously had no effect because INPUT_DIR and OUTPUT_DIR
        // were derived from the default BASE_DIR at class-load time; recompute
        // them so the flag actually relocates the benchmark directories
        INPUT_DIR = new Path(BASE_DIR, "mr_input");
        OUTPUT_DIR = new Path(BASE_DIR, "mr_output");
      } else if (args[i].equals("-maps")) {
        numMaps = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-reduces")) {
        numReduces = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-inputLines")) {
        inputLines = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-inputType")) {
        String s = args[++i];
        if (s.equalsIgnoreCase("ascending")) {
          inputSortOrder = Order.ASCENDING;
        } else if (s.equalsIgnoreCase("descending")) {
          inputSortOrder = Order.DESCENDING;
        } else if (s.equalsIgnoreCase("random")) {
          inputSortOrder = Order.RANDOM;
        } else {
          inputSortOrder = null;
        }
      } else if (args[i].equals("-verbose")) {
        verbose = true;
      } else {
        System.err.println(usage);
        System.exit(-1);
      }
    }

    if (numRuns < 1 ||  // verify args
        numMaps < 1 ||
        numReduces < 1 ||
        inputLines < 0 ||
        inputSortOrder == null) {
      System.err.println(usage);
      return -1;
    }

    JobConf jobConf = setupJob(numMaps, numReduces, jarFile);
    FileSystem fs = FileSystem.get(jobConf);
    Path inputFile = new Path(INPUT_DIR, "input_" + (new Random()).nextInt() + ".txt");
    generateTextFile(fs, inputFile, inputLines, inputSortOrder);

    // make sure the base directory exists; each run's output is created beneath it
    fs.mkdirs(BASE_DIR);

    ArrayList<Long> execTimes = new ArrayList<Long>();
    try {
      execTimes = runJobInSequence(jobConf, numRuns);
    } finally {
      // delete output -- should we really do this?
      fs.delete(BASE_DIR, true);
    }

    if (verbose) {
      // Print out a report
      System.out.println("Total MapReduce jobs executed: " + numRuns);
      System.out.println("Total lines of data per job: " + inputLines);
      System.out.println("Maps per job: " + numMaps);
      System.out.println("Reduces per job: " + numReduces);
    }

    int i = 0;
    long totalTime = 0;
    for (Long time : execTimes) {
      totalTime += time.longValue();
      if (verbose) {
        System.out.println("Total milliseconds for task: " + (++i) +
                           " = " + time);
      }
    }
    long avgTime = totalTime / numRuns;
    System.out.println("DataLines\tMaps\tReduces\tAvgTime (milliseconds)");
    System.out.println(inputLines + "\t\t" + numMaps + "\t" +
                       numReduces + "\t" + avgTime);
    return 0;
  }
}