blob: 36ec966b69698b370be3f32798f3cc7fb812ef0c [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import java.io.BufferedWriter;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;
import org.junit.Assert;
import org.junit.Test;
public class TestMapperReducerCleanup {
static boolean mapCleanup = false;
static boolean reduceCleanup = false;
static boolean recordReaderCleanup = false;
static boolean recordWriterCleanup = false;
static void reset() {
mapCleanup = false;
reduceCleanup = false;
recordReaderCleanup = false;
recordWriterCleanup = false;
}
private static class FailingMapper
extends Mapper<LongWritable, Text, LongWritable, Text> {
/** Map method with different behavior based on the thread id */
public void map(LongWritable key, Text val, Context c)
throws IOException, InterruptedException {
throw new IOException("TestMapperReducerCleanup");
}
protected void cleanup(Context context)
throws IOException, InterruptedException {
mapCleanup = true;
super.cleanup(context);
}
}
private static class TrackingTokenizerMapper
extends Mapper<Object, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
protected void cleanup(org.apache.hadoop.mapreduce.Mapper.Context context)
throws IOException, InterruptedException {
mapCleanup = true;
super.cleanup(context);
}
}
private static class FailingReducer
extends Reducer<LongWritable, Text, LongWritable, LongWritable> {
public void reduce(LongWritable key, Iterable<Text> vals, Context context)
throws IOException, InterruptedException {
throw new IOException("TestMapperReducerCleanup");
}
protected void cleanup(Context context)
throws IOException, InterruptedException {
reduceCleanup = true;
super.cleanup(context);
}
}
@SuppressWarnings("rawtypes")
private static class TrackingIntSumReducer extends IntSumReducer {
@SuppressWarnings("unchecked")
protected void cleanup(Context context)
throws IOException, InterruptedException {
reduceCleanup = true;
super.cleanup(context);
}
}
public static class TrackingTextInputFormat extends TextInputFormat {
public static class TrackingRecordReader extends LineRecordReader {
@Override
public synchronized void close() throws IOException {
recordReaderCleanup = true;
super.close();
}
}
@Override
public RecordReader<LongWritable, Text> createRecordReader(
InputSplit split, TaskAttemptContext context) {
return new TrackingRecordReader();
}
}
@SuppressWarnings("rawtypes")
public static class TrackingTextOutputFormat extends TextOutputFormat {
public static class TrackingRecordWriter extends LineRecordWriter {
public TrackingRecordWriter(DataOutputStream out) {
super(out);
}
@Override
public synchronized void close(TaskAttemptContext context)
throws IOException {
recordWriterCleanup = true;
super.close(context);
}
}
@Override
public RecordWriter getRecordWriter(TaskAttemptContext job)
throws IOException, InterruptedException {
Configuration conf = job.getConfiguration();
Path file = getDefaultWorkFile(job, "");
FileSystem fs = file.getFileSystem(conf);
FSDataOutputStream fileOut = fs.create(file, false);
return new TrackingRecordWriter(fileOut);
}
}
/**
* Create a single input file in the input directory.
* @param dirPath the directory in which the file resides
* @param id the file id number
* @param numRecords how many records to write to each file.
*/
private void createInputFile(Path dirPath, int id, int numRecords)
throws IOException {
final String MESSAGE = "This is a line in a file: ";
Path filePath = new Path(dirPath, "" + id);
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
OutputStream os = fs.create(filePath);
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
for (int i = 0; i < numRecords; i++) {
w.write(MESSAGE + id + " " + i + "\n");
}
w.close();
}
private final String INPUT_DIR = "input";
private final String OUTPUT_DIR = "output";
private Path getInputPath() {
String dataDir = System.getProperty("test.build.data");
if (null == dataDir) {
return new Path(INPUT_DIR);
} else {
return new Path(new Path(dataDir), INPUT_DIR);
}
}
private Path getOutputPath() {
String dataDir = System.getProperty("test.build.data");
if (null == dataDir) {
return new Path(OUTPUT_DIR);
} else {
return new Path(new Path(dataDir), OUTPUT_DIR);
}
}
private Path createInput() throws IOException {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
Path inputPath = getInputPath();
// Clear the input directory if it exists, first.
if (fs.exists(inputPath)) {
fs.delete(inputPath, true);
}
// Create an input file
createInputFile(inputPath, 0, 10);
return inputPath;
}
@Test
public void testMapCleanup() throws Exception {
reset();
Job job = Job.getInstance();
Path inputPath = createInput();
Path outputPath = getOutputPath();
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
}
job.setMapperClass(FailingMapper.class);
job.setInputFormatClass(TrackingTextInputFormat.class);
job.setOutputFormatClass(TrackingTextOutputFormat.class);
job.setNumReduceTasks(0);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
job.waitForCompletion(true);
Assert.assertTrue(mapCleanup);
Assert.assertTrue(recordReaderCleanup);
Assert.assertTrue(recordWriterCleanup);
}
@Test
public void testReduceCleanup() throws Exception {
reset();
Job job = Job.getInstance();
Path inputPath = createInput();
Path outputPath = getOutputPath();
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
}
job.setMapperClass(TrackingTokenizerMapper.class);
job.setReducerClass(FailingReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setInputFormatClass(TrackingTextInputFormat.class);
job.setOutputFormatClass(TrackingTextOutputFormat.class);
job.setNumReduceTasks(1);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
job.waitForCompletion(true);
Assert.assertTrue(mapCleanup);
Assert.assertTrue(reduceCleanup);
Assert.assertTrue(recordReaderCleanup);
Assert.assertTrue(recordWriterCleanup);
}
@Test
public void testJobSuccessCleanup() throws Exception {
reset();
Job job = Job.getInstance();
Path inputPath = createInput();
Path outputPath = getOutputPath();
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
}
job.setMapperClass(TrackingTokenizerMapper.class);
job.setReducerClass(TrackingIntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setInputFormatClass(TrackingTextInputFormat.class);
job.setOutputFormatClass(TrackingTextOutputFormat.class);
job.setNumReduceTasks(1);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
job.waitForCompletion(true);
Assert.assertTrue(mapCleanup);
Assert.assertTrue(reduceCleanup);
Assert.assertTrue(recordReaderCleanup);
Assert.assertTrue(recordWriterCleanup);
}
}