// blob: 6c432ddfc7f6b4dd10ae8201c3f56f29ec2989a0 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.lib.output;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.JavaSerializationComparator;
import org.apache.hadoop.mapred.HadoopTestCase;
import org.apache.hadoop.mapreduce.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
public class TestMRMultipleOutputs extends HadoopTestCase {
/**
 * Creates the test case using the {@code LOCAL_MR} and {@code LOCAL_FS}
 * modes of {@link HadoopTestCase}; the two trailing {@code 1} arguments are
 * handed to the superclass constructor unchanged.
 *
 * @throws IOException if the superclass constructor fails
 */
public TestMRMultipleOutputs() throws IOException {
  super(
      HadoopTestCase.LOCAL_MR,
      HadoopTestCase.LOCAL_FS,
      1,
      1);
}
/**
 * Runs both MultipleOutputs scenarios (Writable-based and Java
 * serialization-based) with MultipleOutputs counters disabled.
 */
public void testWithoutCounters() throws Exception {
  final boolean countersEnabled = false;
  _testMultipleOutputs(countersEnabled);
  _testMOWithJavaSerialization(countersEnabled);
}
/**
 * Runs both MultipleOutputs scenarios (Writable-based and Java
 * serialization-based) with MultipleOutputs counters enabled, so the
 * counter assertions in each scenario are exercised as well.
 */
public void testWithCounters() throws Exception {
  final boolean countersEnabled = true;
  _testMultipleOutputs(countersEnabled);
  _testMOWithJavaSerialization(countersEnabled);
}
/**
 * Base directory for test data, read from the {@code test.build.data}
 * system property and falling back to {@code /tmp} when it is not set.
 */
private static final String localPathRoot =
    System.getProperty("test.build.data", "/tmp");

/** Root of everything this test creates; removed in setUp and tearDown. */
private static final Path ROOT_DIR = new Path(localPathRoot, "testing/mo");

/** Job input directory below {@link #ROOT_DIR}. */
private static final Path IN_DIR = new Path(ROOT_DIR, "input");

/** Job output directory below {@link #ROOT_DIR}. */
private static final Path OUT_DIR = new Path(ROOT_DIR, "output");

/** Name of the text named output; also used as the value written to it. */
private static final String TEXT = "text";

/** Name of the sequence-file named output; also used as the value written to it. */
private static final String SEQUENCE = "sequence";
/**
 * Performs the superclass set-up, then recursively removes
 * {@code ROOT_DIR} so each test starts without leftovers from earlier runs.
 */
public void setUp() throws Exception {
  super.setUp();
  // The file system is resolved from the job configuration.
  FileSystem.get(createJobConf()).delete(ROOT_DIR, true);
}
/**
 * Recursively removes {@code ROOT_DIR} and then performs the superclass
 * tear-down.
 *
 * <p>The superclass tear-down runs in a {@code finally} block so that it is
 * not skipped when deleting the test directory throws.
 */
public void tearDown() throws Exception {
  try {
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(ROOT_DIR, true);
  } finally {
    super.tearDown();
  }
}
/**
 * Runs a job whose keys and values are plain {@code Long}/{@code String}
 * objects handled by Java serialization, and verifies the files and
 * (optionally) counters produced through {@link MultipleOutputs}.
 *
 * <p>The job registers one named output, {@code TEXT}, backed by
 * {@link TextOutputFormat}. The reducer additionally writes every value to
 * a base output path named after the value itself, which is what the
 * "value based" file count checks.
 *
 * @param withCounters whether MultipleOutputs counters are enabled for the
 *        job and asserted afterwards
 * @throws Exception if the job cannot be created or run, or if reading the
 *         output fails
 */
protected void _testMOWithJavaSerialization(boolean withCounters) throws Exception {
  String input = "a\nb\nc\nd\ne\nc\nd\ne";

  Configuration conf = createJobConf();
  conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization," +
      "org.apache.hadoop.io.serializer.WritableSerialization");

  // Presumably two map tasks and one reduce task, which matches the
  // part-file names asserted below (-m-00000, -m-00001, -r-00000).
  Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);
  job.setJobName("mo");
  MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class,
      Long.class, String.class);
  MultipleOutputs.setCountersEnabled(job, withCounters);

  job.setSortComparatorClass(JavaSerializationComparator.class);
  job.setMapOutputKeyClass(Long.class);
  job.setMapOutputValueClass(String.class);
  job.setOutputKeyClass(Long.class);
  job.setOutputValueClass(String.class);
  job.setMapperClass(MOJavaSerDeMap.class);
  job.setReducerClass(MOJavaSerDeReduce.class);

  // Fail right here when the job itself fails, instead of failing later
  // with a less helpful file-count mismatch.
  assertTrue("MultipleOutputs job did not complete successfully",
      job.waitForCompletion(true));

  // assert number of named output part files
  int namedOutputCount = 0;
  int valueBasedOutputCount = 0;
  FileSystem fs = OUT_DIR.getFileSystem(conf);
  FileStatus[] statuses = fs.listStatus(OUT_DIR);
  for (FileStatus status : statuses) {
    String fileName = status.getPath().getName();
    if (fileName.equals("text-m-00000") ||
        fileName.equals("text-m-00001") ||
        fileName.equals("text-r-00000")) {
      namedOutputCount++;
    } else if (fileName.equals("a-r-00000") ||
        fileName.equals("b-r-00000") ||
        fileName.equals("c-r-00000") ||
        fileName.equals("d-r-00000") ||
        fileName.equals("e-r-00000")) {
      valueBasedOutputCount++;
    }
  }
  assertEquals(3, namedOutputCount);
  assertEquals(5, valueBasedOutputCount);

  // assert TextOutputFormat files correctness
  int count = 0;
  BufferedReader reader = new BufferedReader(
      new InputStreamReader(fs.open(
          new Path(FileOutputFormat.getOutputPath(job), "text-r-00000")),
          "UTF-8"));
  try {
    String line = reader.readLine();
    while (line != null) {
      assertTrue(line.endsWith(TEXT));
      line = reader.readLine();
      count++;
    }
  } finally {
    // Close even when an assertion above fails.
    reader.close();
  }
  assertFalse(count == 0);

  if (withCounters) {
    CounterGroup counters =
        job.getCounters().getGroup(MultipleOutputs.class.getName());
    assertEquals(6, counters.size());
    assertEquals(4, counters.findCounter(TEXT).getValue());
    assertEquals(2, counters.findCounter("a").getValue());
    assertEquals(2, counters.findCounter("b").getValue());
    assertEquals(4, counters.findCounter("c").getValue());
    assertEquals(4, counters.findCounter("d").getValue());
    assertEquals(4, counters.findCounter("e").getValue());
  }
}
/**
 * Runs a Writable-based job that writes through {@link MultipleOutputs}
 * and verifies the resulting files and (optionally) counters.
 *
 * <p>Two named outputs are registered: {@code TEXT}, backed by
 * {@link TextOutputFormat}, and {@code SEQUENCE}, backed by
 * {@link SequenceFileOutputFormat}. The mapper and reducer write to the
 * sequence output under the base paths {@code sequence_A},
 * {@code sequence_B} and {@code sequence_C}; the reducer additionally
 * writes every value to a base output path named after the value itself.
 *
 * @param withCounters whether MultipleOutputs counters are enabled for the
 *        job and asserted afterwards
 * @throws Exception if the job cannot be created or run, or if reading the
 *         output fails
 */
protected void _testMultipleOutputs(boolean withCounters) throws Exception {
  String input = "a\nb\nc\nd\ne\nc\nd\ne";

  Configuration conf = createJobConf();
  // Presumably two map tasks and one reduce task, which matches the
  // part-file names asserted below (-m-00000, -m-00001, -r-00000).
  Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);
  job.setJobName("mo");
  MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class,
      LongWritable.class, Text.class);
  MultipleOutputs.addNamedOutput(job, SEQUENCE,
      SequenceFileOutputFormat.class, IntWritable.class, Text.class);
  MultipleOutputs.setCountersEnabled(job, withCounters);

  job.setMapperClass(MOMap.class);
  job.setReducerClass(MOReduce.class);

  // Fail right here when the job itself fails, instead of failing later
  // with a less helpful file-count mismatch.
  assertTrue("MultipleOutputs job did not complete successfully",
      job.waitForCompletion(true));

  // assert number of named output part files
  int namedOutputCount = 0;
  int valueBasedOutputCount = 0;
  FileSystem fs = OUT_DIR.getFileSystem(conf);
  FileStatus[] statuses = fs.listStatus(OUT_DIR);
  for (FileStatus status : statuses) {
    String fileName = status.getPath().getName();
    if (fileName.equals("text-m-00000") ||
        fileName.equals("text-m-00001") ||
        fileName.equals("text-r-00000") ||
        fileName.equals("sequence_A-m-00000") ||
        fileName.equals("sequence_A-m-00001") ||
        fileName.equals("sequence_B-m-00000") ||
        fileName.equals("sequence_B-m-00001") ||
        fileName.equals("sequence_B-r-00000") ||
        fileName.equals("sequence_C-r-00000")) {
      namedOutputCount++;
    } else if (fileName.equals("a-r-00000") ||
        fileName.equals("b-r-00000") ||
        fileName.equals("c-r-00000") ||
        fileName.equals("d-r-00000") ||
        fileName.equals("e-r-00000")) {
      valueBasedOutputCount++;
    }
  }
  assertEquals(9, namedOutputCount);
  assertEquals(5, valueBasedOutputCount);

  // assert TextOutputFormat files correctness
  int count = 0;
  BufferedReader reader = new BufferedReader(
      new InputStreamReader(fs.open(
          new Path(FileOutputFormat.getOutputPath(job), "text-r-00000")),
          "UTF-8"));
  try {
    String line = reader.readLine();
    while (line != null) {
      assertTrue(line.endsWith(TEXT));
      line = reader.readLine();
      count++;
    }
  } finally {
    // Close even when an assertion above fails.
    reader.close();
  }
  assertFalse(count == 0);

  // assert SequenceOutputFormat files correctness
  SequenceFile.Reader seqReader =
      new SequenceFile.Reader(fs, new Path(FileOutputFormat.getOutputPath(job),
          "sequence_B-r-00000"), conf);
  try {
    assertEquals(IntWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());

    count = 0;
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
      assertEquals(SEQUENCE, value.toString());
      count++;
    }
  } finally {
    // Close even when an assertion above fails.
    seqReader.close();
  }
  assertFalse(count == 0);

  if (withCounters) {
    CounterGroup counters =
        job.getCounters().getGroup(MultipleOutputs.class.getName());
    assertEquals(9, counters.size());
    assertEquals(4, counters.findCounter(TEXT).getValue());
    assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue());
    assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue());
    assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue());
    assertEquals(2, counters.findCounter("a").getValue());
    assertEquals(2, counters.findCounter("b").getValue());
    assertEquals(4, counters.findCounter("c").getValue());
    assertEquals(4, counters.findCounter("d").getValue());
    assertEquals(4, counters.findCounter("e").getValue());
  }
}
/**
 * Mapper that passes every input record through to the regular job output
 * and, for records whose value is {@code "a"}, also writes to the
 * {@code TEXT} named output and to the {@code SEQUENCE} named output under
 * the base paths {@code sequence_A} and {@code sequence_B}.
 */
public static class MOMap extends Mapper<LongWritable, Text, LongWritable,
    Text> {

  /** Writer for the additional outputs; created in setup, closed in cleanup. */
  private MultipleOutputs<LongWritable, Text> mos;

  public void setup(Context context) {
    mos = new MultipleOutputs<LongWritable, Text>(context);
  }

  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    context.write(key, value);
    if (value.toString().equals("a")) {
      mos.write(TEXT, key, new Text(TEXT));
      mos.write(SEQUENCE, new IntWritable(1), new Text(SEQUENCE),
          (SEQUENCE + "_A"));
      mos.write(SEQUENCE, new IntWritable(2), new Text(SEQUENCE),
          (SEQUENCE + "_B"));
    }
  }

  public void cleanup(Context context)
      throws IOException, InterruptedException {
    mos.close();
  }
}
/**
 * Reducer that writes every value to a base output path named after the
 * value itself. Values other than {@code "b"} are also emitted to the
 * regular job output; for {@code "b"} the reducer instead writes to the
 * {@code TEXT} named output and to the {@code SEQUENCE} named output under
 * the base paths {@code sequence_B} and {@code sequence_C}.
 */
public static class MOReduce extends Reducer<LongWritable, Text,
    LongWritable, Text> {

  /** Writer for the additional outputs; created in setup, closed in cleanup. */
  private MultipleOutputs<LongWritable, Text> mos;

  public void setup(Context context) {
    mos = new MultipleOutputs<LongWritable, Text>(context);
  }

  public void reduce(LongWritable key, Iterable<Text> values,
      Context context) throws IOException, InterruptedException {
    for (Text value : values) {
      // The value doubles as the base output path, giving e.g. "a-r-00000".
      mos.write(key, value, value.toString());
      if (!value.toString().equals("b")) {
        context.write(key, value);
      } else {
        mos.write(TEXT, key, new Text(TEXT));
        mos.write(SEQUENCE, new IntWritable(2), new Text(SEQUENCE),
            (SEQUENCE + "_B"));
        mos.write(SEQUENCE, new IntWritable(3), new Text(SEQUENCE),
            (SEQUENCE + "_C"));
      }
    }
  }

  public void cleanup(Context context)
      throws IOException, InterruptedException {
    mos.close();
  }
}
/**
 * Mapper for the Java serialization scenario: converts each
 * {@code LongWritable}/{@code Text} input record to a plain
 * {@code Long}/{@code String} pair, emits it to the regular job output, and
 * for the value {@code "a"} also writes to the {@code TEXT} named output.
 */
public static class MOJavaSerDeMap extends Mapper<LongWritable, Text, Long,
    String> {

  /** Writer for the additional outputs; created in setup, closed in cleanup. */
  private MultipleOutputs<Long, String> mos;

  public void setup(Context context) {
    mos = new MultipleOutputs<Long, String>(context);
  }

  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    final Long outKey = key.get();
    final String outValue = value.toString();

    context.write(outKey, outValue);
    if (outValue.equals("a")) {
      mos.write(TEXT, outKey, TEXT);
    }
  }

  public void cleanup(Context context)
      throws IOException, InterruptedException {
    mos.close();
  }
}
/**
 * Reducer for the Java serialization scenario. Every value is written to a
 * base output path named after the value itself. Values other than
 * {@code "b"} are also emitted to the regular job output; for {@code "b"}
 * the reducer instead writes to the {@code TEXT} named output.
 */
public static class MOJavaSerDeReduce extends Reducer<Long, String,
    Long, String> {

  /** Writer for the additional outputs; created in setup, closed in cleanup. */
  private MultipleOutputs<Long, String> mos;

  public void setup(Context context) {
    mos = new MultipleOutputs<Long, String>(context);
  }

  public void reduce(Long key, Iterable<String> values,
      Context context) throws IOException, InterruptedException {
    for (String value : values) {
      // The value doubles as the base output path, giving e.g. "a-r-00000".
      mos.write(key, value, value);
      if (!value.equals("b")) {
        context.write(key, value);
      } else {
        // Write a String, matching the value class (String.class) the
        // TEXT named output is registered with in this scenario.
        mos.write(TEXT, key, TEXT);
      }
    }
  }

  public void cleanup(Context context)
      throws IOException, InterruptedException {
    mos.close();
  }
}
}