/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.mrunit.internal.mapreduce;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mrunit.internal.io.Serialization;
import org.apache.hadoop.mrunit.internal.output.OutputCollectable;
import org.apache.hadoop.mrunit.types.Pair;
import org.apache.hadoop.util.ReflectionUtils;
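
/**
 * An {@link OutputCollectable} that pushes collected pairs through a real
 * mapreduce {@link OutputFormat} and reads them back with the paired
 * {@link InputFormat}, exercising the full serialization round trip.
 * Context objects are created reflectively so the class runs against both
 * the Hadoop 1 and Hadoop 2 mapreduce APIs.
 */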
public class MockMapreduceOutputFormat<K, V> implements OutputCollectable<K, V> {
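  // Fixed task attempt id for the contexts handed to the record writer and
  // record readers.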
  private static final String ATTEMPT = "attempt_000000000000_0000_m_000000_0";
  private static final TaskAttemptID TASK_ID = TaskAttemptID.forName(ATTEMPT);
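  // Constructor signatures for the reflectively created context objects;
  // the parameters are the same in Hadoop 1 and Hadoop 2, only the class
  // names differ.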
private static final Class<?>[] TASK_ATTEMPT_CONTEXT_CLASSES = new Class<?>[] {
Configuration.class, TaskAttemptID.class };
private static final Class<?>[] JOB_CONTEXT_CLASSES = new Class<?>[] {
Configuration.class, JobID.class };
private final Job outputFormatJob;
private final Job inputFormatJob;
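  // Scratch directory the OutputFormat writes into; deleted in getOutputs().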
private final File outputPath = new File(
System.getProperty("java.io.tmpdir"), "mrunit-" + Math.random());
private TaskAttemptContext taskAttemptContext;
@SuppressWarnings("rawtypes")
private RecordWriter recordWriter;
@SuppressWarnings("rawtypes")
private final InputFormat inputFormat;
@SuppressWarnings("rawtypes")
private final OutputFormat outputFormat;
private final List<Pair<K, V>> outputs = new ArrayList<Pair<K, V>>();
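
  /**
   * Instantiates the given format classes against their respective jobs and
   * points the output format at a fresh temporary directory.
   */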
@SuppressWarnings("rawtypes")
public MockMapreduceOutputFormat(Job outputFormatJob,
Class<? extends OutputFormat> outputFormatClass,
Class<? extends InputFormat> inputFormatClass, Job inputFormatJob)
throws IOException {
this.outputFormatJob = outputFormatJob;
this.inputFormatJob = inputFormatJob;
outputFormat = ReflectionUtils.newInstance(outputFormatClass,
outputFormatJob.getConfiguration());
inputFormat = ReflectionUtils.newInstance(inputFormatClass,
inputFormatJob.getConfiguration());
if (outputPath.exists()) {
throw new IllegalStateException(
"Generated the same random dir name twice: " + outputPath);
}
if (!outputPath.mkdir()) {
throw new IOException("Failed to create output dir " + outputPath);
}
FileOutputFormat.setOutputPath(outputFormatJob,
new Path(outputPath.toString()));
}
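
  /** Defaults the named configuration property to the given class if unset. */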
private void setClassIfUnset(String name, Class<?> classType) {
outputFormatJob.getConfiguration().setIfUnset(name, classType.getName());
}
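
  /**
   * Instantiates primaryClassName reflectively, retrying with
   * secondaryClassName when the primary class is not on the classpath. This
   * lets the Hadoop 2 *Impl context classes be preferred over their Hadoop 1
   * equivalents.
   */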
private Object createObject(String primaryClassName,
String secondaryClassName, Class<?>[] constructorParametersClasses,
Object... constructorParameters) {
try {
Class<?> classType = Class.forName(primaryClassName);
try {
Constructor<?> constructor = classType
.getConstructor(constructorParametersClasses);
return constructor.newInstance(constructorParameters);
} catch (SecurityException e) {
throw new IllegalStateException(e);
} catch (NoSuchMethodException e) {
throw new IllegalStateException(e);
} catch (IllegalArgumentException e) {
throw new IllegalStateException(e);
} catch (InstantiationException e) {
throw new IllegalStateException(e);
} catch (IllegalAccessException e) {
throw new IllegalStateException(e);
} catch (InvocationTargetException e) {
throw new IllegalStateException(e);
}
} catch (ClassNotFoundException e) {
if (secondaryClassName == null) {
throw new IllegalStateException(e);
}
return createObject(secondaryClassName, null,
constructorParametersClasses, constructorParameters);
}
}
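
  /**
   * Writes the pair through the real RecordWriter, creating the writer on
   * the first call once the output key/value classes are known.
   */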
@Override
public void collect(K key, V value) throws IOException {
try {
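      // Create the record writer lazily so the output key/value classes can
      // default to the runtime classes of the first pair collected.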
if (recordWriter == null) {
setClassIfUnset("mapred.output.key.class", key.getClass());
setClassIfUnset("mapred.output.value.class", value.getClass());
taskAttemptContext = (TaskAttemptContext) createObject(
"org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl",
"org.apache.hadoop.mapreduce.TaskAttemptContext",
TASK_ATTEMPT_CONTEXT_CLASSES, outputFormatJob.getConfiguration(),
TASK_ID);
recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
}
recordWriter.write(key, value);
} catch (InterruptedException e) {
throw new IllegalStateException(e);
}
}
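
  /**
   * Closes the record writer, reads everything back through the configured
   * InputFormat, deletes the scratch directory and returns the collected
   * pairs.
   */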
@Override
  public List<Pair<K, V>> getOutputs() throws IOException {
    if (recordWriter == null) {
      // collect() was never called, so there is no output to read back.
      FileUtil.fullyDelete(outputPath);
      return outputs;
    }
    try {
      recordWriter.close(taskAttemptContext);
} catch (InterruptedException e) {
throw new IOException(e);
}
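    // Deep-copies keys and values out of the record readers using the job's
    // configured serializations.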
final Serialization serialization = new Serialization(
inputFormatJob.getConfiguration());
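    // The output is never committed, so glob down into the nested
    // directories the output committer staged the files in.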
FileInputFormat.setInputPaths(inputFormatJob, outputPath + "/*/*/*/*");
try {
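      // Build a JobContext reflectively, preferring Hadoop 2's
      // JobContextImpl and falling back to the Hadoop 1 concrete JobContext.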
List<InputSplit> inputSplits = inputFormat
.getSplits((JobContext) createObject(
"org.apache.hadoop.mapreduce.task.JobContextImpl",
"org.apache.hadoop.mapreduce.JobContext", JOB_CONTEXT_CLASSES,
inputFormatJob.getConfiguration(), new JobID()));
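      // Record readers may reuse their key/value instances, so copy each
      // pair out before storing it.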
for (InputSplit inputSplit : inputSplits) {
RecordReader<K, V> recordReader = inputFormat.createRecordReader(
inputSplit, taskAttemptContext);
recordReader.initialize(inputSplit, taskAttemptContext);
while (recordReader.nextKeyValue()) {
outputs.add(new Pair<K, V>(serialization.copy(recordReader
.getCurrentKey()), serialization.copy(recordReader
.getCurrentValue())));
        }
        recordReader.close();
      }
} catch (InterruptedException e) {
throw new IOException(e);
}
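    // All output has been read back; remove the scratch directory.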
FileUtil.fullyDelete(outputPath);
return outputs;
}
}