/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapreduce.ReduceContext;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer;
import org.apache.hadoop.mapreduce.task.ReduceContextImpl;

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage;
import org.apache.pig.impl.io.NullableTuple;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.pig.impl.util.Pair;
import org.apache.pig.pen.FakeRawKeyValueIterator;

public class PigMapReduce extends PigGenericMapReduce {

    /**
     * Reducer wrapper used in illustrator mode: it hands the reduce plan a
     * context backed by an in-memory input buffer instead of a real shuffle.
     */
    static class IllustrateReducerContext
            extends WrappedReducer<PigNullableWritable, NullableTuple, PigNullableWritable, Writable> {

        public IllustratorContext getReducerContext(
                ReduceContext<PigNullableWritable, NullableTuple, PigNullableWritable, Writable> reduceContext) {
            return new IllustratorContext(reduceContext);
        }

        public class IllustratorContext extends WrappedReducer.Context {
            public IllustratorContext(
                    ReduceContext<PigNullableWritable, NullableTuple, PigNullableWritable, Writable> reduceContext) {
                super(reduceContext);
            }

            public POPackage getPack() {
                return ((Reduce.IllustratorContextImpl) reduceContext).pack;
            }
        }
    }
    public static class Reduce extends PigGenericMapReduce.Reduce {

        /**
         * Get the reducer's illustrator context.
         *
         * @param job the map-reduce job
         * @param input input buffer holding the key/value pairs output by the maps
         * @param pkg the POPackage operator of the reduce plan
         * @return the reducer's illustrator context
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        public Context getIllustratorContext(Job job,
                List<Pair<PigNullableWritable, Writable>> input, POPackage pkg)
                throws IOException, InterruptedException {
            org.apache.hadoop.mapreduce.Reducer.Context reducerContext =
                    new IllustrateReducerContext().getReducerContext(
                            new IllustratorContextImpl(job, input, pkg));
            return reducerContext;
        }
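
        /*
         * A minimal usage sketch (hypothetical caller, not part of this class):
         * the illustrator collects map output into an in-memory list and then
         * drives the reduce plan through the context returned above, e.g.
         *
         *     List<Pair<PigNullableWritable, Writable>> buffer = ...; // map output
         *     Context context = reduce.getIllustratorContext(job, buffer, pkg);
         *     while (context.nextKey()) {
         *         for (NullableTuple value : (Iterable<NullableTuple>) context.getValues()) {
         *             // feed each value of the current group to the reduce plan
         *         }
         *     }
         *
         * The names reduce, buffer, job and pkg are illustrative only.
         */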
        @SuppressWarnings("unchecked")
        public class IllustratorContextImpl extends ReduceContextImpl<PigNullableWritable, NullableTuple, PigNullableWritable, Writable> {
            private PigNullableWritable currentKey = null, nextKey = null;
            private NullableTuple nextValue = null;
            private List<NullableTuple> currentValues = null;
            private Iterator<Pair<PigNullableWritable, Writable>> it;
            // Scratch buffers used to serialize keys for raw byte comparison.
            private final ByteArrayOutputStream bos;
            private final DataOutputStream dos;
            private final RawComparator sortComparator, groupingComparator;
            public POPackage pack = null;
            private IllustratorValueIterable iterable = new IllustratorValueIterable();

            public IllustratorContextImpl(Job job,
                    List<Pair<PigNullableWritable, Writable>> input,
                    POPackage pkg) throws IOException, InterruptedException {
                super(job.getJobConf(), new TaskAttemptID(), new FakeRawKeyValueIterator(input.iterator().hasNext()),
                      null, null, null, null, new IllustrateDummyReporter(), null, PigNullableWritable.class, NullableTuple.class);
                bos = new ByteArrayOutputStream();
                dos = new DataOutputStream(bos);
                org.apache.hadoop.mapreduce.Job nwJob = new org.apache.hadoop.mapreduce.Job(job.getJobConf());
                sortComparator = nwJob.getSortComparator();
                groupingComparator = nwJob.getGroupingComparator();
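                // The RawComparator operates on serialized bytes, so both keys
                // are written back-to-back into the shared scratch buffer: o1
                // occupies [0, l1) and o2 occupies [l1, l2), and the comparator
                // is handed those two slices of the same byte array.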
                Collections.sort(input, new Comparator<Pair<PigNullableWritable, Writable>>() {
                        @Override
                        public int compare(Pair<PigNullableWritable, Writable> o1,
                                           Pair<PigNullableWritable, Writable> o2) {
                            try {
                                o1.first.write(dos);
                                int l1 = bos.size();
                                o2.first.write(dos);
                                int l2 = bos.size();
                                byte[] bytes = bos.toByteArray();
                                bos.reset();
                                return sortComparator.compare(bytes, 0, l1, bytes, l1, l2 - l1);
                            } catch (IOException e) {
                                throw new RuntimeException("Serialization exception in sort: " + e.getMessage(), e);
                            }
                        }
                    }
                );
                currentValues = new ArrayList<NullableTuple>();
                it = input.iterator();
                // Prime the first key/value pair so nextKey() has a group to start from.
                if (it.hasNext()) {
                    Pair<PigNullableWritable, Writable> entry = it.next();
                    nextKey = entry.first;
                    nextValue = (NullableTuple) entry.second;
                }
                pack = pkg;
            }
            public class IllustratorValueIterator implements ReduceContext.ValueIterator<NullableTuple> {
                private int pos = -1;
                private int mark = -1;

                // Remember the position just before the current value so that
                // reset() replays it.
                @Override
                public void mark() throws IOException {
                    mark = pos - 1;
                    if (mark < -1)
                        mark = -1;
                }

                @Override
                public void reset() throws IOException {
                    pos = mark;
                }

                @Override
                public void clearMark() throws IOException {
                    mark = -1;
                }

                @Override
                public boolean hasNext() {
                    return pos < currentValues.size() - 1;
                }

                @Override
                public NullableTuple next() {
                    pos++;
                    return currentValues.get(pos);
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException("remove not implemented");
                }

                @Override
                public void resetBackupStore() throws IOException {
                    pos = -1;
                    mark = -1;
                }
            }
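
            /*
             * Mark/reset semantics, sketched over currentValues = [a, b, c]
             * (a hypothetical walk-through, not executed anywhere in this class):
             *
             *     next()  -> a   (pos = 0)
             *     mark()         (mark = -1, so reset will replay a)
             *     next()  -> b   (pos = 1)
             *     reset()        (pos = -1)
             *     next()  -> a   (the marked value is replayed)
             */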
            protected class IllustratorValueIterable implements Iterable<NullableTuple> {
                private IllustratorValueIterator iterator = new IllustratorValueIterator();

                @Override
                public Iterator<NullableTuple> iterator() {
                    return iterator;
                }
            }
            @Override
            public PigNullableWritable getCurrentKey() {
                return currentKey;
            }

            @Override
            public boolean nextKey() {
                if (nextKey == null)
                    return false;
                currentKey = nextKey;
                currentValues.clear();
                currentValues.add(nextValue);
                nextKey = null;
                // Gather every subsequent value whose key groups with currentKey.
                while (it.hasNext()) {
                    Pair<PigNullableWritable, Writable> entry = it.next();
                    /* Why can't raw comparison be used?
                    byte[] bytes;
                    int l1, l2;
                    try {
                        currentKey.write(dos);
                        l1 = bos.size();
                        entry.first.write(dos);
                        l2 = bos.size();
                        bytes = bos.toByteArray();
                    } catch (IOException e) {
                        throw new RuntimeException("nextKey exception : " + e.getMessage());
                    }
                    bos.reset();
                    if (groupingComparator.compare(bytes, 0, l1, bytes, l1, l2 - l1) == 0)
                    */
                    if (groupingComparator.compare(currentKey, entry.first) == 0) {
                        currentValues.add((NullableTuple) entry.second);
                    } else {
                        nextKey = entry.first;
                        nextValue = (NullableTuple) entry.second;
                        break;
                    }
                }
                return true;
            }

            @Override
            public Iterable<NullableTuple> getValues() {
                return iterable;
            }

            @Override
            public void write(PigNullableWritable k, Writable t) {
                // No-op: the illustrator inspects intermediate data directly
                // rather than writing reduce output.
            }

            @Override
            public void progress() {
                // No-op: there is no real task to report progress to.
            }
        }
        @Override
        public boolean inIllustrator(org.apache.hadoop.mapreduce.Reducer.Context context) {
            return (context instanceof PigMapReduce.IllustrateReducerContext.IllustratorContext);
        }

        @Override
        public POPackage getPack(org.apache.hadoop.mapreduce.Reducer.Context context) {
            return ((PigMapReduce.IllustrateReducerContext.IllustratorContext) context).getPack();
        }
    }
}
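
/*
 * A minimal sketch of how the two hooks above are assumed to be consulted by
 * the generic reduce path (hypothetical caller; names are illustrative only):
 *
 *     if (reduce.inIllustrator(context)) {
 *         // Running under the illustrator: the POPackage travels with the
 *         // context instead of the job configuration.
 *         POPackage pkg = reduce.getPack(context);
 *         // ... drive the reduce plan with pkg ...
 *     }
 */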