blob: d9c16d88bcb1408c0850263a91f14f98c7248884 [file] [log] [blame]
/*
* Copyright 2009-2010 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.uci.ics.hyracks.dataflow.std.sort;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
import edu.uci.ics.hyracks.api.comm.IFrameReader;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.ActivityId;
import edu.uci.ics.hyracks.api.dataflow.IActivityGraphBuilder;
import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
import edu.uci.ics.hyracks.api.dataflow.TaskId;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractActivityNode;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractStateObject;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;
/**
* @author pouria
* Operator descriptor for sorting with replacement, consisting of two
* phases:
* - Run Generation: Denoted by OptimizedSortActivity below, in which
* sort runs get generated from the input data. This phases uses the
* Selection Tree and Memory Manager to benefit from the replacement
* selection optimization, to create runs which are longer than the
* available memory size.
* - Merging: Denoted by MergeActivity below, in which runs (generated
* in the previous phase) get merged via a merger. Each run has a single
* buffer in memory, and a priority queue is used to select the top
* tuple each time. Top tuple is send to a new run or output
*/
public class OptimizedExternalSortOperatorDescriptor extends AbstractOperatorDescriptor {
private static final int NO_LIMIT = -1;
private static final long serialVersionUID = 1L;
private static final int SORT_ACTIVITY_ID = 0;
private static final int MERGE_ACTIVITY_ID = 1;
private final int[] sortFields;
private final INormalizedKeyComputerFactory firstKeyNormalizerFactory;
private final IBinaryComparatorFactory[] comparatorFactories;
private final int memSize;
private final int outputLimit;
public OptimizedExternalSortOperatorDescriptor(IOperatorDescriptorRegistry spec, int framesLimit, int[] sortFields,
IBinaryComparatorFactory[] comparatorFactories, RecordDescriptor recordDescriptor) {
this(spec, framesLimit, NO_LIMIT, sortFields, null, comparatorFactories, recordDescriptor);
}
public OptimizedExternalSortOperatorDescriptor(IOperatorDescriptorRegistry spec, int framesLimit, int outputLimit,
int[] sortFields, IBinaryComparatorFactory[] comparatorFactories, RecordDescriptor recordDescriptor) {
this(spec, framesLimit, outputLimit, sortFields, null, comparatorFactories, recordDescriptor);
}
public OptimizedExternalSortOperatorDescriptor(IOperatorDescriptorRegistry spec, int memSize, int outputLimit,
int[] sortFields, INormalizedKeyComputerFactory firstKeyNormalizerFactory,
IBinaryComparatorFactory[] comparatorFactories, RecordDescriptor recordDescriptor) {
super(spec, 1, 1);
this.memSize = memSize;
this.outputLimit = outputLimit;
this.sortFields = sortFields;
this.firstKeyNormalizerFactory = firstKeyNormalizerFactory;
this.comparatorFactories = comparatorFactories;
if (memSize <= 1) {
throw new IllegalStateException();// minimum of 2 fames (1 in,1 out)
}
recordDescriptors[0] = recordDescriptor;
}
@Override
public void contributeActivities(IActivityGraphBuilder builder) {
OptimizedSortActivity osa = new OptimizedSortActivity(new ActivityId(odId, SORT_ACTIVITY_ID));
OptimizedMergeActivity oma = new OptimizedMergeActivity(new ActivityId(odId, MERGE_ACTIVITY_ID));
builder.addActivity(this, osa);
builder.addSourceEdge(0, osa, 0);
builder.addActivity(this, oma);
builder.addTargetEdge(0, oma, 0);
builder.addBlockingEdge(osa, oma);
}
public static class OptimizedSortTaskState extends AbstractStateObject {
private List<IFrameReader> runs;
public OptimizedSortTaskState() {
}
private OptimizedSortTaskState(JobId jobId, TaskId taskId) {
super(jobId, taskId);
}
@Override
public void toBytes(DataOutput out) throws IOException {
}
@Override
public void fromBytes(DataInput in) throws IOException {
}
}
private class OptimizedSortActivity extends AbstractActivityNode {
private static final long serialVersionUID = 1L;
public OptimizedSortActivity(ActivityId id) {
super(id);
}
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) {
final IRunGenerator runGen;
if (outputLimit == NO_LIMIT) {
runGen = new OptimizedExternalSortRunGenerator(ctx, sortFields, firstKeyNormalizerFactory,
comparatorFactories, recordDescriptors[0], memSize);
} else {
runGen = new OptimizedExternalSortRunGeneratorWithLimit(ctx, sortFields, firstKeyNormalizerFactory,
comparatorFactories, recordDescriptors[0], memSize, outputLimit);
}
IOperatorNodePushable op = new AbstractUnaryInputSinkOperatorNodePushable() {
@Override
public void open() throws HyracksDataException {
runGen.open();
}
@Override
public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
runGen.nextFrame(buffer);
}
@Override
public void close() throws HyracksDataException {
OptimizedSortTaskState state = new OptimizedSortTaskState(ctx.getJobletContext().getJobId(),
new TaskId(getActivityId(), partition));
runGen.close();
state.runs = runGen.getRuns();
ctx.setStateObject(state);
}
@Override
public void fail() throws HyracksDataException {
runGen.fail();
}
};
return op;
}
}
private class OptimizedMergeActivity extends AbstractActivityNode {
private static final long serialVersionUID = 1L;
public OptimizedMergeActivity(ActivityId id) {
super(id);
}
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) {
IOperatorNodePushable op = new AbstractUnaryOutputSourceOperatorNodePushable() {
@Override
public void initialize() throws HyracksDataException {
OptimizedSortTaskState state = (OptimizedSortTaskState) ctx.getStateObject(new TaskId(
new ActivityId(getOperatorId(), SORT_ACTIVITY_ID), partition));
List<IFrameReader> runs = state.runs;
IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
for (int i = 0; i < comparatorFactories.length; ++i) {
comparators[i] = comparatorFactories[i].createBinaryComparator();
}
int necessaryFrames = Math.min(runs.size() + 2, memSize);
ExternalSortRunMerger merger = new ExternalSortRunMerger(ctx, outputLimit, runs, sortFields,
comparators, recordDescriptors[0], necessaryFrames, writer);
merger.processWithReplacementSelection();
}
};
return op;
}
}
}