// blob: 273d5baf7494a5e8f6da3d2c153347f67d883487 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hyracks.dataflow.std.sort;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.hyracks.api.comm.IFrameWriter;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.dataflow.ActivityId;
import org.apache.hyracks.api.dataflow.IActivityGraphBuilder;
import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
import org.apache.hyracks.api.dataflow.TaskId;
import org.apache.hyracks.api.dataflow.value.IBinaryComparator;
import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.dataflow.common.io.GeneratedRunFileReader;
import org.apache.hyracks.dataflow.std.base.AbstractActivityNode;
import org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
import org.apache.hyracks.dataflow.std.base.AbstractStateObject;
import org.apache.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
import org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;
/**
 * Base class for sort operator descriptors that are executed as two activities:
 * a sort activity that consumes the input frames through a run generator, and a
 * merge activity that hands the generated runs (and the sorter) to a run merger
 * which writes the operator's output.
 *
 * <p>Concrete subclasses supply the two activities via
 * {@link #getSortActivity(ActivityId)} and {@link #getMergeActivity(ActivityId)}.
 * The sort activity publishes a {@link SortTaskState} on the task context when it
 * closes; the merge activity of the same partition looks that state up again.
 */
public abstract class AbstractSorterOperatorDescriptor extends AbstractOperatorDescriptor {

    private static final Logger LOGGER = Logger.getLogger(AbstractSorterOperatorDescriptor.class.getName());

    private static final long serialVersionUID = 1L;

    /** Local id used to build the {@link ActivityId} of the sort activity. */
    protected static final int SORT_ACTIVITY_ID = 0;

    /** Local id used to build the {@link ActivityId} of the merge activity. */
    protected static final int MERGE_ACTIVITY_ID = 1;

    /** Sort fields as given to the constructor; not read by this class itself. */
    protected final int[] sortFields;

    /**
     * Factory for the normalized key computer handed to the run merger. May be
     * {@code null}, in which case the merger receives a {@code null} computer.
     */
    protected final INormalizedKeyComputerFactory firstKeyNormalizerFactory;

    /** Factories from which the merge activity creates one comparator each. */
    protected final IBinaryComparatorFactory[] comparatorFactories;

    /** Frame budget; passed to the run merger as its {@code necessaryFrames} argument. */
    protected final int framesLimit;

    /**
     * Creates the descriptor and stores the sort configuration.
     *
     * @param spec registry passed through to the superclass constructor (together with
     *        the literal arguments {@code 1, 1})
     * @param framesLimit frame budget later forwarded to the run merger
     * @param sortFields sort fields, stored for use by subclasses
     * @param firstKeyNormalizerFactory normalized key computer factory; may be {@code null}
     * @param comparatorFactories comparator factories used by the merge activity
     * @param recordDescriptor record descriptor stored in slot 0 of the inherited
     *        {@code recordDescriptors} array
     */
    public AbstractSorterOperatorDescriptor(IOperatorDescriptorRegistry spec, int framesLimit, int[] sortFields,
            INormalizedKeyComputerFactory firstKeyNormalizerFactory, IBinaryComparatorFactory[] comparatorFactories,
            RecordDescriptor recordDescriptor) {
        super(spec, 1, 1);
        this.framesLimit = framesLimit;
        this.sortFields = sortFields;
        this.firstKeyNormalizerFactory = firstKeyNormalizerFactory;
        this.comparatorFactories = comparatorFactories;
        recordDescriptors[0] = recordDescriptor;
    }

    /**
     * Returns the sort activity to register under the given id.
     *
     * @param id activity id built from this operator's id and {@link #SORT_ACTIVITY_ID}
     * @return the sort activity
     */
    public abstract SortActivity getSortActivity(ActivityId id);

    /**
     * Returns the merge activity to register under the given id.
     *
     * @param id activity id built from this operator's id and {@link #MERGE_ACTIVITY_ID}
     * @return the merge activity
     */
    public abstract MergeActivity getMergeActivity(ActivityId id);

    /**
     * Registers the sort and merge activities with the builder: adds a source edge
     * (0, sort, 0), a target edge (0, merge, 0), and a blocking edge from the sort
     * activity to the merge activity.
     *
     * @param builder activity graph builder to contribute to
     */
    @Override
    public void contributeActivities(IActivityGraphBuilder builder) {
        SortActivity sa = getSortActivity(new ActivityId(odId, SORT_ACTIVITY_ID));
        MergeActivity ma = getMergeActivity(new ActivityId(odId, MERGE_ACTIVITY_ID));

        builder.addActivity(this, sa);
        builder.addSourceEdge(0, sa, 0);

        builder.addActivity(this, ma);
        builder.addTargetEdge(0, ma, 0);

        builder.addBlockingEdge(sa, ma);
    }

    /**
     * State handed from the sort activity to the merge activity of the same
     * partition through the task context.
     */
    public static class SortTaskState extends AbstractStateObject {
        /** Runs reported by the run generator after it was closed. */
        public List<GeneratedRunFileReader> generatedRunFileReaders;

        /** Sorter reported by the run generator after it was closed. */
        public ISorter sorter;

        /**
         * @param jobId id of the job the state belongs to
         * @param taskId id of the sort task that produced the state
         */
        public SortTaskState(JobId jobId, TaskId taskId) {
            super(jobId, taskId);
        }
    }

    /**
     * Sink activity that feeds every incoming frame to a run generator and, on
     * close, publishes the resulting runs and sorter as a {@link SortTaskState}.
     */
    protected abstract class SortActivity extends AbstractActivityNode {
        private static final long serialVersionUID = 1L;

        /**
         * @param id id of this activity
         */
        public SortActivity(ActivityId id) {
            super(id);
        }

        /**
         * Creates the run generator used by one runtime instance of this activity.
         * Called from the pushable's {@code open()}.
         *
         * @param ctx task context of the runtime instance
         * @param recordDescProvider record descriptor provider of the runtime instance
         * @return the run generator to feed frames into
         * @throws HyracksDataException if the run generator cannot be created
         */
        protected abstract AbstractSortRunGenerator getRunGenerator(IHyracksTaskContext ctx,
                IRecordDescriptorProvider recordDescProvider) throws HyracksDataException;

        /**
         * Creates the sink pushable that drives the run generator for one partition.
         *
         * @param ctx task context
         * @param recordDescProvider record descriptor provider
         * @param partition partition index, used to build the state's {@link TaskId}
         * @param nPartitions number of partitions (unused here)
         * @return the sink pushable
         */
        @Override
        public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
                final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) {
            IOperatorNodePushable op = new AbstractUnaryInputSinkOperatorNodePushable() {
                // Assigned in open(); stays null if getRunGenerator() throws.
                private AbstractSortRunGenerator runGen;

                @Override
                public void open() throws HyracksDataException {
                    runGen = getRunGenerator(ctx, recordDescProvider);
                    runGen.open();
                }

                @Override
                public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
                    runGen.nextFrame(buffer);
                }

                @Override
                public void close() throws HyracksDataException {
                    if (runGen == null) {
                        // open() failed before the run generator existed: there is nothing to
                        // close or publish, and an NPE here would hide the original failure.
                        return;
                    }
                    SortTaskState state = new SortTaskState(ctx.getJobletContext().getJobId(),
                            new TaskId(getActivityId(), partition));
                    runGen.close();
                    state.generatedRunFileReaders = runGen.getRuns();
                    state.sorter = runGen.getSorter();
                    if (LOGGER.isLoggable(Level.INFO)) {
                        LOGGER.info("InitialNumberOfRuns:" + runGen.getRuns().size());
                    }
                    ctx.setStateObject(state);
                }

                @Override
                public void fail() throws HyracksDataException {
                    // Guard against fail() being invoked after open() threw before assigning runGen.
                    if (runGen != null) {
                        runGen.fail();
                    }
                }
            };
            return op;
        }
    }

    /**
     * Source activity that retrieves the {@link SortTaskState} published by the sort
     * activity of the same partition and runs a run merger over it.
     */
    protected abstract class MergeActivity extends AbstractActivityNode {
        private static final long serialVersionUID = 1L;

        /**
         * @param id id of this activity
         */
        public MergeActivity(ActivityId id) {
            super(id);
        }

        /**
         * Creates the run merger used by one runtime instance of this activity.
         *
         * @param ctx task context
         * @param recordDescProvider record descriptor provider
         * @param writer the pushable's output writer
         * @param sorter sorter taken from the sort task state
         * @param runs runs taken from the sort task state
         * @param comparators comparators created from {@code comparatorFactories}
         * @param nmkComputer normalized key computer, or {@code null} if no factory was configured
         * @param necessaryFrames frame budget; receives {@code framesLimit}
         * @return the merger whose {@code process()} method is then invoked
         */
        protected abstract AbstractExternalSortRunMerger getSortRunMerger(IHyracksTaskContext ctx,
                IRecordDescriptorProvider recordDescProvider, IFrameWriter writer, ISorter sorter,
                List<GeneratedRunFileReader> runs, IBinaryComparator[] comparators, INormalizedKeyComputer nmkComputer,
                int necessaryFrames);

        /**
         * Creates the source pushable that merges the runs of one partition.
         *
         * @param ctx task context
         * @param recordDescProvider record descriptor provider
         * @param partition partition index, used to look up the sort task state
         * @param nPartitions number of partitions (unused here)
         * @return the source pushable
         */
        @Override
        public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
                final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) {
            IOperatorNodePushable op = new AbstractUnaryOutputSourceOperatorNodePushable() {
                @Override
                public void initialize() throws HyracksDataException {
                    // Look up the state under the sort activity's task id for this partition.
                    SortTaskState state = (SortTaskState) ctx
                            .getStateObject(new TaskId(new ActivityId(getOperatorId(), SORT_ACTIVITY_ID), partition));
                    List<GeneratedRunFileReader> runs = state.generatedRunFileReaders;
                    ISorter sorter = state.sorter;

                    IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
                    for (int i = 0; i < comparatorFactories.length; ++i) {
                        comparators[i] = comparatorFactories[i].createBinaryComparator();
                    }

                    // The normalizer factory is optional; pass null through when it is absent.
                    INormalizedKeyComputer nmkComputer = firstKeyNormalizerFactory == null ? null
                            : firstKeyNormalizerFactory.createNormalizedKeyComputer();

                    AbstractExternalSortRunMerger merger = getSortRunMerger(ctx, recordDescProvider, writer, sorter,
                            runs, comparators, nmkComputer, framesLimit);
                    merger.process();
                }
            };
            return op;
        }
    }
}