blob: 156d5749299e36812b94a08f0651aebfde7a9c95 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_EXEC_PARTIAL_SORT_NODE_H
#define IMPALA_EXEC_PARTIAL_SORT_NODE_H
#include "exec/exec-node.h"
#include "runtime/sorter.h"
namespace impala {
/// Node that implements a partial sort, where its input is divided up into runs, each
/// of which is sorted individually.
///
/// In GetNext(), PartialSortNode accepts rows up to its memory limit and sorts them,
/// creating a single sorted run. It then outputs as many rows as fit in the output batch.
/// Subsequent calls to GetNext() continue to ouptut rows from the sorted run until it is
/// exhausted, at which point the next call to GetNext() will again accept rows to create
/// another run. This means that PartialSortNode never spills to disk.
///
/// Uses Sorter and BufferedBlockMgr for the external sort implementation. The sorter
/// instance owns the sorted data.
///
/// Input rows to PartialSortNode may consist of several tuples. The Sorter materializes
/// them into a single tuple using the expressions specified in sort_tuple_exprs_. This
/// single tuple is then what the sort operates on.
///
/// PartialSortNode does not support limits or offsets.
class PartialSortNode : public ExecNode {
public:
PartialSortNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
~PartialSortNode();
virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
virtual Status Prepare(RuntimeState* state);
virtual void Codegen(RuntimeState* state);
virtual Status Open(RuntimeState* state);
virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
virtual Status Reset(RuntimeState* state, RowBatch* row_batch);
virtual void Close(RuntimeState* state);
protected:
virtual void DebugString(int indentation_level, std::stringstream* out) const;
private:
/// Expressions and parameters used for tuple comparison.
std::vector<ScalarExpr*> ordering_exprs_;
/// Expressions used to materialize slots in the tuples to be sorted.
/// One expr per slot in the materialized tuple.
std::vector<ScalarExpr*> sort_tuple_exprs_;
std::vector<bool> is_asc_order_;
std::vector<bool> nulls_first_;
/////////////////////////////////////////
/// BEGIN: Members that must be Reset()
/// Object used for external sorting.
boost::scoped_ptr<Sorter> sorter_;
/// The current batch of rows retrieved from the input (the output of child(0)). This
/// allows us to store rows across calls to GetNext when the sorter run fills up.
std::unique_ptr<RowBatch> input_batch_;
/// The index in 'input_batch_' of the next row to be passed to the sorter.
int input_batch_index_;
/// True if the end of the input (the output of child(0)) has been reached.
bool input_eos_;
/// True if the current run in the sorter has been fully output. This node is done when
/// both 'sorter_eos_' and 'input_eos_' are true.
bool sorter_eos_;
/// END: Members that must be Reset()
/////////////////////////////////////////
};
}
#endif