blob: 6bcb86368c74f6a67afe913059fe222811756654 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_UTIL_TUPLE_ROW_COMPARE_H_
#define IMPALA_UTIL_TUPLE_ROW_COMPARE_H_
#include "codegen/codegen-fn-ptr.h"
#include "codegen/impala-ir.h"
#include "common/compiler-util.h"
#include "exprs/scalar-expr.h"
#include "runtime/descriptors.h"
#include "runtime/raw-value.h"
#include "runtime/raw-value.inline.h"
#include "runtime/tuple.h"
#include "runtime/tuple-row.h"
namespace impala {
class FragmentState;
class RuntimeState;
class ScalarExprEvaluator;
/// TupleRowComparatorConfig contains the static state initialized from its corresponding
/// thrift structure. It serves as an input for creating instances of the
/// TupleRowComparator class.
class TupleRowComparatorConfig {
public:
/// 'tsort_info' : the thrift struct contains all relevant input params.
/// 'ordering_exprs': the ordering expressions for tuple comparison created from the
/// params in 'tsort_info'.
TupleRowComparatorConfig(
const TSortInfo& tsort_info, const std::vector<ScalarExpr*>& ordering_exprs);
/// Codegens a Compare() function for this comparator that is used in Compare().
/// The pointer to this llvm function object is returned in 'compare_fn'.
Status Codegen(FragmentState* state, llvm::Function** compare_fn);
/// A version of the above Cdegen() function that does not care the llvm function
/// object.
Status Codegen(FragmentState* state) {
llvm::Function* compare_fn = nullptr;
return Codegen(state, &compare_fn);
}
/// Indicates the sorting ordering used. Specified using the SORT BY clause.
TSortingOrder::type sorting_order_;
/// References to ordering expressions owned by the plan node which owns this
/// TupleRowComparatorConfig.
const std::vector<ScalarExpr*>& ordering_exprs_;
/// Indicates, for each ordering expr, whether it enforces an ascending order or not.
/// Not Owned.
const std::vector<bool>& is_asc_;
/// Indicates, for each ordering expr, if nulls should be listed first or last. As a
/// optimization for the TupleRowComparator::Compare() implementation, true/false is
/// stored as +/- 1 respectively. This is independent of is_asc_.
std::vector<int8_t> nulls_first_;
/// Codegened version of TupleRowComparator::Compare().
typedef int (*CompareFn)(ScalarExprEvaluator* const*, ScalarExprEvaluator* const*,
const TupleRow*, const TupleRow*);
CodegenFnPtr<CompareFn> codegend_compare_fn_;
private:
/// Codegen TupleRowLexicalComparator::Compare(). Returns a non-OK status if codegen is
/// unsuccessful. TODO: inline this at codegen'd callsites instead of indirectly calling
/// via function pointer.
Status CodegenLexicalCompare(LlvmCodeGen* codegen, llvm::Function** fn);
};
/// Interface for comparing two TupleRows based on a set of exprs.
class TupleRowComparator {
public:
TupleRowComparator(const TupleRowComparatorConfig& config)
: ordering_exprs_(config.ordering_exprs_),
codegend_compare_fn_(config.codegend_compare_fn_) {
}
virtual ~TupleRowComparator() {}
/// Create the evaluators for the ordering expressions and store them in 'pool'. The
/// evaluators use 'expr_perm_pool' and 'expr_results_pool' for permanent and result
/// allocations made by exprs respectively. 'state' is passed in for initialization
/// of the evaluator.
Status Open(ObjectPool* pool, RuntimeState* state, MemPool* expr_perm_pool,
MemPool* expr_results_pool);
/// Release resources held by the ordering expressions' evaluators.
void Close(RuntimeState* state);
int ALWAYS_INLINE Compare(const TupleRow* lhs, const TupleRow* rhs) const {
return Compare(nullptr, nullptr, lhs, rhs);
}
/// Returns true if lhs is strictly less than rhs.
/// All exprs (ordering_exprs_lhs_ and ordering_exprs_rhs_) must have been prepared
/// and opened before calling this.
/// Force inlining because it tends not to be always inlined at callsites, even in
/// hot loops.
bool ALWAYS_INLINE Less(const TupleRow* lhs, const TupleRow* rhs) const {
return Compare(
ordering_expr_evals_lhs_.data(), ordering_expr_evals_rhs_.data(), lhs, rhs)
< 0;
}
bool ALWAYS_INLINE Less(const Tuple* lhs, const Tuple* rhs) const {
TupleRow* lhs_row = reinterpret_cast<TupleRow*>(&lhs);
TupleRow* rhs_row = reinterpret_cast<TupleRow*>(&rhs);
return Less(lhs_row, rhs_row);
}
/// A Symbol (or a substring of the symbol) of following.
///
/// int Compare(ScalarExprEvaluator* const* evaluator_lhs,
/// ScalarExprEvaluator* const* evaluator_rhs, const TupleRow* lhs,
/// const TupleRow* rhs) const;
///
/// It is passed to LlvmCodeGen::ReplaceCallSites().
static const char* COMPARE_SYMBOL;
/// Class name in LLVM IR.
static const char* LLVM_CLASS_NAME;
protected:
/// References to ordering expressions owned by the plan node which owns the
/// TupleRowComparatorConfig used to create this instance.
const std::vector<ScalarExpr*>& ordering_exprs_;
/// The evaluators for the LHS and RHS ordering expressions. The RHS evaluator is
/// created via cloning the evaluator after it has been Opened().
std::vector<ScalarExprEvaluator*> ordering_expr_evals_lhs_;
std::vector<ScalarExprEvaluator*> ordering_expr_evals_rhs_;
/// Reference to the codegened function pointer owned by the TupleRowComparatorConfig
/// object that was used to create this instance.
const CodegenFnPtr<TupleRowComparatorConfig::CompareFn>& codegend_compare_fn_;
private:
/// Interpreted implementation of Compare().
virtual int CompareInterpreted(const TupleRow* lhs, const TupleRow* rhs) const = 0;
/// Returns a negative value if lhs is less than rhs, a positive value if lhs is
/// greater than rhs, or 0 if they are equal. All exprs (ordering_exprs_lhs_ and
/// ordering_exprs_rhs_) must have been prepared and opened before calling this,
/// i.e. 'sort_key_exprs' in the constructor must have been opened.
///
/// This method calls CompareInterpreted(lhs, rhs) in non-code-gen version and
/// morphs into the code-gen version otherwise.
///
/// The presence of evaluator_lhs and evaluator_rhs in argument is to match similar
/// arguments in the code-gen version.
///
/// Mark the method IR_NO_INLINE to facilitate call site replacement during code-gen.
/// Do not remove this attribute.
IR_NO_INLINE int Compare(ScalarExprEvaluator* const* evaluator_lhs,
ScalarExprEvaluator* const* evaluator_rhs, const TupleRow* lhs,
const TupleRow* rhs) const;
};
/// Compares two TupleRows based on a set of exprs, in lexicographical order.
class TupleRowLexicalComparator : public TupleRowComparator {
public:
/// 'ordering_exprs': the ordering expressions for tuple comparison.
/// 'is_asc' determines, for each expr, if it should be ascending or descending sort
/// order.
/// 'nulls_first' determines, for each expr, if nulls should come before or after all
/// other values.
TupleRowLexicalComparator(const TupleRowComparatorConfig& config)
: TupleRowComparator(config),
is_asc_(config.is_asc_),
nulls_first_(config.nulls_first_) {
DCHECK(config.sorting_order_ == TSortingOrder::LEXICAL);
DCHECK_EQ(nulls_first_.size(), ordering_exprs_.size());
DCHECK_EQ(is_asc_.size(), ordering_exprs_.size());
}
private:
const std::vector<bool>& is_asc_;
const std::vector<int8_t>& nulls_first_;
int CompareInterpreted(const TupleRow* lhs, const TupleRow* rhs) const override;
};
/// Compares two TupleRows based on a set of exprs, in Z-order.
class TupleRowZOrderComparator : public TupleRowComparator {
public:
/// 'ordering_exprs': the ordering expressions for tuple comparison.
TupleRowZOrderComparator(const TupleRowComparatorConfig& config)
: TupleRowComparator(config) {
DCHECK(config.sorting_order_ == TSortingOrder::ZORDER);
}
private:
typedef __uint128_t uint128_t;
int CompareInterpreted(const TupleRow* lhs, const TupleRow* rhs) const override;
/// Compares the rows using Z-ordering. The function does not calculate the actual
/// Z-value, only looks for the column with the most significant dimension, and compares
/// the values of that column. To make this possible, a unified type is necessary where
/// all values share the same bit-representation. The three common types which all the
/// others are converted to are uint32_t, uint64_t and uint128_t. Comparing smaller
/// types (ie. having less bits) with bigger ones would make the bigger type much more
/// dominant therefore the bits of these smaller types are shifted up.
template<typename U>
int CompareBasedOnSize(const TupleRow* lhs, const TupleRow* rhs) const;
/// We transform the original a and b values to their "shared representation", a' and b'
/// in a way that if a < b then a' is lexically less than b' regarding to their bits.
/// Thus, for ints INT_MIN would be 0, INT_MIN+1 would be 1, and so on, and in the end
/// INT_MAX would be 111..111.
/// The basic concept of getting the shared representation is as follows:
/// 1. Reinterpret void* as the actual type
/// 2. Convert the number to the chosen unsigned type (U)
/// 3. If U is bigger than the actual type, the bits of the small type are shifted up.
/// 4. Flip the sign bit because the value was converted to unsigned.
/// Note that floating points are represented differently, where for negative values all
/// bits have to get flipped.
/// Null values will be treated as the minimum value (unsigned 0).
template <typename U>
U GetSharedRepresentation(void* val, ColumnType type) const;
template <typename U>
U inline GetSharedStringRepresentation(const char* char_ptr, int length) const;
template <typename U, typename T>
U inline GetSharedIntRepresentation(const T val, U mask) const;
template <typename U, typename T>
U inline GetSharedFloatRepresentation(void* val, U mask) const;
};
/// Compares the equality of two Tuples, going slot by slot.
struct TupleEqualityChecker {
TupleDescriptor* tuple_desc_;
TupleEqualityChecker(TupleDescriptor* tuple_desc) : tuple_desc_(tuple_desc) {
}
bool Equal(Tuple* x, Tuple* y) {
const std::vector<SlotDescriptor*>& slots = tuple_desc_->slots();
for (int i = 0; i < slots.size(); ++i) {
SlotDescriptor* slot = slots[i];
if (slot->is_nullable()) {
const NullIndicatorOffset& null_offset = slot->null_indicator_offset();
if (x->IsNull(null_offset) || y->IsNull(null_offset)) {
if (x->IsNull(null_offset) && y->IsNull(null_offset)) {
continue;
} else {
return false;
}
}
}
int tuple_offset = slot->tuple_offset();
const ColumnType& type = slot->type();
if (!RawValue::Eq(x->GetSlot(tuple_offset), y->GetSlot(tuple_offset), type)) {
return false;
}
}
return true;
}
};
/// Compares the equality of two TupleRows, going tuple by tuple.
struct RowEqualityChecker {
std::vector<TupleEqualityChecker> tuple_checkers_;
RowEqualityChecker(const RowDescriptor& row_desc) {
const std::vector<TupleDescriptor*>& tuple_descs = row_desc.tuple_descriptors();
for (int i = 0; i < tuple_descs.size(); ++i) {
tuple_checkers_.push_back(TupleEqualityChecker(tuple_descs[i]));
}
}
bool Equal(const TupleRow* x, const TupleRow* y) {
for (int i = 0; i < tuple_checkers_.size(); ++i) {
Tuple* x_tuple = x->GetTuple(i);
Tuple* y_tuple = y->GetTuple(i);
if (!tuple_checkers_[i].Equal(x_tuple, y_tuple)) return false;
}
return true;
}
};
}
#endif