blob: 82cf54cd797a045caf99bc9e5dc89ac0b035b6ee [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_UTIL_MIN_MAX_FILTER_H
#define IMPALA_UTIL_MIN_MAX_FILTER_H
#include "gen-cpp/ImpalaInternalService_types.h"
#include "impala-ir/impala-ir-functions.h"
#include "runtime/decimal-value.h"
#include "runtime/string-buffer.h"
#include "runtime/string-value.h"
#include "runtime/timestamp-value.h"
#include "runtime/types.h"
namespace impala {
class MemTacker;
class ObjectPool;
/// A MinMaxFilter tracks the min and max currently seen values in a data set for use in
/// runtime filters.
///
/// Filters are constructed using MinMaxFilter::Create() which returns a MinMaxFilter of
/// the appropriate type. Values can then be added using Insert(), and the min and max can
/// be retrieved using GetMin()/GetMax().
///
/// MinMaxFilters ignore NULL values, and so are only appropriate to use as a runtime
/// filter if the join predicate is '=' and not 'is not distinct from'.
class MinMaxFilter {
public:
virtual ~MinMaxFilter() {}
virtual void Close() {}
/// Returns the min/max values in the tuple slot representation. It is not valid to call
/// these functions if AlwaysFalse() returns true.
virtual void* GetMin() = 0;
virtual void* GetMax() = 0;
/// Returns the min/max values in the out paramsters 'out_min'/'out_max', converted to
/// fit into 'type', eg. if the calculated max value is greater than the max value for
/// 'type', the returned max is the max for 'type'. Returns false if the entire range
/// from the calculated min to max is outside the range for 'type'. May only be called
/// for integer-typed filters.
virtual bool GetCastIntMinMax(
const ColumnType& type, int64_t* out_min, int64_t* out_max);
virtual PrimitiveType type() = 0;
/// Add a new value, updating the current min/max.
virtual void Insert(void* val) = 0;
/// If true, this filter allows all rows to pass.
virtual bool AlwaysTrue() const = 0;
/// If true, this filter doesn't allow any rows to pass.
virtual bool AlwaysFalse() const = 0;
/// Materialize filter values by copying any values stored by filters into memory owned
/// by the filter. Filters may assume that the memory for Insert()-ed values stays valid
/// until this is called.
virtual void MaterializeValues() {}
/// Convert this filter to a thrift representation.
virtual void ToThrift(TMinMaxFilter* thrift) const = 0;
virtual std::string DebugString() const = 0;
/// Returns a new MinMaxFilter with the given type, allocated from 'mem_tracker'.
static MinMaxFilter* Create(ColumnType type, ObjectPool* pool, MemTracker* mem_tracker);
/// Returns a new MinMaxFilter created from the thrift representation, allocated from
/// 'mem_tracker'.
static MinMaxFilter* Create(const TMinMaxFilter& thrift, ColumnType type,
ObjectPool* pool, MemTracker* mem_tracker);
/// Computes the logical OR of 'in' with 'out' and stores the result in 'out'.
static void Or(
const TMinMaxFilter& in, TMinMaxFilter* out, const ColumnType& columnType);
/// Copies the contents of 'in' into 'out'.
static void Copy(const TMinMaxFilter& in, TMinMaxFilter* out);
/// Returns the LLVM_CLASS_NAME for the given type.
static std::string GetLlvmClassName(PrimitiveType type);
/// Returns the IRFunction::Type for Insert() for the given type.
static IRFunction::Type GetInsertIRFunctionType(ColumnType col_type);
};
#define NUMERIC_MIN_MAX_FILTER(NAME, TYPE) \
class NAME##MinMaxFilter : public MinMaxFilter { \
public: \
NAME##MinMaxFilter() { \
min_ = std::numeric_limits<TYPE>::max(); \
max_ = std::numeric_limits<TYPE>::lowest(); \
} \
NAME##MinMaxFilter(const TMinMaxFilter& thrift); \
virtual ~NAME##MinMaxFilter() {} \
virtual void* GetMin() override { return &min_; } \
virtual void* GetMax() override { return &max_; } \
virtual bool GetCastIntMinMax( \
const ColumnType& type, int64_t* out_min, int64_t* out_max) override; \
virtual PrimitiveType type() override; \
virtual void Insert(void* val) override; \
virtual bool AlwaysTrue() const override { return false; } \
virtual bool AlwaysFalse() const override { \
return min_ == std::numeric_limits<TYPE>::max() \
&& max_ == std::numeric_limits<TYPE>::lowest(); \
} \
virtual void ToThrift(TMinMaxFilter* thrift) const override; \
virtual std::string DebugString() const override; \
static void Or(const TMinMaxFilter& in, TMinMaxFilter* out); \
static void Copy(const TMinMaxFilter& in, TMinMaxFilter* out); \
static const char* LLVM_CLASS_NAME; \
\
private: \
TYPE min_; \
TYPE max_; \
};
NUMERIC_MIN_MAX_FILTER(Bool, bool);
NUMERIC_MIN_MAX_FILTER(TinyInt, int8_t);
NUMERIC_MIN_MAX_FILTER(SmallInt, int16_t);
NUMERIC_MIN_MAX_FILTER(Int, int32_t);
NUMERIC_MIN_MAX_FILTER(BigInt, int64_t);
NUMERIC_MIN_MAX_FILTER(Float, float);
NUMERIC_MIN_MAX_FILTER(Double, double);
class StringMinMaxFilter : public MinMaxFilter {
public:
StringMinMaxFilter(MemTracker* mem_tracker)
: mem_pool_(mem_tracker),
min_buffer_(&mem_pool_),
max_buffer_(&mem_pool_),
always_false_(true),
always_true_(false) {}
StringMinMaxFilter(const TMinMaxFilter& thrift, MemTracker* mem_tracker);
virtual ~StringMinMaxFilter() {}
virtual void Close() override { mem_pool_.FreeAll(); }
virtual void* GetMin() override { return &min_; }
virtual void* GetMax() override { return &max_; }
virtual PrimitiveType type() override;
virtual void Insert(void* val) override;
virtual bool AlwaysTrue() const override { return always_true_; }
virtual bool AlwaysFalse() const override { return always_false_; }
/// Copies the values pointed to by 'min_'/'max_' into 'min_buffer_'/'max_buffer_',
/// truncating them if necessary.
virtual void MaterializeValues() override;
virtual void ToThrift(TMinMaxFilter* thrift) const override;
virtual std::string DebugString() const override;
static void Or(const TMinMaxFilter& in, TMinMaxFilter* out);
static void Copy(const TMinMaxFilter& in, TMinMaxFilter* out);
/// Struct name in LLVM IR.
static const char* LLVM_CLASS_NAME;
private:
/// Copies the contents of 'value' into 'buffer', up to 'len', and reassignes 'value' to
/// point to 'buffer'. If an oom is hit, disables the filter by setting 'always_true_'
/// to true.
void CopyToBuffer(StringBuffer* buffer, StringValue* value, int64_t len);
/// Sets 'always_true_' to true and clears the values of 'min_', 'max_', 'min_buffer_',
/// and 'max_buffer_'.
void SetAlwaysTrue();
/// The maximum length of string to store in 'min_str_' or 'max_str_'. Strings inserted
/// into this filter that are longer than this will be truncated.
static const int MAX_BOUND_LENGTH;
/// MemPool that 'min_buffer_'/'max_buffer_' are allocated from.
MemPool mem_pool_;
/// The min/max values. After a call to MaterializeValues() these will point to
/// 'min_buffer_'/'max_buffer_'.
StringValue min_;
StringValue max_;
/// Local buffers to copy min/max data into. If Insert() was called and 'min_'/'max_'
/// was updated, these will be empty until MaterializeValues() is called.
StringBuffer min_buffer_;
StringBuffer max_buffer_;
/// True if no rows have been inserted.
bool always_false_;
bool always_true_;
};
class TimestampMinMaxFilter : public MinMaxFilter {
public:
TimestampMinMaxFilter() { always_false_ = true; }
TimestampMinMaxFilter(const TMinMaxFilter& thrift);
virtual ~TimestampMinMaxFilter() {}
virtual void* GetMin() override { return &min_; }
virtual void* GetMax() override { return &max_; }
virtual PrimitiveType type() override;
virtual void Insert(void* val) override;
virtual bool AlwaysTrue() const override { return false; }
virtual bool AlwaysFalse() const override { return always_false_; }
virtual void ToThrift(TMinMaxFilter* thrift) const override;
virtual std::string DebugString() const override;
static void Or(const TMinMaxFilter& in, TMinMaxFilter* out);
static void Copy(const TMinMaxFilter& in, TMinMaxFilter* out);
/// Struct name in LLVM IR.
static const char* LLVM_CLASS_NAME;
private:
TimestampValue min_;
TimestampValue max_;
/// True if no rows have been inserted.
bool always_false_;
};
#define DECIMAL_SIZE_4BYTE 4
#define DECIMAL_SIZE_8BYTE 8
#define DECIMAL_SIZE_16BYTE 16
// body of GetMin and GetMax defined below
#pragma push_macro("GET_MINMAX")
#define GET_MINMAX(MIN_OR_MAX) \
do { \
switch (size_) { \
case DECIMAL_SIZE_4BYTE: \
return &(MIN_OR_MAX##4_); \
break; \
case DECIMAL_SIZE_8BYTE: \
return &(MIN_OR_MAX##8_); \
break; \
case DECIMAL_SIZE_16BYTE: \
return &(MIN_OR_MAX##16_); \
break; \
default: \
DCHECK(false) << "Unknown decimal size: " << size_; \
} \
} while (false)
// Decimal data can be stored using 4 bytes, 8 bytes or 16 bytes. The filter
// is for a particular column, and the size needed is fixed based on the
// column specification. So, a union is used to store the min and max value.
// The precision comes in as input. Based on the precision the size is found and
// the respective variable will be used to store the value. The code is
// macro-ized to handle different sized decimals.
class DecimalMinMaxFilter : public MinMaxFilter {
public:
DecimalMinMaxFilter(int precision)
: size_(ColumnType::GetDecimalByteSize(precision)), always_false_(true) {}
DecimalMinMaxFilter(const TMinMaxFilter& thrift, int precision);
virtual ~DecimalMinMaxFilter() {}
virtual void* GetMin() override {
GET_MINMAX(min);
return nullptr;
}
virtual void* GetMax() override {
GET_MINMAX(max);
return nullptr;
}
virtual void Insert(void* val) override;
virtual PrimitiveType type() override;
virtual bool AlwaysTrue() const override { return false; }
virtual bool AlwaysFalse() const override { return always_false_; }
virtual void ToThrift(TMinMaxFilter* thrift) const override;
virtual std::string DebugString() const override;
static void Or(const TMinMaxFilter& in, TMinMaxFilter* out, int precision);
static void Copy(const TMinMaxFilter& in, TMinMaxFilter* out);
void Insert4(void* val);
void Insert8(void* val);
void Insert16(void* val);
/// Struct name in LLVM IR.
static const char* LLVM_CLASS_NAME;
private:
const int size_;
union {
Decimal16Value min16_;
Decimal8Value min8_;
Decimal4Value min4_;
};
union {
Decimal16Value max16_;
Decimal8Value max8_;
Decimal4Value max4_;
};
/// True if no rows have been inserted.
bool always_false_;
};
#pragma pop_macro("GET_MINMAX")
}
#endif