blob: ac6ed60848b8d22a24439b5d8d6d9eabd8654aa4 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <gen_cpp/Metrics_types.h>
#include <gen_cpp/PaloInternalService_types.h>
#include <glog/logging.h>
#include <stddef.h>
#include <boost/container/detail/std_fwd.hpp>
#include <boost/lexical_cast.hpp>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <variant>
#include <vector>
#include "common/compare.h"
#include "common/status.h"
#include "exec/olap_utils.h"
#include "olap/filter_olap_param.h"
#include "olap/olap_common.h"
#include "olap/olap_tuple.h"
#include "runtime/define_primitive_type.h"
#include "runtime/primitive_type.h"
#include "runtime/type_limit.h"
#include "util/runtime_profile.h"
#include "vec/core/types.h"
#include "vec/io/io_helper.h"
#include "vec/runtime/ipv4_value.h"
#include "vec/runtime/ipv6_value.h"
#include "vec/runtime/time_value.h"
#include "vec/runtime/vdatetime_value.h"
namespace doris {
#include "common/compile_check_begin.h"
template <PrimitiveType primitive_type, class T>
std::string cast_to_string(T value, int scale) {
if constexpr (primitive_type == TYPE_DECIMAL32) {
return ((vectorized::Decimal<int32_t>)value).to_string(scale);
} else if constexpr (primitive_type == TYPE_DECIMAL64) {
return ((vectorized::Decimal<int64_t>)value).to_string(scale);
} else if constexpr (primitive_type == TYPE_DECIMAL128I) {
return ((vectorized::Decimal<int128_t>)value).to_string(scale);
} else if constexpr (primitive_type == TYPE_DECIMAL256) {
return ((vectorized::Decimal<wide::Int256>)value).to_string(scale);
} else if constexpr (primitive_type == TYPE_TINYINT) {
return std::to_string(static_cast<int>(value));
} else if constexpr (primitive_type == TYPE_LARGEINT) {
return vectorized::int128_to_string(value);
} else if constexpr (primitive_type == TYPE_DATETIMEV2) {
auto datetimev2_val = static_cast<DateV2Value<DateTimeV2ValueType>>(value);
char buf[30];
datetimev2_val.to_string(buf);
std::stringstream ss;
ss << buf;
return ss.str();
} else if constexpr (primitive_type == TYPE_TIMESTAMPTZ) {
auto timestamptz_val = static_cast<TimestampTzValue>(value);
return timestamptz_val.to_string(cctz::utc_time_zone(), scale);
} else if constexpr (primitive_type == TYPE_TIMEV2) {
return TimeValue::to_string(value, scale);
} else if constexpr (primitive_type == TYPE_IPV4) {
return IPv4Value::to_string(value);
} else if constexpr (primitive_type == TYPE_IPV6) {
return IPv6Value::to_string(value);
} else {
return boost::lexical_cast<std::string>(value);
}
}
/**
* @brief Column's value range
**/
template <PrimitiveType primitive_type>
class ColumnValueRange {
public:
using CppType = std::conditional_t<primitive_type == TYPE_HLL, StringRef,
typename PrimitiveTypeTraits<primitive_type>::CppType>;
using SetType = std::set<CppType, doris::Less<CppType>>;
using IteratorType = typename SetType::iterator;
ColumnValueRange();
ColumnValueRange(std::string col_name, bool is_nullable_col, int precision, int scale);
// should add fixed value before add range
Status add_fixed_value(const CppType& value);
// should remove fixed value after add fixed value
void remove_fixed_value(const CppType& value);
Status add_range(SQLFilterOp op, CppType value);
bool is_fixed_value_range() const;
bool is_scope_value_range() const;
bool is_empty_value_range() const;
bool is_fixed_value_convertible() const;
bool is_range_value_convertible() const;
void convert_to_range_value();
bool convert_to_avg_range_value(std::vector<OlapTuple>& begin_scan_keys,
std::vector<OlapTuple>& end_scan_keys, bool& begin_include,
bool& end_include, int32_t max_scan_key_num);
bool convert_to_close_range(std::vector<OlapTuple>& begin_scan_keys,
std::vector<OlapTuple>& end_scan_keys, bool& begin_include,
bool& end_include);
constexpr bool is_reject_split_type() const { return _is_reject_split_type; }
void intersection(ColumnValueRange<primitive_type>& range);
void set_empty_value_range() {
_fixed_values.clear();
_low_value = TYPE_MAX;
_high_value = TYPE_MIN;
_contain_null = false;
}
const SetType& get_fixed_value_set() const { return _fixed_values; }
CppType get_range_max_value() const { return _high_value; }
CppType get_range_min_value() const { return _low_value; }
const CppType* get_range_max_value_ptr() const { return &_high_value; }
const CppType* get_range_min_value_ptr() const { return &_low_value; }
SQLFilterOp get_range_high_op() const { return _high_op; }
SQLFilterOp get_range_low_op() const { return _low_op; }
bool is_low_value_minimum() const { return Compare::equal(_low_value, TYPE_MIN); }
bool is_high_value_maximum() const { return Compare::equal(_high_value, TYPE_MAX); }
bool is_begin_include() const { return _low_op == FILTER_LARGER_OR_EQUAL; }
bool is_end_include() const { return _high_op == FILTER_LESS_OR_EQUAL; }
PrimitiveType type() const { return _column_type; }
const std::string& column_name() const { return _column_name; }
bool contain_null() const { return _contain_null; }
size_t get_fixed_value_size() const { return _fixed_values.size(); }
void set_whole_value_range() {
_fixed_values.clear();
_low_value = TYPE_MIN;
_high_value = TYPE_MAX;
_low_op = FILTER_LARGER_OR_EQUAL;
_high_op = FILTER_LESS_OR_EQUAL;
_contain_null = _is_nullable_col;
}
bool is_whole_value_range() const {
DCHECK(_is_nullable_col || !contain_null())
<< "Non-nullable column cannot contains null value";
return _fixed_values.empty() && Compare::equal(_low_value, TYPE_MIN) &&
Compare::equal(_high_value, TYPE_MAX) && _low_op == FILTER_LARGER_OR_EQUAL &&
_high_op == FILTER_LESS_OR_EQUAL && _is_nullable_col == contain_null();
}
// only two case will set range contain null, call by temp_range in olap scan node
// 'is null' and 'is not null'
// 1. if the pred is 'is null' means the range should be
// empty in fixed_range and _high_value < _low_value
// 2. if the pred is 'is not null' means the range should be whole range and
// 'is not null' be effective
void set_contain_null(bool contain_null) {
if (contain_null) {
set_empty_value_range();
} else {
set_whole_value_range();
}
_contain_null = _is_nullable_col && contain_null;
}
int precision() const { return _precision; }
int scale() const { return _scale; }
static void add_fixed_value_range(ColumnValueRange<primitive_type>& range,
const CppType* value) {
static_cast<void>(range.add_fixed_value(*value));
}
static void remove_fixed_value_range(ColumnValueRange<primitive_type>& range,
const CppType* value) {
range.remove_fixed_value(*value);
}
static void add_value_range(ColumnValueRange<primitive_type>& range, SQLFilterOp op,
const CppType* value) {
static_cast<void>(range.add_range(op, *value));
}
static ColumnValueRange<primitive_type> create_empty_column_value_range(bool is_nullable_col,
int precision,
int scale) {
return ColumnValueRange<primitive_type>("", TYPE_MAX, TYPE_MIN, is_nullable_col, false,
precision, scale);
}
protected:
bool is_in_range(const CppType& value);
private:
ColumnValueRange(std::string col_name, const CppType& min, const CppType& max,
bool contain_null);
ColumnValueRange(std::string col_name, const CppType& min, const CppType& max,
bool is_nullable_col, bool contain_null, int precision, int scale);
const static CppType TYPE_MIN; // Column type's min value
const static CppType TYPE_MAX; // Column type's max value
std::string _column_name;
PrimitiveType _column_type; // Column type (eg: TINYINT,SMALLINT,INT,BIGINT)
CppType _low_value; // Column's low value, closed interval at left
CppType _high_value; // Column's high value, open interval at right
SQLFilterOp _low_op;
SQLFilterOp _high_op;
SetType _fixed_values; // Column's fixed int value
bool _is_nullable_col;
bool _contain_null;
int _precision;
int _scale;
static constexpr bool _is_reject_split_type =
primitive_type == PrimitiveType::TYPE_FLOAT ||
primitive_type == PrimitiveType::TYPE_DOUBLE ||
primitive_type == PrimitiveType::TYPE_LARGEINT ||
primitive_type == PrimitiveType::TYPE_DECIMALV2 ||
primitive_type == PrimitiveType::TYPE_HLL ||
primitive_type == PrimitiveType::TYPE_VARCHAR ||
primitive_type == PrimitiveType::TYPE_CHAR ||
primitive_type == PrimitiveType::TYPE_STRING ||
primitive_type == PrimitiveType::TYPE_BOOLEAN ||
primitive_type == PrimitiveType::TYPE_DATETIME ||
primitive_type == PrimitiveType::TYPE_DATETIMEV2 ||
primitive_type == PrimitiveType::TYPE_TIMESTAMPTZ ||
primitive_type == PrimitiveType::TYPE_DECIMAL256;
};
template <>
const typename ColumnValueRange<TYPE_FLOAT>::CppType ColumnValueRange<TYPE_FLOAT>::TYPE_MIN;
template <>
const typename ColumnValueRange<TYPE_FLOAT>::CppType ColumnValueRange<TYPE_FLOAT>::TYPE_MAX;
template <>
const typename ColumnValueRange<TYPE_DOUBLE>::CppType ColumnValueRange<TYPE_DOUBLE>::TYPE_MIN;
template <>
const typename ColumnValueRange<TYPE_DOUBLE>::CppType ColumnValueRange<TYPE_DOUBLE>::TYPE_MAX;
class OlapScanKeys {
public:
// TODO(gabriel): use ColumnPredicate to extend scan key
template <PrimitiveType primitive_type>
Status extend_scan_key(ColumnValueRange<primitive_type>& range, int32_t max_scan_key_num,
bool* exact_value, bool* eos, bool* should_break);
Status get_key_range(std::vector<std::unique_ptr<OlapScanRange>>* key_range);
bool has_range_value() const { return _has_range_value; }
void clear() {
_has_range_value = false;
_begin_scan_keys.clear();
_end_scan_keys.clear();
}
std::string debug_string() {
std::stringstream ss;
DCHECK(_begin_scan_keys.size() == _end_scan_keys.size());
ss << "ScanKeys:";
for (int i = 0; i < _begin_scan_keys.size(); ++i) {
ss << "ScanKey=" << (_begin_include ? "[" : "(") << _begin_scan_keys[i] << " : "
<< _end_scan_keys[i] << (_end_include ? "]" : ")");
}
return ss.str();
}
size_t size() {
DCHECK(_begin_scan_keys.size() == _end_scan_keys.size());
return _begin_scan_keys.size();
}
void set_is_convertible(bool is_convertible) { _is_convertible = is_convertible; }
private:
std::vector<OlapTuple> _begin_scan_keys;
std::vector<OlapTuple> _end_scan_keys;
bool _has_range_value = false;
bool _begin_include = false;
bool _end_include = false;
bool _is_convertible = false;
};
using ColumnValueRangeType = std::variant<
ColumnValueRange<TYPE_TINYINT>, ColumnValueRange<TYPE_SMALLINT>, ColumnValueRange<TYPE_INT>,
ColumnValueRange<TYPE_BIGINT>, ColumnValueRange<TYPE_LARGEINT>,
ColumnValueRange<TYPE_FLOAT>, ColumnValueRange<TYPE_DOUBLE>, ColumnValueRange<TYPE_IPV4>,
ColumnValueRange<TYPE_IPV6>, ColumnValueRange<TYPE_CHAR>, ColumnValueRange<TYPE_VARCHAR>,
ColumnValueRange<TYPE_STRING>, ColumnValueRange<TYPE_DATE>, ColumnValueRange<TYPE_DATEV2>,
ColumnValueRange<TYPE_DATETIME>, ColumnValueRange<TYPE_DATETIMEV2>,
ColumnValueRange<TYPE_TIMESTAMPTZ>, ColumnValueRange<TYPE_DECIMALV2>,
ColumnValueRange<TYPE_BOOLEAN>, ColumnValueRange<TYPE_HLL>,
ColumnValueRange<TYPE_DECIMAL32>, ColumnValueRange<TYPE_DECIMAL64>,
ColumnValueRange<TYPE_DECIMAL128I>, ColumnValueRange<TYPE_DECIMAL256>>;
template <PrimitiveType primitive_type>
const typename ColumnValueRange<primitive_type>::CppType
ColumnValueRange<primitive_type>::TYPE_MIN =
type_limit<typename ColumnValueRange<primitive_type>::CppType>::min();
template <PrimitiveType primitive_type>
const typename ColumnValueRange<primitive_type>::CppType
ColumnValueRange<primitive_type>::TYPE_MAX =
type_limit<typename ColumnValueRange<primitive_type>::CppType>::max();
template <PrimitiveType primitive_type>
ColumnValueRange<primitive_type>::ColumnValueRange()
: _column_type(INVALID_TYPE), _precision(-1), _scale(-1) {}
template <PrimitiveType primitive_type>
ColumnValueRange<primitive_type>::ColumnValueRange(std::string col_name, const CppType& min,
const CppType& max, bool contain_null)
: _column_name(std::move(col_name)),
_column_type(primitive_type),
_low_value(min),
_high_value(max),
_low_op(FILTER_LARGER_OR_EQUAL),
_high_op(FILTER_LESS_OR_EQUAL),
_is_nullable_col(true),
_contain_null(contain_null),
_precision(-1),
_scale(-1) {}
template <PrimitiveType primitive_type>
ColumnValueRange<primitive_type>::ColumnValueRange(std::string col_name, const CppType& min,
const CppType& max, bool is_nullable_col,
bool contain_null, int precision, int scale)
: _column_name(std::move(col_name)),
_column_type(primitive_type),
_low_value(min),
_high_value(max),
_low_op(FILTER_LARGER_OR_EQUAL),
_high_op(FILTER_LESS_OR_EQUAL),
_is_nullable_col(is_nullable_col),
_contain_null(is_nullable_col && contain_null),
_precision(precision),
_scale(scale) {}
template <PrimitiveType primitive_type>
ColumnValueRange<primitive_type>::ColumnValueRange(std::string col_name, bool is_nullable_col,
int precision, int scale)
: ColumnValueRange(std::move(col_name), TYPE_MIN, TYPE_MAX, is_nullable_col,
is_nullable_col, precision, scale) {}
template <PrimitiveType primitive_type>
Status ColumnValueRange<primitive_type>::add_fixed_value(const CppType& value) {
if (INVALID_TYPE == _column_type) {
return Status::InternalError("AddFixedValue failed, Invalid type");
}
_fixed_values.insert(value);
_contain_null = false;
_high_value = TYPE_MIN;
_low_value = TYPE_MAX;
return Status::OK();
}
template <PrimitiveType primitive_type>
void ColumnValueRange<primitive_type>::remove_fixed_value(const CppType& value) {
_fixed_values.erase(value);
}
template <PrimitiveType primitive_type>
bool ColumnValueRange<primitive_type>::is_fixed_value_range() const {
return _fixed_values.size() != 0;
}
template <PrimitiveType primitive_type>
bool ColumnValueRange<primitive_type>::is_scope_value_range() const {
return Compare::greater(_high_value, _low_value);
}
template <PrimitiveType primitive_type>
bool ColumnValueRange<primitive_type>::is_empty_value_range() const {
if (INVALID_TYPE == _column_type) {
return true;
}
return (!is_fixed_value_range() && !is_scope_value_range() && !contain_null());
}
template <PrimitiveType primitive_type>
bool ColumnValueRange<primitive_type>::is_fixed_value_convertible() const {
if (is_fixed_value_range()) {
return false;
}
if (!is_enumeration_type(_column_type)) {
return false;
}
return true;
}
template <PrimitiveType primitive_type>
bool ColumnValueRange<primitive_type>::is_range_value_convertible() const {
if (!is_fixed_value_range()) {
return false;
}
if (TYPE_NULL == _column_type || TYPE_BOOLEAN == _column_type) {
return false;
}
return true;
}
// The return value indicates whether eos.
template <PrimitiveType primitive_type>
bool ColumnValueRange<primitive_type>::convert_to_close_range(
std::vector<OlapTuple>& begin_scan_keys, std::vector<OlapTuple>& end_scan_keys,
bool& begin_include, bool& end_include) {
if constexpr (!_is_reject_split_type) {
begin_include = true;
end_include = true;
bool is_empty = false;
if (!is_begin_include()) {
if (Compare::equal(_low_value, TYPE_MAX)) {
is_empty = true;
} else {
++_low_value;
}
}
if (!is_end_include()) {
if (Compare::equal(_high_value, TYPE_MIN)) {
is_empty = true;
} else {
--_high_value;
}
}
if (Compare::less(_high_value, _low_value)) {
is_empty = true;
}
if (is_empty && !contain_null()) {
begin_scan_keys.clear();
end_scan_keys.clear();
return true;
}
}
return false;
}
// The return value indicates whether the split result is range or fixed value.
template <PrimitiveType primitive_type>
bool ColumnValueRange<primitive_type>::convert_to_avg_range_value(
std::vector<OlapTuple>& begin_scan_keys, std::vector<OlapTuple>& end_scan_keys,
bool& begin_include, bool& end_include, int32_t max_scan_key_num) {
if constexpr (!_is_reject_split_type) {
CppType min_value = get_range_min_value();
CppType max_value = get_range_max_value();
if constexpr (primitive_type == PrimitiveType::TYPE_DATE) {
min_value.set_type(TimeType::TIME_DATE);
max_value.set_type(TimeType::TIME_DATE);
}
auto empty_range_only_null = Compare::greater(min_value, max_value);
if (empty_range_only_null) {
// Not contain null will be disposed in `convert_to_close_range`, return eos.
DCHECK(contain_null());
}
auto no_split = [&]() -> bool {
begin_scan_keys.emplace_back();
begin_scan_keys.back().add_value(
cast_to_string<primitive_type, CppType>(get_range_min_value(), scale()),
contain_null());
end_scan_keys.emplace_back();
end_scan_keys.back().add_value(
cast_to_string<primitive_type, CppType>(get_range_max_value(), scale()),
empty_range_only_null ? true : false);
return true;
};
if (empty_range_only_null || max_scan_key_num == 1) {
return no_split();
}
auto cast = [](const CppType& value) {
if constexpr (primitive_type == PrimitiveType::TYPE_DATE ||
primitive_type == PrimitiveType::TYPE_DATEV2) {
return value;
} else {
return (int128_t)value;
}
};
// When CppType is date, we can not convert it to integer number and calculate distance.
// In other case, we convert element to int128 to avoit overflow.
int128_t step_size_may_overflow = (cast(max_value) - min_value) / max_scan_key_num;
constexpr size_t MAX_STEP_SIZE = 1 << 20;
// When the step size is too large, the range is easy to not really contain data.
if (step_size_may_overflow > MAX_STEP_SIZE) {
return no_split();
}
int step_size = cast_set<int>(step_size_may_overflow);
int real_step_size = 0;
// Add null key if contain null, must do after no_split check
if (contain_null()) {
begin_scan_keys.emplace_back();
begin_scan_keys.back().add_null();
end_scan_keys.emplace_back();
end_scan_keys.back().add_null();
}
while (true) {
begin_scan_keys.emplace_back();
begin_scan_keys.back().add_value(
cast_to_string<primitive_type, CppType>(min_value, scale()));
if (cast(max_value) - min_value < step_size) {
min_value = max_value;
} else {
min_value += step_size;
}
end_scan_keys.emplace_back();
end_scan_keys.back().add_value(
cast_to_string<primitive_type, CppType>(min_value, scale()));
if (Compare::equal(min_value, max_value)) {
break;
}
++min_value;
++real_step_size;
if (real_step_size > MAX_STEP_SIZE) {
throw Exception(Status::InternalError(
"convert_to_avg_range_value meet error. type={}, step_size={}, "
"min_value={}, max_value={}",
int(primitive_type), step_size,
cast_to_string<primitive_type, CppType>(min_value, scale()),
cast_to_string<primitive_type, CppType>(max_value, scale())));
}
}
return step_size != 0;
}
return false;
}
template <PrimitiveType primitive_type>
void ColumnValueRange<primitive_type>::convert_to_range_value() {
if (!is_range_value_convertible()) {
return;
}
if (!_fixed_values.empty()) {
_low_value = *_fixed_values.begin();
_low_op = FILTER_LARGER_OR_EQUAL;
_high_value = *_fixed_values.rbegin();
_high_op = FILTER_LESS_OR_EQUAL;
_fixed_values.clear();
}
}
template <PrimitiveType primitive_type>
Status ColumnValueRange<primitive_type>::add_range(SQLFilterOp op, const CppType value) {
if (INVALID_TYPE == _column_type) {
return Status::InternalError("AddRange failed, Invalid type");
}
// add range means range should not contain null
_contain_null = false;
if (is_fixed_value_range()) {
std::pair<IteratorType, IteratorType> bound_pair = _fixed_values.equal_range(value);
switch (op) {
case FILTER_LARGER: {
_fixed_values.erase(_fixed_values.begin(), bound_pair.second);
break;
}
case FILTER_LARGER_OR_EQUAL: {
_fixed_values.erase(_fixed_values.begin(), bound_pair.first);
break;
}
case FILTER_LESS: {
if (bound_pair.first == _fixed_values.find(value)) {
_fixed_values.erase(bound_pair.first, _fixed_values.end());
} else {
_fixed_values.erase(bound_pair.second, _fixed_values.end());
}
break;
}
case FILTER_LESS_OR_EQUAL: {
_fixed_values.erase(bound_pair.second, _fixed_values.end());
break;
}
default: {
return Status::InternalError("Add Range fail! Unsupported SQLFilterOp.");
}
}
_high_value = TYPE_MIN;
_low_value = TYPE_MAX;
} else {
if (Compare::greater(_high_value, _low_value)) {
switch (op) {
case FILTER_LARGER: {
if (Compare::greater_equal(value, _low_value)) {
_low_value = value;
_low_op = op;
}
break;
}
case FILTER_LARGER_OR_EQUAL: {
if (Compare::greater(value, _low_value)) {
_low_value = value;
_low_op = op;
}
break;
}
case FILTER_LESS: {
if (Compare::less_equal(value, _high_value)) {
_high_value = value;
_high_op = op;
}
break;
}
case FILTER_LESS_OR_EQUAL: {
if (Compare::less(value, _high_value)) {
_high_value = value;
_high_op = op;
}
break;
}
default: {
return Status::InternalError("Add Range fail! Unsupported SQLFilterOp.");
}
}
}
if (FILTER_LARGER_OR_EQUAL == _low_op && FILTER_LESS_OR_EQUAL == _high_op &&
Compare::equal(_high_value, _low_value)) {
RETURN_IF_ERROR(add_fixed_value(_high_value));
_high_value = TYPE_MIN;
_low_value = TYPE_MAX;
}
}
return Status::OK();
}
template <PrimitiveType primitive_type>
bool ColumnValueRange<primitive_type>::is_in_range(const CppType& value) {
switch (_high_op) {
case FILTER_LESS: {
switch (_low_op) {
case FILTER_LARGER: {
return Compare::less(value, _high_value) && Compare::greater(value, _low_value);
}
case FILTER_LARGER_OR_EQUAL: {
return Compare::less(value, _high_value) && Compare::greater_equal(value, _low_value);
}
default: {
DCHECK(false);
}
}
break;
}
case FILTER_LESS_OR_EQUAL: {
switch (_low_op) {
case FILTER_LARGER: {
return Compare::less_equal(value, _high_value) && Compare::greater(value, _low_value);
}
case FILTER_LARGER_OR_EQUAL: {
return Compare::less_equal(value, _high_value) &&
Compare::greater_equal(value, _low_value);
}
default: {
DCHECK(false);
}
}
}
default: {
DCHECK(false);
}
}
return false;
}
template <PrimitiveType primitive_type>
void ColumnValueRange<primitive_type>::intersection(ColumnValueRange<primitive_type>& range) {
// 1. clear if column type not match
if (_column_type != range._column_type) {
set_empty_value_range();
}
// 2. clear if any range is empty
if (is_empty_value_range() || range.is_empty_value_range()) {
set_empty_value_range();
}
SetType result_values;
// 3. fixed_value intersection, fixed value range do not contain null
if (is_fixed_value_range() || range.is_fixed_value_range()) {
if (is_fixed_value_range() && range.is_fixed_value_range()) {
set_intersection(_fixed_values.begin(), _fixed_values.end(),
range._fixed_values.begin(), range._fixed_values.end(),
std::inserter(result_values, result_values.begin()));
} else if (is_fixed_value_range() && !range.is_fixed_value_range()) {
IteratorType iter = _fixed_values.begin();
while (iter != _fixed_values.end()) {
if (range.is_in_range(*iter)) {
result_values.insert(*iter);
}
++iter;
}
} else if (!is_fixed_value_range() && range.is_fixed_value_range()) {
IteratorType iter = range._fixed_values.begin();
while (iter != range._fixed_values.end()) {
if (this->is_in_range(*iter)) {
result_values.insert(*iter);
}
++iter;
}
}
if (!result_values.empty()) {
_fixed_values = std::move(result_values);
_contain_null = false;
_high_value = TYPE_MIN;
_low_value = TYPE_MAX;
} else {
set_empty_value_range();
}
} else {
if (contain_null() && range.contain_null()) {
// if both is_whole_range to keep the same, else set_contain_null
if (!is_whole_value_range() || !range.is_whole_value_range()) {
set_contain_null(true);
}
} else {
static_cast<void>(add_range(range._high_op, range._high_value));
static_cast<void>(add_range(range._low_op, range._low_value));
}
}
}
template <PrimitiveType primitive_type>
Status OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range,
int32_t max_scan_key_num, bool* exact_value, bool* eos,
bool* should_break) {
using CppType = std::conditional_t<primitive_type == TYPE_HLL, StringRef,
typename PrimitiveTypeTraits<primitive_type>::CppType>;
using ConstIterator = typename ColumnValueRange<primitive_type>::SetType::const_iterator;
// 1. clear ScanKey if some column range is empty
if (range.is_empty_value_range()) {
_begin_scan_keys.clear();
_end_scan_keys.clear();
return Status::OK();
}
// 2. stop extend ScanKey when it's already extend a range value
if (_has_range_value) {
return Status::OK();
}
//if a column doesn't have any predicate, we will try converting the range to fixed values
auto scan_keys_size = _begin_scan_keys.empty() ? 1 : _begin_scan_keys.size();
if (range.is_fixed_value_range()) {
if (range.get_fixed_value_size() > max_scan_key_num / scan_keys_size) {
if (range.is_range_value_convertible()) {
range.convert_to_range_value();
*exact_value = false;
} else {
*should_break = true;
return Status::OK();
}
}
} else {
if (_begin_scan_keys.empty() && range.is_fixed_value_convertible() && _is_convertible &&
!range.is_reject_split_type()) {
*eos |= range.convert_to_close_range(_begin_scan_keys, _end_scan_keys, _begin_include,
_end_include);
if (!(*eos) &&
range.convert_to_avg_range_value(_begin_scan_keys, _end_scan_keys, _begin_include,
_end_include, max_scan_key_num)) {
_has_range_value = true;
}
return Status::OK();
}
}
// 3.1 extend ScanKey with FixedValueRange
if (range.is_fixed_value_range()) {
// 3.1.1 construct num of fixed value ScanKey (begin_key == end_key)
if (_begin_scan_keys.empty()) {
auto fixed_value_set = range.get_fixed_value_set();
ConstIterator iter = fixed_value_set.begin();
for (; iter != fixed_value_set.end(); ++iter) {
_begin_scan_keys.emplace_back();
_begin_scan_keys.back().add_value(
cast_to_string<primitive_type, CppType>(*iter, range.scale()));
_end_scan_keys.emplace_back();
_end_scan_keys.back().add_value(
cast_to_string<primitive_type, CppType>(*iter, range.scale()));
}
if (range.contain_null()) {
_begin_scan_keys.emplace_back();
_begin_scan_keys.back().add_null();
_end_scan_keys.emplace_back();
_end_scan_keys.back().add_null();
}
} // 3.1.2 produces the Cartesian product of ScanKey and fixed_value
else {
auto fixed_value_set = range.get_fixed_value_set();
size_t original_key_range_size = _begin_scan_keys.size();
for (int i = 0; i < original_key_range_size; ++i) {
OlapTuple start_base_key_range = _begin_scan_keys[i];
OlapTuple end_base_key_range = _end_scan_keys[i];
ConstIterator iter = fixed_value_set.begin();
for (; iter != fixed_value_set.end(); ++iter) {
// alter the first ScanKey in original place
if (iter == fixed_value_set.begin()) {
_begin_scan_keys[i].add_value(
cast_to_string<primitive_type, CppType>(*iter, range.scale()));
_end_scan_keys[i].add_value(
cast_to_string<primitive_type, CppType>(*iter, range.scale()));
} // append follow ScanKey
else {
_begin_scan_keys.push_back(start_base_key_range);
_begin_scan_keys.back().add_value(
cast_to_string<primitive_type, CppType>(*iter, range.scale()));
_end_scan_keys.push_back(end_base_key_range);
_end_scan_keys.back().add_value(
cast_to_string<primitive_type, CppType>(*iter, range.scale()));
}
}
if (range.contain_null()) {
_begin_scan_keys.push_back(start_base_key_range);
_begin_scan_keys.back().add_null();
_end_scan_keys.push_back(end_base_key_range);
_end_scan_keys.back().add_null();
}
}
}
_begin_include = true;
_end_include = true;
} // Extend ScanKey with range value
else {
_has_range_value = true;
/// if max < min, this range should only contains a null value.
if (Compare::less(range.get_range_max_value(), range.get_range_min_value())) {
CHECK(range.contain_null());
if (_begin_scan_keys.empty()) {
_begin_scan_keys.emplace_back();
_begin_scan_keys.back().add_null();
_end_scan_keys.emplace_back();
_end_scan_keys.back().add_null();
} else {
for (int i = 0; i < _begin_scan_keys.size(); ++i) {
_begin_scan_keys[i].add_null();
_end_scan_keys[i].add_null();
}
}
} else if (_begin_scan_keys.empty()) {
_begin_scan_keys.emplace_back();
_begin_scan_keys.back().add_value(cast_to_string<primitive_type, CppType>(
range.get_range_min_value(), range.scale()),
range.contain_null());
_end_scan_keys.emplace_back();
_end_scan_keys.back().add_value(cast_to_string<primitive_type, CppType>(
range.get_range_max_value(), range.scale()));
} else {
for (int i = 0; i < _begin_scan_keys.size(); ++i) {
_begin_scan_keys[i].add_value(cast_to_string<primitive_type, CppType>(
range.get_range_min_value(), range.scale()),
range.contain_null());
}
for (int i = 0; i < _end_scan_keys.size(); ++i) {
_end_scan_keys[i].add_value(cast_to_string<primitive_type, CppType>(
range.get_range_max_value(), range.scale()));
}
}
_begin_include = range.is_begin_include();
_end_include = range.is_end_include();
}
return Status::OK();
}
#include "common/compile_check_end.h"
} // namespace doris