| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "kudu/common/column_predicate.h" |
| |
| #include <algorithm> |
| #include <cstring> |
| #include <iterator> |
| #include <type_traits> |
| |
| #include "kudu/common/columnblock.h" |
| #include "kudu/common/key_util.h" |
| #include "kudu/common/rowblock.h" |
| #include "kudu/common/schema.h" |
| #include "kudu/common/types.h" |
| #include "kudu/gutil/port.h" |
| #include "kudu/gutil/strings/join.h" |
| #include "kudu/gutil/strings/substitute.h" |
| #include "kudu/util/alignment.h" |
| #include "kudu/util/bitmap.h" |
| #include "kudu/util/logging.h" |
| #include "kudu/util/memory/arena.h" |
| |
| using std::move; |
| using std::string; |
| using std::vector; |
| |
| namespace kudu { |
| |
| ColumnPredicate::ColumnPredicate(PredicateType predicate_type, |
| ColumnSchema column, |
| const void* lower, |
| const void* upper) |
| : predicate_type_(predicate_type), |
| column_(move(column)), |
| lower_(lower), |
| upper_(upper) { |
| } |
| |
| ColumnPredicate::ColumnPredicate(PredicateType predicate_type, |
| ColumnSchema column, |
| vector<const void*>* values) |
| : predicate_type_(predicate_type), |
| column_(move(column)), |
| lower_(nullptr), |
| upper_(nullptr) { |
| values_.swap(*values); |
| } |
| |
| ColumnPredicate::ColumnPredicate(PredicateType predicate_type, |
| ColumnSchema column, |
| vector<BlockBloomFilter*> bfs, |
| const void* lower, |
| const void* upper) |
| : predicate_type_(predicate_type), |
| column_(move(column)), |
| lower_(lower), |
| upper_(upper), |
| bloom_filters_(move(bfs)) {} |
| |
| ColumnPredicate ColumnPredicate::Equality(ColumnSchema column, const void* value) { |
| CHECK(value != nullptr); |
| return ColumnPredicate(PredicateType::Equality, move(column), value, nullptr); |
| } |
| |
| ColumnPredicate ColumnPredicate::Range(ColumnSchema column, |
| const void* lower, |
| const void* upper) { |
| CHECK(lower != nullptr || upper != nullptr); |
| ColumnPredicate pred(PredicateType::Range, move(column), lower, upper); |
| pred.Simplify(); |
| return pred; |
| } |
| |
| ColumnPredicate ColumnPredicate::InList(ColumnSchema column, |
| vector<const void*>* values) { |
| CHECK(values != nullptr); |
| |
| // Sort values and remove duplicates. |
| std::sort(values->begin(), values->end(), |
| [&] (const void* a, const void* b) { |
| return column.type_info()->Compare(a, b) < 0; |
| }); |
| values->erase(std::unique(values->begin(), values->end(), |
| [&] (const void* a, const void* b) { |
| return column.type_info()->Compare(a, b) == 0; |
| }), |
| values->end()); |
| |
| ColumnPredicate pred(PredicateType::InList, move(column), values); |
| pred.Simplify(); |
| return pred; |
| } |
| |
| ColumnPredicate ColumnPredicate::InBloomFilter(ColumnSchema column, |
| std::vector<BlockBloomFilter*> bfs, |
| const void* lower, |
| const void* upper) { |
| CHECK(!bfs.empty()); |
| ColumnPredicate pred(PredicateType::InBloomFilter, move(column), move(bfs), lower, |
| upper); |
| pred.Simplify(); |
| return pred; |
| } |
| |
| std::optional<ColumnPredicate> ColumnPredicate::InclusiveRange(ColumnSchema column, |
| const void* lower, |
| const void* upper, |
| Arena* arena) { |
| CHECK(lower != nullptr || upper != nullptr); |
| |
| if (upper != nullptr) { |
| // Transform the upper bound to exclusive by incrementing it. |
| // Make a copy of the value before incrementing in case it's aliased. |
| size_t size = column.type_info()->size(); |
| void* buf = CHECK_NOTNULL(arena->AllocateBytes(size)); |
| memcpy(buf, upper, size); |
| if (!key_util::IncrementCell(column, buf, arena)) { |
| if (lower == nullptr) { |
| if (column.is_nullable()) { |
| // If incrementing the upper bound fails and the column is nullable, |
| // then return an IS NOT NULL predicate, so that null values will be |
| // filtered. |
| return ColumnPredicate::IsNotNull(move(column)); |
| } |
| return std::nullopt; |
| } |
| upper = nullptr; |
| } else { |
| upper = buf; |
| } |
| } |
| return ColumnPredicate::Range(move(column), lower, upper); |
| } |
| |
| ColumnPredicate ColumnPredicate::ExclusiveRange(ColumnSchema column, |
| const void* lower, |
| const void* upper, |
| Arena* arena) { |
| CHECK(lower != nullptr || upper != nullptr); |
| |
| if (lower != nullptr) { |
| // Transform the lower bound to inclusive by incrementing it. |
| // Make a copy of the value before incrementing in case it's aliased. |
| size_t size = column.type_info()->size(); |
| void* buf = CHECK_NOTNULL(arena->AllocateBytes(size)); |
| memcpy(buf, lower, size); |
| if (!key_util::IncrementCell(column, buf, arena)) { |
| // If incrementing the lower bound fails then the predicate can match no values. |
| return ColumnPredicate::None(move(column)); |
| } else { |
| lower = buf; |
| } |
| } |
| return ColumnPredicate::Range(move(column), lower, upper); |
| } |
| |
| ColumnPredicate ColumnPredicate::IsNotNull(ColumnSchema column) { |
| return ColumnPredicate(PredicateType::IsNotNull, move(column), nullptr, nullptr); |
| } |
| |
| ColumnPredicate ColumnPredicate::IsNull(ColumnSchema column) { |
| return column.is_nullable() ? |
| ColumnPredicate(PredicateType::IsNull, move(column), nullptr, nullptr) : |
| None(move(column)); |
| } |
| |
| ColumnPredicate ColumnPredicate::None(ColumnSchema column) { |
| return ColumnPredicate(PredicateType::None, move(column), nullptr, nullptr); |
| } |
| |
| void ColumnPredicate::SetToNone() { |
| predicate_type_ = PredicateType::None; |
| lower_ = nullptr; |
| upper_ = nullptr; |
| } |
| |
| // TODO(granthenke): For decimal columns, use column_.type_attributes().precision |
| // to calculate the "true" max/min values for improved simplification. |
| void ColumnPredicate::Simplify() { |
| auto type_info = column_.type_info(); |
| switch (predicate_type_) { |
| case PredicateType::None: |
| case PredicateType::Equality: |
| case PredicateType::IsNotNull: return; |
| case PredicateType::IsNull: return; |
| case PredicateType::Range: { |
| DCHECK(lower_ != nullptr || upper_ != nullptr); |
| if (lower_ != nullptr && upper_ != nullptr) { |
| // _ <= VALUE < _ |
| if (type_info->Compare(lower_, upper_) >= 0) { |
| // If the range bounds are empty then no results can be returned. |
| SetToNone(); |
| } else if (type_info->AreConsecutive(lower_, upper_)) { |
| // If the values are consecutive, then it is an equality bound. |
| predicate_type_ = PredicateType::Equality; |
| upper_ = nullptr; |
| } |
| } else if (lower_ != nullptr) { |
| // VALUE >= _ |
| if (type_info->IsMinValue(lower_)) { |
| predicate_type_ = PredicateType::IsNotNull; |
| lower_ = nullptr; |
| } else if (type_info->IsMaxValue(lower_)) { |
| predicate_type_ = PredicateType::Equality; |
| } |
| } else if (upper_ != nullptr) { |
| // VALUE < _ |
| if (type_info->IsMinValue(upper_)) { |
| SetToNone(); |
| } |
| } |
| return; |
| }; |
| case PredicateType::InList: { |
| if (values_.empty()) { |
| // If the list is empty, no results can be returned. |
| SetToNone(); |
| } else if (values_.size() == 1) { |
| // List has only one value, so convert to Equality |
| predicate_type_ = PredicateType::Equality; |
| lower_ = values_[0]; |
| values_.clear(); |
| } else if (type_info->type() == BOOL) { |
| // If this is a boolean IN list with both true and false in the list, |
| // then we can just convert it to IS NOT NULL. This same simplification |
| // could be done for other integer types, but it's probably not as |
| // common (and hard to test). |
| predicate_type_ = PredicateType::IsNotNull; |
| lower_ = nullptr; |
| upper_ = nullptr; |
| values_.clear(); |
| } |
| return; |
| }; |
| case PredicateType::InBloomFilter: { |
| if (lower_ == nullptr && upper_ == nullptr) { |
| return; |
| } |
| // Merge the optional lower and upper bound. |
| if (lower_ != nullptr && upper_ != nullptr) { |
| if (type_info->Compare(lower_, upper_) >= 0) { |
| // If the range bounds are empty then no results can be returned. |
| SetToNone(); |
| } else if (type_info->AreConsecutive(lower_, upper_)) { |
| if (CheckValueInBloomFilter(lower_)) { |
| predicate_type_ = PredicateType::Equality; |
| upper_ = nullptr; |
| bloom_filters_.clear(); |
| } else { |
| SetToNone(); |
| } |
| } |
| } else if (lower_ != nullptr) { |
| if (type_info->IsMinValue(lower_)) { |
| lower_ = nullptr; |
| } else if (type_info->IsMaxValue(lower_)) { |
| if (CheckValueInBloomFilter(lower_)) { |
| predicate_type_ = PredicateType::Equality; |
| bloom_filters_.clear(); |
| } else { |
| SetToNone(); |
| } |
| } |
| } else if (upper_ != nullptr) { |
| if (type_info->IsMinValue(upper_)) { |
| SetToNone(); |
| } |
| } |
| return; |
| }; |
| } |
| LOG(FATAL) << "unknown predicate type"; |
| } |
| |
| void ColumnPredicate::Merge(const ColumnPredicate& other) { |
| CHECK(column_.Equals(other.column_, ColumnSchema::COMPARE_NAME_AND_TYPE)); |
| switch (predicate_type_) { |
| case PredicateType::None: return; |
| case PredicateType::Range: { |
| MergeIntoRange(other); |
| return; |
| }; |
| case PredicateType::Equality: { |
| MergeIntoEquality(other); |
| return; |
| }; |
| case PredicateType::IsNotNull: { |
| MergeIntoIsNotNull(other); |
| return; |
| }; |
| case PredicateType::IsNull: { |
| MergeIntoIsNull(other); |
| return; |
| } |
| case PredicateType::InList: { |
| MergeIntoInList(other); |
| return; |
| }; |
| case PredicateType::InBloomFilter: { |
| MergeIntoBloomFilter(other); |
| return; |
| }; |
| } |
| LOG(FATAL) << "unknown predicate type"; |
| } |
| |
| void ColumnPredicate::MergeIntoRange(const ColumnPredicate& other) { |
| CHECK(predicate_type_ == PredicateType::Range); |
| |
| switch (other.predicate_type()) { |
| case PredicateType::None: |
| SetToNone(); |
| return; |
| case PredicateType::InBloomFilter: |
| bloom_filters_ = other.bloom_filters_; |
| predicate_type_ = PredicateType::InBloomFilter; |
| [[fallthrough]]; |
| case PredicateType::Range: |
| // Set the lower bound to the larger of the two. |
| if (other.lower_ != nullptr && |
| (lower_ == nullptr || column_.type_info()->Compare(lower_, other.lower_) < 0)) { |
| lower_ = other.lower_; |
| } |
| |
| // Set the upper bound to the smaller of the two. |
| if (other.upper_ != nullptr && |
| (upper_ == nullptr || column_.type_info()->Compare(upper_, other.upper_) > 0)) { |
| upper_ = other.upper_; |
| } |
| |
| Simplify(); |
| return; |
| case PredicateType::Equality: |
| if ((lower_ != nullptr && column_.type_info()->Compare(lower_, other.lower_) > 0) || |
| (upper_ != nullptr && column_.type_info()->Compare(upper_, other.lower_) <= 0)) { |
| // The equality value does not fall in this range. |
| SetToNone(); |
| } else { |
| predicate_type_ = PredicateType::Equality; |
| lower_ = other.lower_; |
| upper_ = nullptr; |
| } |
| return; |
| case PredicateType::IsNotNull: |
| return; |
| case PredicateType::IsNull: |
| SetToNone(); |
| return; |
| case PredicateType::InList: |
| // The InList predicate values are examined to check whether |
| // they lie in the range. |
| // The current predicate is then converted from a range predicate to |
| // an InList predicate (since it is more selective). |
| // The number of values in the InList will depend on how many |
| // values were within the range. |
| // The call to Simplify() will then convert the InList if appropriate: |
| // i.e an InList with zero entries gets converted to a NONE |
| // and InList with 1 entry gets converted into an Equality. |
| values_ = other.values_; |
| values_.erase(std::remove_if(values_.begin(), values_.end(), |
| [this] (const void* v) { |
| return !CheckValueInRange(v); |
| }), values_.end()); |
| predicate_type_ = PredicateType::InList; |
| Simplify(); |
| return; |
| } |
| LOG(FATAL) << "unknown predicate type"; |
| } |
| |
| void ColumnPredicate::MergeIntoEquality(const ColumnPredicate& other) { |
| CHECK(predicate_type_ == PredicateType::Equality); |
| |
| switch (other.predicate_type()) { |
| case PredicateType::None: { |
| SetToNone(); |
| return; |
| } |
| case PredicateType::Range: { |
| if (!other.CheckValueInRange(lower_)) { |
| // This equality value does not fall in the other range. |
| SetToNone(); |
| } |
| return; |
| }; |
| case PredicateType::Equality: { |
| if (column_.type_info()->Compare(lower_, other.lower_) != 0) { |
| SetToNone(); |
| } |
| return; |
| }; |
| case PredicateType::IsNotNull: return; |
| case PredicateType::IsNull: { |
| SetToNone(); |
| return; |
| } |
| case PredicateType::InList : { |
| // The equality value needs to be a member of the InList |
| if (!other.CheckValueInList(lower_)) { |
| SetToNone(); |
| } |
| return; |
| }; |
| case PredicateType::InBloomFilter: { |
| if (!other.CheckValueInBloomFilter(lower_)) { |
| SetToNone(); |
| } |
| return; |
| }; |
| } |
| LOG(FATAL) << "unknown predicate type"; |
| } |
| |
| void ColumnPredicate::MergeIntoIsNotNull(const ColumnPredicate &other) { |
| CHECK(predicate_type_ == PredicateType::IsNotNull); |
| switch (other.predicate_type()) { |
| // The intersection of NULL and IS NOT NULL is None. |
| case PredicateType::IsNull: { |
| SetToNone(); |
| return; |
| } |
| default: { |
| // Otherwise, the intersection is the other predicate. |
| predicate_type_ = other.predicate_type_; |
| lower_ = other.lower_; |
| upper_ = other.upper_; |
| values_ = other.values_; |
| bloom_filters_ = other.bloom_filters_; |
| return; |
| } |
| } |
| } |
| |
| void ColumnPredicate::MergeIntoIsNull(const ColumnPredicate &other) { |
| CHECK(predicate_type_ == PredicateType::IsNull); |
| switch (other.predicate_type()) { |
| // The intersection of IS NULL and IS NULL is IS NULL. |
| case PredicateType::IsNull: { |
| return; |
| } |
| default: { |
| // Otherwise, the intersection is None. |
| // NB: This will not be true if NULL is allowed in an InList predicate. |
| SetToNone(); |
| return; |
| } |
| } |
| } |
| |
| void ColumnPredicate::MergeIntoInList(const ColumnPredicate &other) { |
| CHECK(predicate_type_ == PredicateType::InList); |
| DCHECK(values_.size() > 1); |
| |
| switch (other.predicate_type()) { |
| case PredicateType::None: { |
| SetToNone(); |
| return; |
| }; |
| case PredicateType::Range: { |
| // Only values within the range should be retained. |
| auto search_by = [&] (const void* lhs, const void* rhs) { |
| return this->column_.type_info()->Compare(lhs, rhs) < 0; |
| }; |
| |
| // Remove all values greater than the range. |
| if (other.upper_ != nullptr) { |
| // lower_bound is used here instead of upper_bound, since the upper |
| // bound of the range is exclusive, and the in list is inclusive. |
| auto upper = std::lower_bound(values_.begin(), values_.end(), other.upper_, search_by); |
| values_.erase(upper, values_.end()); |
| } |
| |
| // Remove all values less than the range. |
| if (other.lower_ != nullptr) { |
| auto lower = std::lower_bound(values_.begin(), values_.end(), other.lower_, search_by); |
| values_.erase(values_.begin(), lower); |
| } |
| |
| Simplify(); |
| return; |
| } |
| case PredicateType::Equality: { |
| if (CheckValueInList(other.lower_)) { |
| // value falls in list so change to Equality predicate |
| predicate_type_ = PredicateType::Equality; |
| lower_ = other.lower_; |
| upper_ = nullptr; |
| } else { |
| SetToNone(); // Value does not fall in list |
| } |
| return; |
| } |
| case PredicateType::IsNotNull: return; |
| case PredicateType::IsNull: { |
| SetToNone(); |
| return; |
| } |
| case PredicateType::InList: { |
| // Merge the 'other' IN list into this IN list. The basic idea is to loop |
| // through this predicate list, retaining only the values which are also |
| // contained in the other predicate list. We apply an optimization first: |
| // all values from this in list which fall outside the range of the other |
| // IN list are removed, and the remaining values in this IN list are only |
| // checked against values in the other list which are in this list's |
| // range. This doesn't reduce the worst-case complexity, but can really |
| // speed up the merge for big lists in certain cases. This optimization |
| // relies on the lists being sorted. |
| DCHECK(other.values_.size() > 1); |
| |
| auto search_by = [&] (const void* lhs, const void* rhs) { |
| return this->column_.type_info()->Compare(lhs, rhs) < 0; |
| }; |
| |
| // Remove all values in this IN list which are greater than the largest |
| // value in the other list. |
| values_.erase(std::upper_bound(values_.begin(), values_.end(), |
| other.values_.back(), search_by), |
| values_.end()); |
| |
| // Remove all values in this IN list which are less than the smallest |
| // value in the other list. |
| values_.erase(values_.begin(), |
| std::lower_bound(values_.begin(), values_.end(), |
| other.values_.front(), search_by)); |
| |
| if (values_.empty()) { |
| SetToNone(); |
| return; |
| } |
| |
| // Find the sublist in the other IN list which overlaps with this IN list. |
| auto other_start = std::lower_bound(other.values_.begin(), other.values_.end(), |
| values_.front(), search_by); |
| auto other_end = std::upper_bound(other_start, other.values_.end(), |
| values_.back(), search_by); |
| |
| // Returns true if the value is *not* present in the other list. |
| // Modifies other_start to point at the position of v in the other list. |
| auto other_absent = [&] (const void* v) { |
| other_start = std::lower_bound(other_start, other_end, v, search_by); |
| return this->column_.type_info()->Compare(v, *other_start) != 0; |
| }; |
| |
| // Iterate through the values_ list and remove elements that do not exist |
| // in the other list. |
| values_.erase(std::remove_if(values_.begin(), values_.end(), other_absent), values_.end()); |
| Simplify(); |
| return; |
| }; |
| case PredicateType::InBloomFilter: { |
| std::vector<const void*> new_values; |
| std::copy_if(values_.begin(), values_.end(), std::back_inserter(new_values), |
| [&] (const void* value) { |
| return other.CheckValueInBloomFilter(value); |
| }); |
| values_.swap(new_values); |
| Simplify(); |
| return; |
| }; |
| } |
| LOG(FATAL) << "unknown predicate type"; |
| } |
| |
| void ColumnPredicate::MergeIntoBloomFilter(const ColumnPredicate &other) { |
| CHECK(predicate_type_ == PredicateType::InBloomFilter); |
| DCHECK(!bloom_filters_.empty()); |
| |
| switch (other.predicate_type()) { |
| case PredicateType::None: |
| SetToNone(); |
| return; |
| case PredicateType::InBloomFilter: |
| bloom_filters_.insert(bloom_filters_.end(), other.bloom_filters().begin(), |
| other.bloom_filters().end()); |
| [[fallthrough]]; |
| case PredicateType::Range: |
| // Merge the optional lower and upper bound. |
| if (other.lower_ != nullptr && |
| (lower_ == nullptr || column_.type_info()->Compare(lower_, other.lower_) < 0)) { |
| lower_ = other.lower_; |
| } |
| if (other.upper_ != nullptr && |
| (upper_ == nullptr || column_.type_info()->Compare(upper_, other.upper_) > 0)) { |
| upper_ = other.upper_; |
| } |
| Simplify(); |
| return; |
| case PredicateType::Equality: |
| if (CheckValueInBloomFilter(other.lower_)) { |
| // Value falls in bloom filters so change to Equality predicate. |
| predicate_type_ = PredicateType::Equality; |
| lower_ = other.lower_; |
| upper_ = nullptr; |
| bloom_filters_.clear(); |
| } else { |
| SetToNone(); // Value does not fall in bloom filters. |
| } |
| return; |
| case PredicateType::IsNotNull: |
| return; |
| case PredicateType::IsNull: |
| SetToNone(); |
| return; |
| case PredicateType::InList: |
| DCHECK(other.values_.size() > 1); |
| std::vector<const void*> new_values; |
| std::copy_if(other.values_.begin(), other.values_.end(), std::back_inserter(new_values), |
| [&] (const void* value) { |
| return CheckValueInBloomFilter(value); |
| }); |
| predicate_type_ = PredicateType::InList; |
| values_.swap(new_values); |
| bloom_filters_.clear(); |
| Simplify(); |
| return; |
| } |
| LOG(FATAL) << "unknown predicate type"; |
| } |
| |
| namespace { |
| |
| // Optimized predicate evaluation for primitive types. |
| // |
| // For primitives, it's safe to evaluate a predicate even if the cell is |
| // null or deselected -- it might be junk data, which means we'd get |
| // a junk comparison result, but that's OK, because we can just bitwise-AND |
| // the result against the null bitmap and the preexisting selection vector. |
| // |
| // This ends up removing most of the branches from the inner loop of comparisons |
| // and enables compilers to do SIMD optimizations. |
| // |
| // This technique can't safely be applied to cells like BINARY since these |
| // consist of pointers, and following a junk pointer might crash the process. |
| // |
| // Returns the number of elements of 'cb' that were processed. This function |
| // only processes multiples of 8, so if cb.nrows() is not a multiple of 8, the |
| // last few elements may need to be processed by the caller. |
| template <DataType PhysicalType, typename P> |
| ATTRIBUTE_NOINLINE |
| int ApplyPredicatePrimitive(const ColumnBlock& block, uint8_t* __restrict__ sel_bitmap, P p) { |
| using cpp_type = typename DataTypeTraits<PhysicalType>::cpp_type; |
| const cpp_type* data = reinterpret_cast<const cpp_type*>(block.data()); |
| const int n_chunks = block.nrows() / 8; |
| for (int i = 0; i < n_chunks; i++) { |
| uint8_t res_8 = 0; |
| for (int j = 0; j < 8; j++) { |
| res_8 |= p(data++) << j; |
| } |
| sel_bitmap[i] &= res_8; |
| } |
| if (block.is_nullable()) { |
| for (int i = 0; i < n_chunks; i++) { |
| sel_bitmap[i] &= block.non_null_bitmap()[i]; |
| } |
| } |
| return n_chunks * 8; |
| } |
| |
| |
| template <DataType PhysicalType, typename P> |
| void ApplyPredicate(const ColumnBlock& block, SelectionVector* sel, P p) { |
| using cpp_type = typename DataTypeTraits<PhysicalType>::cpp_type; |
| int start_idx = 0; |
| if (std::is_fundamental<cpp_type>::value) { |
| start_idx = ApplyPredicatePrimitive<PhysicalType>(block, sel->mutable_bitmap(), p); |
| if (PREDICT_TRUE(start_idx == block.nrows())) return; |
| // If we couldn't process the whole block unrolled by 8, fall through to the |
| // remainder. |
| } |
| |
| const cpp_type* data = reinterpret_cast<const cpp_type*>(block.data()); |
| if (block.is_nullable()) { |
| for (size_t i = start_idx; i < block.nrows(); i++) { |
| if (!sel->IsRowSelected(i)) continue; |
| const cpp_type* cell = block.is_null(i) ? nullptr : &data[i]; |
| if (cell == nullptr || !p(cell)) { |
| BitmapClear(sel->mutable_bitmap(), i); |
| } |
| } |
| } else { |
| for (size_t i = start_idx; i < block.nrows(); i++) { |
| if (!sel->IsRowSelected(i)) continue; |
| const cpp_type* cell = &data[i]; |
| if (!p(cell)) { |
| BitmapClear(sel->mutable_bitmap(), i); |
| } |
| } |
| } |
| } |
| |
| template<bool IS_NOT_NULL> |
| void ApplyNullPredicate(const ColumnBlock& block, uint8_t* __restrict__ sel_vec) { |
| int n_bytes = KUDU_ALIGN_UP(block.nrows(), 8) / 8; |
| for (int i = 0; i < n_bytes; i++) { |
| uint8_t non_null_byte = block.non_null_bitmap()[i]; |
| if (!IS_NOT_NULL) non_null_byte = ~non_null_byte; |
| sel_vec[i] &= non_null_byte; |
| } |
| } |
| } // anonymous namespace |
| |
| template <DataType PhysicalType> |
| void ColumnPredicate::EvaluateForPhysicalType(const ColumnBlock& block, |
| SelectionVector* sel) const { |
| using traits = DataTypeTraits<PhysicalType>; |
| using cpp_type = typename traits::cpp_type; |
| |
| switch (predicate_type()) { |
| case PredicateType::Range: { |
| cpp_type local_lower = lower_ ? *static_cast<const cpp_type*>(lower_) : cpp_type(); |
| cpp_type local_upper = upper_ ? *static_cast<const cpp_type*>(upper_) : cpp_type(); |
| |
| if (lower_ == nullptr) { |
| ApplyPredicate<PhysicalType>(block, sel, [local_upper] (const void* cell) { |
| return traits::Compare(cell, &local_upper) < 0; |
| }); |
| } else if (upper_ == nullptr) { |
| ApplyPredicate<PhysicalType>(block, sel, [local_lower] (const void* cell) { |
| return traits::Compare(cell, &local_lower) >= 0; |
| }); |
| } else { |
| ApplyPredicate<PhysicalType>(block, sel, [local_lower, local_upper] (const void* cell) { |
| return traits::Compare(cell, &local_upper) < 0 && |
| traits::Compare(cell, &local_lower) >= 0; |
| }); |
| } |
| return; |
| }; |
| case PredicateType::Equality: { |
| cpp_type local_lower = lower_ ? *static_cast<const cpp_type*>(lower_) : cpp_type(); |
| ApplyPredicate<PhysicalType>(block, sel, [local_lower] (const void* cell) { |
| return traits::Compare(cell, &local_lower) == 0; |
| }); |
| return; |
| }; |
| case PredicateType::IsNotNull: { |
| if (!block.is_nullable()) return; |
| ApplyNullPredicate<true>(block, sel->mutable_bitmap()); |
| return; |
| }; |
| case PredicateType::IsNull: { |
| if (!block.is_nullable()) { |
| BitmapChangeBits(sel->mutable_bitmap(), 0, block.nrows(), false); |
| return; |
| } |
| ApplyNullPredicate<false>(block, sel->mutable_bitmap()); |
| return; |
| } |
| case PredicateType::InList: { |
| ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) { |
| return std::binary_search(values_.begin(), values_.end(), cell, |
| [] (const void* lhs, const void* rhs) { |
| return traits::Compare(lhs, rhs) < 0; |
| }); |
| }); |
| return; |
| }; |
| case PredicateType::None: LOG(FATAL) << "NONE predicate evaluation"; |
| case PredicateType::InBloomFilter: { |
| ApplyPredicate<PhysicalType>(block, sel, [this] (const void* cell) { |
| return EvaluateCell<PhysicalType>(cell); |
| }); |
| return; |
| }; |
| } |
| LOG(FATAL) << "unknown predicate type"; |
| } |
| |
| bool ColumnPredicate::EvaluateCell(DataType type, const void* cell) const { |
| switch (type) { |
| case BOOL: return EvaluateCell<BOOL>(cell); |
| case INT8: return EvaluateCell<INT8>(cell); |
| case INT16: return EvaluateCell<INT16>(cell); |
| case INT32: return EvaluateCell<INT32>(cell); |
| case INT64: return EvaluateCell<INT64>(cell); |
| case INT128: return EvaluateCell<INT128>(cell); |
| case UINT8: return EvaluateCell<UINT8>(cell); |
| case UINT16: return EvaluateCell<UINT16>(cell); |
| case UINT32: return EvaluateCell<UINT32>(cell); |
| case UINT64: return EvaluateCell<UINT64>(cell); |
| case FLOAT: return EvaluateCell<FLOAT>(cell); |
| case DOUBLE: return EvaluateCell<DOUBLE>(cell); |
| case BINARY: return EvaluateCell<BINARY>(cell); |
| default: LOG(FATAL) << "unknown physical type: " << GetTypeInfo(type)->name(); |
| } |
| } |
| |
| void ColumnPredicate::Evaluate(const ColumnBlock& block, SelectionVector* sel) const { |
| DCHECK(sel); |
| switch (block.type_info()->physical_type()) { |
| case BOOL: return EvaluateForPhysicalType<BOOL>(block, sel); |
| case INT8: return EvaluateForPhysicalType<INT8>(block, sel); |
| case INT16: return EvaluateForPhysicalType<INT16>(block, sel); |
| case INT32: return EvaluateForPhysicalType<INT32>(block, sel); |
| case INT64: return EvaluateForPhysicalType<INT64>(block, sel); |
| case INT128: return EvaluateForPhysicalType<INT128>(block, sel); |
| case UINT8: return EvaluateForPhysicalType<UINT8>(block, sel); |
| case UINT16: return EvaluateForPhysicalType<UINT16>(block, sel); |
| case UINT32: return EvaluateForPhysicalType<UINT32>(block, sel); |
| case UINT64: return EvaluateForPhysicalType<UINT64>(block, sel); |
| case FLOAT: return EvaluateForPhysicalType<FLOAT>(block, sel); |
| case DOUBLE: return EvaluateForPhysicalType<DOUBLE>(block, sel); |
| case BINARY: return EvaluateForPhysicalType<BINARY>(block, sel); |
| default: LOG(FATAL) << "unknown physical type: " << block.type_info()->name(); |
| } |
| } |
| |
| string ColumnPredicate::ToString() const { |
| switch (predicate_type()) { |
| case PredicateType::None: return strings::Substitute("$0 NONE", column_.name()); |
| case PredicateType::Range: { |
| if (lower_ == nullptr) { |
| return strings::Substitute("$0 < $1", column_.name(), column_.Stringify(upper_)); |
| } |
| if (upper_ == nullptr) { |
| return strings::Substitute("$0 >= $1", column_.name(), column_.Stringify(lower_)); |
| } |
| return strings::Substitute("$0 >= $1 AND $0 < $2", |
| column_.name(), |
| column_.Stringify(lower_), |
| column_.Stringify(upper_)); |
| |
| }; |
| case PredicateType::Equality: { |
| return strings::Substitute("$0 = $1", column_.name(), column_.Stringify(lower_)); |
| }; |
| case PredicateType::IsNotNull: { |
| return strings::Substitute("$0 IS NOT NULL", column_.name()); |
| }; |
| case PredicateType::IsNull: { |
| return strings::Substitute("$0 IS NULL", column_.name()); |
| }; |
| case PredicateType::InList: { |
| string ss; |
| ss.append(column_.name()); |
| ss.append(" IN ("); |
| ss.append(KUDU_REDACT(JoinMapped(values_, |
| [&] (const void* value) { return column_.Stringify(value); }, |
| ", "))); |
| ss.append(")"); |
| return ss; |
| }; |
| case PredicateType::InBloomFilter: { |
| return strings::Substitute("`$0` IS InBloomFilter", column_.name()); |
| }; |
| default: |
| LOG(FATAL) << "unknown predicate type"; |
| } |
| } |
| |
| bool ColumnPredicate::operator==(const ColumnPredicate& other) const { |
| if (!column_.Equals(other.column_, ColumnSchema::COMPARE_NAME_AND_TYPE)) { return false; } |
| if (predicate_type_ != other.predicate_type_) { |
| return false; |
| } |
| switch (predicate_type_) { |
| case PredicateType::Equality: |
| return column_.type_info()->Compare(lower_, other.lower_) == 0; |
| case PredicateType::InBloomFilter: |
| if (bloom_filters_.size() != other.bloom_filters().size()) { |
| return false; |
| } |
| // Compare the actual BlockBloomFilters pointed by the vectors. |
| if (!std::equal(bloom_filters_.begin(), bloom_filters_.end(), |
| other.bloom_filters().begin(), |
| [] (const BlockBloomFilter* lhs, const BlockBloomFilter* rhs) { |
| return *lhs == *rhs; |
| })) { |
| return false; |
| } |
| [[fallthrough]]; |
| case PredicateType::Range: { |
| auto bound_equal = [&] (const void* eleml, const void* elemr) { |
| return (eleml == elemr || (eleml != nullptr && elemr != nullptr && |
| column_.type_info()->Compare(eleml, elemr) == 0)); |
| }; |
| return bound_equal(lower_, other.lower_) && bound_equal(upper_, other.upper_); |
| } |
| case PredicateType::InList: |
| if (values_.size() != other.values_.size()) { |
| return false; |
| } |
| for (int i = 0; i < values_.size(); i++) { |
| if (column_.type_info()->Compare(values_[i], other.values_[i]) != 0) return false; |
| } |
| return true; |
| case PredicateType::None: |
| case PredicateType::IsNotNull: |
| case PredicateType::IsNull: |
| return true; |
| } |
| LOG(FATAL) << "unknown predicate type"; |
| } |
| |
| bool ColumnPredicate::CheckValueInRange(const void* value) const { |
| CHECK(predicate_type_ == PredicateType::Range); |
| return ((lower_ == nullptr || column_.type_info()->Compare(lower_, value) <= 0) && |
| (upper_ == nullptr || column_.type_info()->Compare(upper_, value) > 0)); |
| } |
| |
| bool ColumnPredicate::CheckValueInList(const void* value) const { |
| return std::binary_search(values_.begin(), values_.end(), value, |
| [this](const void* lhs, const void* rhs) { |
| return this->column_.type_info()->Compare(lhs, rhs) < 0; |
| }); |
| } |
| |
| bool ColumnPredicate::CheckValueInBloomFilter(const void* value) const { |
| return EvaluateCell(column_.type_info()->physical_type(), value); |
| } |
| |
| namespace { |
| int SelectivityRank(const ColumnPredicate& predicate) { |
| int rank; |
| switch (predicate.predicate_type()) { |
| case PredicateType::None: rank = 0; break; |
| case PredicateType::IsNull: rank = 1; break; |
| case PredicateType::Equality: rank = 2; break; |
| case PredicateType::InList: rank = 3; break; |
| case PredicateType::Range: rank = 4; break; |
| case PredicateType::InBloomFilter: rank = 5; break; |
| case PredicateType::IsNotNull: rank = 6; break; |
| default: LOG(FATAL) << "unknown predicate type"; |
| } |
| return rank * (kLargestTypeSize + 1) + predicate.column().type_info()->size(); |
| } |
| } // anonymous namespace |
| |
| int SelectivityComparator(const ColumnPredicate& left, const ColumnPredicate& right) { |
| return SelectivityRank(left) - SelectivityRank(right); |
| } |
| |
| } // namespace kudu |