blob: 3a918d7605d874ab563da50ff7257327672ff2cc [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <glog/logging.h>
#include <stdint.h>
#include <boost/iterator/iterator_facade.hpp>
#include <functional>
#include <memory>
#include <ostream>
#include <string>
#include <utility>
#include "common/status.h"
#include "olap/column_predicate.h"
#include "olap/rowset/segment_v2/bloom_filter.h"
#include "vec/columns/column.h"
#include "vec/columns/column_dictionary.h"
#include "vec/columns/column_nullable.h"
#include "vec/common/string_ref.h"
#include "vec/core/types.h"
#include "vec/functions/like.h"
namespace roaring {
class Roaring;
} // namespace roaring
namespace doris {
class FunctionContext;
namespace segment_v2 {
class BitmapIndexIterator;
} // namespace segment_v2
template <PrimitiveType T>
class LikeColumnPredicate : public ColumnPredicate {
public:
LikeColumnPredicate(bool opposite, uint32_t column_id, doris::FunctionContext* fn_ctx,
doris::StringRef val);
~LikeColumnPredicate() override = default;
PredicateType type() const override { return PredicateType::EQ; }
void evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool* flags) const override;
Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
roaring::Roaring* roaring) const override {
return Status::OK();
}
bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const override {
return input_type == T || (is_string_type(input_type) && is_string_type(T));
}
uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel,
uint16_t size) const override;
void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size,
bool* flags) const override;
std::string get_search_str() const override {
return std::string(reinterpret_cast<const char*>(pattern.data), pattern.size);
}
bool is_opposite() const { return _opposite; }
void set_page_ng_bf(std::unique_ptr<segment_v2::BloomFilter> src) override {
_page_ng_bf = std::move(src);
}
bool evaluate_and(const BloomFilter* bf) const override {
// like predicate can not use normal bf, just return true to accept
if (!bf->is_ngram_bf()) return true;
if (_page_ng_bf) {
return bf->contains(*_page_ng_bf);
}
return true;
}
bool can_do_bloom_filter(bool ngram) const override { return ngram; }
private:
template <bool is_and>
void _evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool* flags) const {
if (column.is_nullable()) {
auto* nullable_col =
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
auto& null_map_data = nullable_col->get_null_map_column().get_data();
auto& nested_col = nullable_col->get_nested_column();
if (nested_col.is_column_dictionary()) {
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);
auto& data_array = nested_col_ptr->get_data();
for (uint16_t i = 0; i < size; i++) {
if (null_map_data[i]) {
if constexpr (is_and) {
flags[i] &= _opposite;
} else {
flags[i] = _opposite;
}
continue;
}
StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[i]);
if constexpr (is_and) {
unsigned char flag = 0;
static_cast<void>((_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern, &flag));
flags[i] &= _opposite ^ flag;
} else {
unsigned char flag = 0;
static_cast<void>((_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern, &flag));
flags[i] = _opposite ^ flag;
}
}
} else {
LOG(FATAL) << "vectorized (not) like predicates should be dict column";
}
} else {
if (column.is_column_dictionary()) {
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(column);
auto& data_array = nested_col_ptr->get_data();
for (uint16_t i = 0; i < size; i++) {
StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[i]);
if constexpr (is_and) {
unsigned char flag = 0;
static_cast<void>((_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern, &flag));
flags[i] &= _opposite ^ flag;
} else {
unsigned char flag = 0;
static_cast<void>((_state->scalar_function)(
const_cast<vectorized::LikeSearchState*>(&_like_state),
StringRef(cell_value.data, cell_value.size), pattern, &flag));
flags[i] = _opposite ^ flag;
}
}
} else {
LOG(FATAL) << "vectorized (not) like predicates should be dict column";
}
}
}
std::string _debug_string() const override {
std::string info = "LikeColumnPredicate";
return info;
}
std::string _origin;
// lifetime controlled by scan node
using StateType = vectorized::LikeState;
StringRef pattern;
StateType* _state = nullptr;
// A separate scratch region is required for every concurrent caller of the
// Hyperscan API. So here _like_state is separate for each instance of
// LikeColumnPredicate.
vectorized::LikeSearchState _like_state;
std::unique_ptr<segment_v2::BloomFilter> _page_ng_bf; // for ngram-bf index
};
} // namespace doris