blob: fa57f1ef5af99af352ff450a6b657b712bad1ca6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "olap/match_predicate.h"
#include <roaring/roaring.hh>
#include "exec/olap_utils.h"
#include "olap/field.h"
#include "olap/inverted_index_parser.h"
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/inverted_index_cache.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
#include "olap/schema.h"
#include "olap/tablet_schema.h"
#include "olap/types.h"
#include "olap/utils.h"
#include "runtime/define_primitive_type.h"
#include "runtime/types.h"
#include "vec/common/assert_cast.h"
#include "vec/common/string_ref.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_array.h"
namespace doris {
#include "common/compile_check_begin.h"
// Builds a MATCH predicate on column `column_id`.
// `value` is copied into the predicate and later used as the query value;
// `match_type` selects which kind of match query evaluate() issues.
MatchPredicate::MatchPredicate(uint32_t column_id, const std::string& value, MatchType match_type)
        : ColumnPredicate(column_id),
          _value(value),
          _match_type(match_type) {}
// Identifies this predicate's kind: always PredicateType::MATCH.
PredicateType MatchPredicate::type() const {
return PredicateType::MATCH;
}
// Evaluates this MATCH predicate through an index and narrows `bitmap` to the matching rows.
//
// name_with_type: `first` is the column name passed to the index query, `second` is the
//                 column's data type, which decides how _value is encoded for the lookup.
// iterator:       index iterator to query; when null the call does nothing and returns OK.
// num_rows:       forwarded to the index query as InvertedIndexParam::num_rows.
// bitmap:         in/out row set. On success, rows present in the index's null bitmap are
//                 removed and the remainder is intersected with the query result.
//
// Returns INDEX_INVALID_PARAMETERS when _check_evaluate() rejects the iterator, otherwise
// any error raised while parsing _value or reading the index, otherwise OK.
//
// Note: for a column type that is neither string, array-of-string nor array-of-numeric no
// index query is issued, the result set stays empty and `bitmap` therefore ends up empty.
Status MatchPredicate::evaluate(const vectorized::IndexFieldNameAndTypePair& name_with_type,
                                IndexIterator* iterator, uint32_t num_rows,
                                roaring::Roaring* bitmap) const {
    if (iterator == nullptr) {
        return Status::OK();
    }
    if (_check_evaluate(iterator)) {
        return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>(
                "phrase queries require setting support_phrase = true");
    }

    const auto& data_type = name_with_type.second;

    InvertedIndexParam param;
    param.column_name = name_with_type.first;
    param.query_type = _to_inverted_index_query_type(_match_type);
    param.num_rows = num_rows;
    param.roaring = std::make_shared<roaring::Roaring>();

    // String-valued lookup: the index receives a StringRef over the bytes of _value.
    const auto read_with_string_value = [this, iterator, &param]() -> Status {
        StringRef match_value;
        // match_value only borrows _value's buffer and lives on this stack frame, so
        // param.query_value must not be dereferenced after this lambda returns.
        match_value.replace(const_cast<char*>(_value.c_str()), int32_t(_value.length()));
        param.query_value = &match_value;
        return iterator->read_from_index(&param);
    };

    const auto primitive_type = data_type->get_primitive_type();
    if (is_string_type(primitive_type)) {
        RETURN_IF_ERROR(read_with_string_value());
    } else if (primitive_type == TYPE_ARRAY) {
        // Resolve the array's element type once; it drives both remaining cases.
        const auto nested_type = assert_cast<const vectorized::DataTypeArray*>(
                                         vectorized::remove_nullable(data_type).get())
                                         ->get_nested_type();
        const auto nested_primitive = nested_type->get_primitive_type();
        if (is_string_type(nested_primitive)) {
            RETURN_IF_ERROR(read_with_string_value());
        } else {
            const auto nested_field_type = TabletColumn::get_field_type_by_type(nested_primitive);
            if (is_numeric_type(nested_field_type)) {
                // Parse _value into the element type's in-memory representation and
                // query the index with that binary value.
                std::vector<char> buf(nested_type->get_size_of_value_in_memory());
                const TypeInfo* type_info = get_scalar_type_info(nested_field_type);
                RETURN_IF_ERROR(type_info->from_string(buf.data(), _value));
                param.query_value = buf.data();
                param.skip_try = true;
                RETURN_IF_ERROR(iterator->read_from_index(&param));
            }
        }
    }

    // Drop NULL rows: NULL compared with a value yields NULL, which WHERE treats as false.
    // This is done after the query because the query itself tries to read the null bitmap
    // and put it into the cache.
    if (iterator->has_null()) {
        InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
        RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap_cache_handle));
        const std::shared_ptr<roaring::Roaring> null_rows =
                null_bitmap_cache_handle.get_bitmap();
        if (null_rows) {
            *bitmap -= *null_rows;
        }
    }

    *bitmap &= *param.roaring;
    return Status::OK();
}
// Translates a MatchType into the corresponding InvertedIndexQueryType.
// A match type without a mapping trips DCHECK(false) and yields UNKNOWN_QUERY.
InvertedIndexQueryType MatchPredicate::_to_inverted_index_query_type(MatchType match_type) const {
    switch (match_type) {
    case MatchType::MATCH_ANY:
        return InvertedIndexQueryType::MATCH_ANY_QUERY;
    case MatchType::MATCH_ALL:
        return InvertedIndexQueryType::MATCH_ALL_QUERY;
    case MatchType::MATCH_PHRASE:
        return InvertedIndexQueryType::MATCH_PHRASE_QUERY;
    case MatchType::MATCH_PHRASE_PREFIX:
        return InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
    case MatchType::MATCH_REGEXP:
        return InvertedIndexQueryType::MATCH_REGEXP_QUERY;
    case MatchType::MATCH_PHRASE_EDGE:
        return InvertedIndexQueryType::MATCH_PHRASE_EDGE_QUERY;
    default:
        break;
    }
    // Only reached for a match type that has no mapping above.
    DCHECK(false);
    return InvertedIndexQueryType::UNKNOWN_QUERY;
}
// Reports whether this predicate must be rejected for the given iterator.
// Returns true only for the phrase-style match types (MATCH_PHRASE, MATCH_PHRASE_PREFIX,
// MATCH_PHRASE_EDGE) when the iterator's reader is a fulltext index that does not
// support phrase queries; returns false in every other case.
// `iterator` is dereferenced for phrase-style match types and must be non-null.
bool MatchPredicate::_check_evaluate(IndexIterator* iterator) const {
    const bool is_phrase_style = _match_type == MatchType::MATCH_PHRASE ||
                                 _match_type == MatchType::MATCH_PHRASE_PREFIX ||
                                 _match_type == MatchType::MATCH_PHRASE_EDGE;
    if (!is_phrase_style) {
        return false;
    }
    const auto& reader = iterator->get_reader();
    return reader->is_fulltext_index() && !reader->is_support_phrase();
}
} // namespace doris
#include "common/compile_check_end.h"