// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
| |
| #include "vec/functions/function_multi_match.h" |
| |
| #include <glog/logging.h> |
| |
| #include <memory> |
| #include <roaring/roaring.hh> |
| #include <string> |
| #include <vector> |
| |
| #include "io/fs/file_reader.h" |
| #include "olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h" |
| #include "olap/rowset/segment_v2/segment_iterator.h" |
| #include "vec/columns/column.h" |
| #include "vec/exprs/vslot_ref.h" |
| #include "vec/functions/simple_function_factory.h" |
| |
| namespace doris::vectorized { |
| |
| Status FunctionMultiMatch::execute_impl(FunctionContext* /*context*/, Block& block, |
| const ColumnNumbers& arguments, uint32_t result, |
| size_t /*input_rows_count*/) const { |
| return Status::RuntimeError("only inverted index queries are supported"); |
| } |
| |
| InvertedIndexQueryType get_query_type(const std::string& query_type) { |
| if (query_type == "any") { |
| return InvertedIndexQueryType::MATCH_ANY_QUERY; |
| } else if (query_type == "all") { |
| return InvertedIndexQueryType::MATCH_ALL_QUERY; |
| } else if (query_type == "phrase") { |
| return InvertedIndexQueryType::MATCH_PHRASE_QUERY; |
| } else if (query_type == "phrase_prefix") { |
| return InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY; |
| } else { |
| return InvertedIndexQueryType::UNKNOWN_QUERY; |
| } |
| } |
| |
| Status FunctionMultiMatch::evaluate_inverted_index( |
| const ColumnsWithTypeAndName& arguments, |
| const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names, |
| std::vector<segment_v2::IndexIterator*> iterators, uint32_t num_rows, |
| const InvertedIndexAnalyzerCtx* analyzer_ctx, |
| segment_v2::InvertedIndexResultBitmap& bitmap_result) const { |
| DCHECK(arguments.size() == 2); |
| std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>(); |
| std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>(); |
| |
| // type |
| auto query_type_value = arguments[0].column->get_data_at(0); |
| auto query_type = get_query_type(query_type_value.to_string()); |
| if (query_type == InvertedIndexQueryType::UNKNOWN_QUERY) { |
| return Status::RuntimeError( |
| "parameter query type incorrect for function multi_match: query_type = {}", |
| query_type); |
| } |
| |
| // query |
| auto query_str = arguments[1].column->get_data_at(0); |
| auto param_type = arguments[1].type->get_primitive_type(); |
| if (!is_string_type(param_type)) { |
| return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>( |
| "arguments for multi_match must be string"); |
| } |
| |
| // search |
| InvertedIndexParam param; |
| param.query_value = &query_str; |
| param.query_type = query_type; |
| param.num_rows = num_rows; |
| for (size_t i = 0; i < data_type_with_names.size(); i++) { |
| auto column_name = data_type_with_names[i].first; |
| auto* iter = iterators[i]; |
| if (iter == nullptr) { |
| std::string error_msg = "Inverted index iterator is null for column '" + column_name + |
| "' during multi_match execution"; |
| return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(error_msg); |
| } |
| |
| param.column_name = column_name; |
| param.roaring = std::make_shared<roaring::Roaring>(); |
| param.analyzer_ctx = analyzer_ctx; |
| RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {¶m})); |
| *roaring |= *param.roaring; |
| } |
| segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap); |
| bitmap_result = result; |
| |
| return Status::OK(); |
| } |
| |
| void register_function_multi_match(SimpleFunctionFactory& factory) { |
| factory.register_function<FunctionMultiMatch>(); |
| } |
| |
| } // namespace doris::vectorized |