// blob: a7b154a203aae3fa4c86cbdd96fa531ab232e8b2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/exprs/virtual_slot_ref.h"
#include <gen_cpp/Exprs_types.h>
#include <glog/logging.h>
#include <thrift/protocol/TDebugProtocol.h>
#include <ostream>
#include <vector>
#include "common/exception.h"
#include "common/logging.h"
#include "common/status.h"
#include "runtime/descriptors.h"
#include "runtime/runtime_state.h"
#include "vec/columns/column.h"
#include "vec/columns/column_nothing.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/exprs/vectorized_fn_call.h"
#include "vec/exprs/vexpr_context.h"
#include "vec/exprs/vexpr_fwd.h"
namespace doris::vectorized {
#include "common/compile_check_begin.h"
// Builds a virtual slot reference from a Thrift expression node.
// The slot id and the label are copied from the node. The column index (-1)
// and the column name (nullptr) are placeholders here; prepare() assigns the
// real values once the slot descriptor has been looked up.
VirtualSlotRef::VirtualSlotRef(const doris::TExprNode& node)
: VExpr(node),
_column_id(-1),
_slot_id(node.slot_ref.slot_id),
_column_name(nullptr),
_column_label(node.label) {}
// Builds a virtual slot reference from a slot descriptor, taking the
// expression type and the slot id from it. `desc` is dereferenced without a
// null check, so it must be non-null. As in the other constructor, the column
// index (-1) and column name (nullptr) are placeholders until prepare() runs.
// _column_label is not listed in the initializer list.
VirtualSlotRef::VirtualSlotRef(const SlotDescriptor* desc)
: VExpr(desc->type(), false), _column_id(-1), _slot_id(desc->id()), _column_name(nullptr) {}
/**
 * @brief Resolves the referenced slot and prepares the wrapped virtual column expression.
 *
 * Looks up the slot descriptor for `_slot_id`, records the column name, data
 * type and column index, then builds the virtual column expression from the
 * descriptor's TExpr and prepares it with the caller's context.
 *
 * @param state   Runtime state; supplies the descriptor table.
 * @param desc    Row descriptor used to map the slot id to a column index.
 * @param context Expression context passed on to the wrapped expression.
 * @return Status::OK() on success (also when `_slot_id == -1`, in which case
 *         nothing is resolved). An error status when the slot descriptor is
 *         missing, the slot has no virtual column expression, the row
 *         descriptor yields a negative column index, or building/preparing the
 *         wrapped expression fails.
 */
Status VirtualSlotRef::prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
                               VExprContext* context) {
    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
    DCHECK_EQ(_children.size(), 0);
    if (_slot_id == -1) {
        // Nothing to resolve: _column_id, _column_name and _virtual_column_expr
        // keep their constructor defaults.
        _prepare_finished = true;
        return Status::OK();
    }
    const SlotDescriptor* slot_desc = state->desc_tbl().get_slot_descriptor(_slot_id);
    if (slot_desc == nullptr) {
        return Status::Error<ErrorCode::INTERNAL_ERROR>(
                "couldn't resolve slot descriptor {}, desc: {}", _slot_id,
                state->desc_tbl().debug_string());
    }
    // Bind the column name before any check that formats it. It is still the
    // constructor's nullptr up to this point, so formatting *_column_name in
    // the error below would otherwise dereference a null pointer.
    _column_name = &slot_desc->col_name();
    if (slot_desc->get_virtual_column_expr() == nullptr) {
        return Status::InternalError(
                "VirtualSlotRef {} has no virtual column expr, slot_id: {}, desc: {}, "
                "slot_desc: {}, desc_tbl: {}",
                *_column_name, _slot_id, desc.debug_string(), slot_desc->debug_string(),
                state->desc_tbl().debug_string());
    }
    _column_data_type = slot_desc->get_data_type_ptr();
    DCHECK(_column_data_type != nullptr);
    _column_id = desc.get_column_id(_slot_id);
    if (_column_id < 0) {
        return Status::Error<ErrorCode::INTERNAL_ERROR>(
                "VirtualSlotRef {} has invalid slot id: "
                "{}.\nslot_desc:\n{},\ndesc:\n{},\ndesc_tbl:\n{}",
                *_column_name, _slot_id, slot_desc->debug_string(), desc.debug_string(),
                state->desc_tbl().debug_string());
    }
    const TExpr& expr = *slot_desc->get_virtual_column_expr();
    // Create a temp_ctx only for create_expr_tree.
    VExprContextSPtr temp_ctx;
    RETURN_IF_ERROR(VExpr::create_expr_tree(expr, temp_ctx));
    _virtual_column_expr = temp_ctx->root();
    // Virtual column expr should do prepare with original context.
    RETURN_IF_ERROR(_virtual_column_expr->prepare(state, desc, context));
    _prepare_finished = true;
    return Status::OK();
}
/**
 * @brief Opens the wrapped virtual column expression (if any) and then this expression.
 *
 * @param state   Runtime state forwarded to both open calls.
 * @param context Expression context forwarded to both open calls.
 * @param scope   Function state scope forwarded to both open calls.
 * @return The first error returned by either open call, otherwise Status::OK().
 */
Status VirtualSlotRef::open(RuntimeState* state, VExprContext* context,
                            FunctionContext::FunctionStateScope scope) {
    DCHECK(_prepare_finished);
    // prepare() finishes successfully without building _virtual_column_expr
    // when _slot_id == -1, so the pointer may legitimately be null here.
    // execute_column() already handles that state; do the same instead of
    // dereferencing a null pointer.
    if (_virtual_column_expr != nullptr) {
        RETURN_IF_ERROR(_virtual_column_expr->open(state, context, scope));
    }
    RETURN_IF_ERROR(VExpr::open(state, context, scope));
    _open_finished = true;
    return Status::OK();
}
/**
 * @brief Produces the column for this virtual slot, materializing it on demand.
 *
 * Reads the column at `_column_id` from `block`. When a wrapped virtual column
 * expression exists and the block still holds a ColumnNothing placeholder, the
 * wrapped expression is executed and its result is returned instead. When no
 * wrapped expression exists, only a ColumnNothing placeholder is accepted.
 *
 * @param context       Expression context forwarded to the wrapped expression.
 * @param block         Input block; must contain a column at `_column_id`.
 * @param count         Row count forwarded to the wrapped expression and
 *                      checked (DCHECK) against the result size.
 * @param result_column Receives the block's column or the materialized column.
 * @return Status::OK() on success. An internal error when `_column_id` is out
 *         of range for the block, the block's column pointer is null, or a
 *         non-placeholder column is found while no wrapped expression exists.
 */
Status VirtualSlotRef::execute_column(VExprContext* context, const Block* block, size_t count,
                                      ColumnPtr& result_column) const {
    // Reject negative ids as well as ids past the end of the block. A negative
    // id (the constructor default of -1) would otherwise be converted to a huge
    // unsigned position by get_by_position() below. _column_name can still be
    // null in that state, so it is formatted null-safely.
    if (_column_id < 0 || _column_id >= block->columns()) {
        return Status::Error<ErrorCode::INTERNAL_ERROR>(
                "input block not contain slot column {}, column_id={}, block={}",
                _column_name != nullptr ? _column_name->c_str() : "<unresolved>", _column_id,
                block->dump_structure());
    }
    // From here on _column_id >= 0, which prepare() only assigns after it has
    // bound _column_name, so dereferencing the name is safe.
    ColumnWithTypeAndName col_type_name = block->get_by_position(_column_id);
    result_column = col_type_name.column;
    if (!col_type_name.column) {
        // Maybe we need to create a column in this situation.
        return Status::InternalError(
                "VirtualSlotRef column is null, column_id: {}, column_name: {}", _column_id,
                *_column_name);
    }
    const auto* col_nothing = check_and_get_column<ColumnNothing>(col_type_name.column.get());
    if (this->_virtual_column_expr != nullptr) {
        if (col_nothing != nullptr) {
            // Virtual column is not materialized, so we need to materialize it.
            // Note: after running the wrapped expression, do not keep relying on the
            // `col_type_name` fetched above; the original author notes that the block's
            // column vector might be resized during execution, which would invalidate
            // earlier references. The debug check below re-fetches it for that reason.
            ColumnPtr tmp_column;
            RETURN_IF_ERROR(
                    _virtual_column_expr->execute_column(context, block, count, tmp_column));
            result_column = std::move(tmp_column);
            VLOG_DEBUG << fmt::format(
                    "Materialization of virtual column, slot_id {}, column_id {}, "
                    "column_name {}, column size {}",
                    _slot_id, _column_id, *_column_name,
                    block->get_by_position(_column_id).column->size());
        }
#ifndef NDEBUG
        // get_by_position again since vector in block may be resized
        col_type_name = block->get_by_position(_column_id);
        DCHECK(col_type_name.type != nullptr);
        // Debug-only sanity check: the block's declared type must match the
        // type recorded from the slot descriptor in prepare().
        if (!_column_data_type->equals(*col_type_name.type)) {
            throw doris::Exception(doris::ErrorCode::FATAL_ERROR,
                                   "Virtual column type not match, column_id: {}, "
                                   "column_name: {}, column_type: {}, virtual_column_type: {}",
                                   _column_id, *_column_name, col_type_name.type->get_name(),
                                   _column_data_type->get_name());
        }
#endif
    } else {
        // This is a virtual slot ref that not pushed to segment_iterator
        if (col_nothing == nullptr) {
            return Status::InternalError("Logical error, virtual column can not be materialized");
        } else {
            // The placeholder column is returned as-is through result_column.
            return Status::OK();
        }
    }
    DCHECK_EQ(result_column->size(), count);
    return Status::OK();
}
// Returns the name of the column this reference resolved to.
// Dereferences _column_name unconditionally; the pointer is null until
// prepare() has bound it to the slot descriptor's column name.
const std::string& VirtualSlotRef::expr_name() const {
    const std::string& resolved_name = *_column_name;
    return resolved_name;
}
// Returns a copy of the label stored in _column_label.
std::string VirtualSlotRef::expr_label() {
    std::string label_copy(_column_label);
    return label_copy;
}
// Renders "VirtualSlotRef(slot_id=<id><base debug string>)" for diagnostics.
// The base-class debug string is appended directly after the slot id.
std::string VirtualSlotRef::debug_string() const {
    std::ostringstream rendered;
    rendered << "VirtualSlotRef(slot_id=";
    rendered << _slot_id;
    rendered << VExpr::debug_string();
    rendered << ")";
    return rendered.str();
}
bool VirtualSlotRef::equals(const VExpr& other) {
const auto* other_ptr = dynamic_cast<const VirtualSlotRef*>(&other);
if (!other_ptr) {
return false;
}
// Compare slot_id and column_id
if (this->_slot_id != other_ptr->_slot_id || this->_column_id != other_ptr->_column_id) {
return false;
}
// Compare column_name pointers properly
if (this->_column_name == nullptr && other_ptr->_column_name == nullptr) {
// Both are null, they are equal
} else if (this->_column_name == nullptr || other_ptr->_column_name == nullptr) {
// One is null, the other is not, they are not equal
return false;
} else if (*this->_column_name != *other_ptr->_column_name) {
// Both are not null, compare the string contents
return false;
}
// Compare column_label
if (this->_column_label != other_ptr->_column_label) {
return false;
}
return true;
}
/**
 * @brief Implements ANN range search evaluation for virtual slot references.
 *
 * This method handles the case where a virtual slot reference wraps a distance
 * function call that can be optimized using ANN index range search. Instead of
 * computing distances for all rows, it delegates to the underlying virtual
 * expression to perform the optimized search.
 *
 * If no virtual expression has been built (prepare() leaves it unset when
 * `_slot_id == -1`), an internal error is returned instead of dereferencing a
 * null pointer.
 *
 * @param range_search_runtime Runtime parameters for the range search
 * @param cid_to_index_iterators Index iterators for each column
 * @param idx_to_cid Column ID mapping
 * @param column_iterators Data column iterators
 * @param row_bitmap Result bitmap to be updated with matching rows
 * @param ann_index_stats Performance statistics collector
 * @return The status returned by the wrapped expression, or an internal error
 *         when there is no wrapped expression to delegate to
 */
Status VirtualSlotRef::evaluate_ann_range_search(
        const segment_v2::AnnRangeSearchRuntime& range_search_runtime,
        const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& cid_to_index_iterators,
        const std::vector<ColumnId>& idx_to_cid,
        const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& column_iterators,
        roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& ann_index_stats) {
    if (_virtual_column_expr == nullptr) {
        return Status::InternalError(
                "VirtualSlotRef has no virtual column expr for ann range search, slot_id: {}",
                _slot_id);
    }
    // Pure delegation: the wrapped expression's status is the result.
    return _virtual_column_expr->evaluate_ann_range_search(
            range_search_runtime, cid_to_index_iterators, idx_to_cid, column_iterators, row_bitmap,
            ann_index_stats);
}
#include "common/compile_check_end.h"
} // namespace doris::vectorized