blob: accfa70a74311ebe716f9ed712f6397a03a748db [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "short_circuit_evaluation_expr.h"
#include <gen_cpp/Exprs_types.h>
#include <optional>
#include <sstream>
#include <string>
#include <vector>
#include "common/exception.h"
#include "common/logging.h"
#include "runtime/define_primitive_type.h"
#include "runtime/primitive_type.h"
#include "short_circuit_util.h"
#include "vec/columns/column.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_nullable.h"
#include "vec/common/assert_cast.h"
#include "vec/core/field.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/exprs/vexpr.h"
namespace doris::vectorized {
// For short-circuit execution, we need to distinguish between three types of indices:
// 1. self_index: The index of the current column (the 'i' in for(int i=0; i<size; i++),
// which always refers to the currently executing column)
// 2. executor_index: The index relative to the entire expr tree executor, passed down through
// execute_column and ultimately used in leaf nodes (e.g., SlotRef)
// 3. current_index: The index relative to the column returned by the current expr node
//
// For if/ifnull, self_index and current_index are the same because only one column serves
// as the condition column.
//
// For coalesce/case, self_index and current_index are different because multiple columns
// serve as condition columns (however, for the first condition column execution, they are
// the same, similar to if/ifnull)
Status ShortCircuitExpr::prepare(RuntimeState* state, const RowDescriptor& desc,
VExprContext* context) {
RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
_prepare_finished = true;
return Status::OK();
}
Status ShortCircuitExpr::open(RuntimeState* state, VExprContext* context,
FunctionContext::FunctionStateScope scope) {
DCHECK(_prepare_finished);
RETURN_IF_ERROR(VExpr::open(state, context, scope));
_open_finished = true;
return Status::OK();
}
void ShortCircuitExpr::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
DCHECK(_prepare_finished);
VExpr::close(context, scope);
}
std::string ShortCircuitExpr::debug_string() const {
std::string result = expr_name() + "(";
for (size_t i = 0; i < _children.size(); ++i) {
if (i != 0) {
result += ", ";
}
result += _children[i]->debug_string();
}
result += ")";
return result;
}
// Returns empty result column if count==0
// For some exprs, executing with a size-0 column may cause errors.
// These are issues with the exprs themselves, but for convenience, we handle this case uniformly in short-circuit expr.
/// TODO: Once all exprs support size-0 columns in the future, this function can be removed.
[[nodiscard]] Status try_early_return_on_empty(const VExprSPtr& expr, VExprContext* context,
const Block* block, Selector* selector, size_t count,
ColumnPtr& result_columnn) {
if (count == 0) {
result_columnn = expr->execute_type(block)->create_column();
DCHECK_EQ(result_columnn->size(), 0);
return Status::OK();
}
return expr->execute_column(context, block, selector, count, result_columnn);
}
ShortCircuitCaseExpr::ShortCircuitCaseExpr(const TExprNode& node)
: ShortCircuitExpr(node), _has_else_expr(node.case_expr.has_else_expr) {}
ColumnPtr ShortCircuitExpr::dispatch_fill_columns(const ColumnPtr& true_column,
const Selector& true_selector,
const ColumnPtr& false_column,
const Selector& false_selector,
size_t count) const {
ColumnPtr result_column;
auto scalar_fill = [&](const auto& type) -> bool {
using DataType = std::decay_t<decltype(type)>;
result_column = ScalarFillWithSelector<DataType::PType>::fill(
_data_type, true_column, true_selector, false_column, false_selector, count);
return true;
};
if (!dispatch_switch_scalar(_data_type->get_primitive_type(), scalar_fill)) {
result_column = NonScalarFillWithSelector::fill(_data_type, true_column, true_selector,
false_column, false_selector, count);
}
return result_column;
}
ColumnPtr ShortCircuitExpr::dispatch_fill_columns(
const std::vector<ColumnAndSelector>& columns_and_selectors, size_t count) const {
ColumnPtr result_column;
auto scalar_fill = [&](const auto& type) -> bool {
using DataType = std::decay_t<decltype(type)>;
result_column = ScalarFillWithSelector<DataType::PType>::fill(_data_type,
columns_and_selectors, count);
return true;
};
if (!dispatch_switch_scalar(_data_type->get_primitive_type(), scalar_fill)) {
result_column = NonScalarFillWithSelector::fill(_data_type, columns_and_selectors, count);
}
return result_column;
}
/// TODO: Potential optimization opportunities:
// 1. The logic of IF and CASE is similar; IFNULL and COALESCE are also similar.
// A better approach would be to keep only CASE and COALESCE, with the optimizer
// rewriting IF to CASE and IFNULL to COALESCE.
// 2. The following functions (execute_if_selector, execute_case_selector,
// execute_ifnull_selector, execute_coalesce_selector) could theoretically be
// further abstracted, e.g., by introducing a "column with selector" structure.
// However, since there are currently few places handling selectors and keeping
// them separate makes code review easier, each function is implemented individually.
void execute_if_selector(const ColumnPtr& cond_column, const Selector* selector, size_t count,
Selector& matched_executor_selector, Selector& matched_self_selector,
Selector& not_matched_executor_selector,
Selector& not_matched_self_selector) {
ConditionColumnView condition_view = ConditionColumnView::create(cond_column, selector, count);
matched_executor_selector.reserve(count);
matched_self_selector.reserve(count);
not_matched_executor_selector.reserve(count);
not_matched_self_selector.reserve(count);
auto null_func = [&](size_t self_index, size_t executor_index) {
not_matched_self_selector.push_back(self_index);
not_matched_executor_selector.push_back(executor_index);
};
auto true_func = [&](size_t self_index, size_t executor_index) {
matched_self_selector.push_back(self_index);
matched_executor_selector.push_back(executor_index);
};
auto false_func = [&](size_t self_index, size_t executor_index) {
not_matched_self_selector.push_back(self_index);
not_matched_executor_selector.push_back(executor_index);
};
condition_view.for_each(null_func, true_func, false_func);
}
Status ShortCircuitIfExpr::execute_column(VExprContext* context, const Block* block,
Selector* selector, size_t count,
ColumnPtr& result_column) const {
DCHECK(_open_finished || block == nullptr) << debug_string();
DCHECK(selector == nullptr || selector->size() == count);
ColumnPtr cond_column;
RETURN_IF_ERROR(
try_early_return_on_empty(_children[0], context, block, selector, count, cond_column));
DCHECK_EQ(cond_column->size(), count);
Selector true_executor_selector;
Selector true_self_selector;
Selector false_executor_selector;
Selector false_self_selector;
execute_if_selector(cond_column, selector, count, true_executor_selector, true_self_selector,
false_executor_selector, false_self_selector);
ColumnPtr true_column;
RETURN_IF_ERROR(try_early_return_on_empty(_children[1], context, block, &true_executor_selector,
true_executor_selector.size(), true_column));
ColumnPtr false_column;
RETURN_IF_ERROR(try_early_return_on_empty(_children[2], context, block,
&false_executor_selector,
false_executor_selector.size(), false_column));
result_column = dispatch_fill_columns(true_column, true_self_selector, false_column,
false_self_selector, count);
return Status::OK();
}
void execute_case_selector(const ColumnPtr& cond_column, const Selector* executor_selector,
size_t executor_count, const Selector* current_selector,
Selector& matched_executor_selector, Selector& matched_current_selector,
Selector& not_matched_executor_selector,
Selector& not_matched_current_selector) {
ConditionColumnView condition_view =
ConditionColumnView::create(cond_column, executor_selector, executor_count);
if (current_selector == nullptr) {
// If current_selector is nullptr, this is the first condition column execution,
// so self_index and current_index are the same
auto null_func = [&](size_t self_index, size_t executor_index) {
not_matched_current_selector.push_back(self_index);
not_matched_executor_selector.push_back(executor_index);
};
auto true_func = [&](size_t self_index, size_t executor_index) {
matched_current_selector.push_back(self_index);
matched_executor_selector.push_back(executor_index);
};
auto false_func = [&](size_t self_index, size_t executor_index) {
not_matched_current_selector.push_back(self_index);
not_matched_executor_selector.push_back(executor_index);
};
condition_view.for_each(null_func, true_func, false_func);
} else {
// If current_selector is not nullptr, this is not the first condition column execution,
// so self_index and current_index are different.
// We need to determine current_index based on self_index and current_selector
const auto& current_selector_data = *current_selector;
DCHECK_EQ(current_selector_data.size(), executor_count);
auto null_func = [&](size_t self_index, size_t executor_index) {
not_matched_current_selector.push_back(current_selector_data[self_index]);
not_matched_executor_selector.push_back(executor_index);
};
auto true_func = [&](size_t self_index, size_t executor_index) {
matched_current_selector.push_back(current_selector_data[self_index]);
matched_executor_selector.push_back(executor_index);
};
auto false_func = [&](size_t self_index, size_t executor_index) {
not_matched_current_selector.push_back(current_selector_data[self_index]);
not_matched_executor_selector.push_back(executor_index);
};
condition_view.for_each(null_func, true_func, false_func);
}
}
Status ShortCircuitCaseExpr::execute_column(VExprContext* context, const Block* block,
Selector* selector, size_t count,
ColumnPtr& result_column) const {
DCHECK(_open_finished || block == nullptr) << debug_string();
DCHECK(selector == nullptr || selector->size() == count);
// Structure: WHEN expr1 THEN result1 [WHEN expr2 THEN result2 ...] [ELSE else_result]
// _children layout: [when1, then1, when2, then2, ..., else?]
//
// Examples:
// CASE WHEN a THEN b END -> children=[a,b], size=2, num_branches=2 (b + null)
// CASE WHEN a THEN b ELSE c END -> children=[a,b,c], size=3, num_branches=2 (b + c)
// CASE WHEN a THEN b WHEN c THEN d END -> children=[a,b,c,d], size=4, num_branches=3 (b + d + null)
//
// num_branches = number of when/then pairs + 1 (for else or null output)
const size_t num_branches = _children.size() / 2 + 1;
std::vector<ColumnAndSelector> columns_and_selectors;
columns_and_selectors.resize(num_branches);
Selector* executor_selector = selector;
size_t executor_count = count;
Selector* current_selector = nullptr;
Selector left_not_matched_executor_selector;
Selector left_not_matched_current_selector;
int64_t executed_branches = 0;
for (int64_t i = 0; i < static_cast<int64_t>(_children.size()) - _has_else_expr; i += 2) {
executed_branches++;
ColumnPtr when_column_ptr;
RETURN_IF_ERROR(try_early_return_on_empty(_children[i], context, block, executor_selector,
executor_count, when_column_ptr));
DCHECK(executor_selector == nullptr ||
executor_selector->size() == when_column_ptr->size());
Selector matched_executor_selector;
Selector matched_current_selector;
Selector not_matched_executor_selector;
Selector not_matched_current_selector;
execute_case_selector(when_column_ptr, executor_selector, executor_count, current_selector,
matched_executor_selector, matched_current_selector,
not_matched_executor_selector, not_matched_current_selector);
ColumnPtr then_column_ptr;
RETURN_IF_ERROR(try_early_return_on_empty(
_children[i + 1], context, block, &matched_executor_selector,
matched_executor_selector.size(), then_column_ptr));
columns_and_selectors[i / 2].column = then_column_ptr;
columns_and_selectors[i / 2].selector.swap(matched_current_selector);
left_not_matched_executor_selector.swap(not_matched_executor_selector);
left_not_matched_current_selector.swap(not_matched_current_selector);
executor_selector = &left_not_matched_executor_selector;
executor_count = left_not_matched_executor_selector.size();
current_selector = &left_not_matched_current_selector;
if (executor_count == 0) {
columns_and_selectors.resize(executed_branches);
// All rows have been matched; no need to process other branch
result_column = dispatch_fill_columns(columns_and_selectors, count);
return Status::OK();
}
}
// handle the else branch
if (_has_else_expr) {
DCHECK_EQ(columns_and_selectors.size(), (_children.size() + 1) / 2);
ColumnPtr else_column_ptr;
RETURN_IF_ERROR(try_early_return_on_empty(_children.back(), context, block,
executor_selector, executor_count,
else_column_ptr));
columns_and_selectors.back().column = else_column_ptr;
columns_and_selectors.back().selector.swap(left_not_matched_current_selector);
} else {
// no else branch, all remaining rows are null
columns_and_selectors.back().selector.swap(left_not_matched_current_selector);
}
result_column = dispatch_fill_columns(columns_and_selectors, count);
return Status::OK();
}
// For ifnull(expr1, expr2), we distinguish between null and not-null rows of expr1:
// - Not-null rows: Use expr1's value directly (already computed), only need self_selector
// to filter the column. No need for executor_selector since we don't execute further.
// - Null rows: Need executor_selector to execute expr2, and current_selector for result filling.
//
// This differs from IF where both branches need to execute child expressions.
void execute_ifnull_selector(const ColumnPtr& cond_column, const Selector* selector, size_t count,
Selector& null_executor_selector, Selector& null_current_selector,
Selector& not_null_self_selector) {
ConditionColumnNullView condition_view =
ConditionColumnNullView::create(cond_column, selector, count);
null_executor_selector.reserve(count);
null_current_selector.reserve(count);
not_null_self_selector.reserve(count);
auto null_func = [&](size_t self_index, size_t executor_index) {
null_current_selector.push_back(self_index);
null_executor_selector.push_back(executor_index);
};
auto not_null_func = [&](size_t self_index, size_t executor_index) {
not_null_self_selector.push_back(self_index);
};
condition_view.for_each(null_func, not_null_func);
}
Status ShortCircuitIfNullExpr::execute_column(VExprContext* context, const Block* block,
Selector* selector, size_t count,
ColumnPtr& result_column) const {
DCHECK(_open_finished || block == nullptr) << debug_string();
DCHECK(selector == nullptr || selector->size() == count);
ColumnPtr expr1_column;
RETURN_IF_ERROR(
try_early_return_on_empty(_children[0], context, block, selector, count, expr1_column));
DCHECK_EQ(expr1_column->size(), count);
Selector null_executor_selector;
Selector null_current_selector;
Selector not_null_self_selector;
execute_ifnull_selector(expr1_column, selector, count, null_executor_selector,
null_current_selector, not_null_self_selector);
// filter not null part
expr1_column = filter_column_with_selector(expr1_column, &not_null_self_selector,
not_null_self_selector.size());
ColumnPtr expr2_column;
RETURN_IF_ERROR(try_early_return_on_empty(_children[1], context, block, &null_executor_selector,
null_executor_selector.size(), expr2_column));
result_column = dispatch_fill_columns(expr1_column, not_null_self_selector, expr2_column,
null_current_selector, count);
return Status::OK();
}
void execute_coalesce_selector(const ColumnPtr& cond_column, const Selector* executor_selector,
size_t executor_count, const Selector* current_selector,
Selector& null_executor_selector, Selector& null_current_selector,
Selector& not_null_current_selector,
Selector& not_null_self_selector) {
ConditionColumnNullView condition_view =
ConditionColumnNullView::create(cond_column, executor_selector, executor_count);
if (current_selector == nullptr) {
// If current_selector is nullptr, this is the first condition column execution,
// so self_index and current_index are the same
auto null_func = [&](size_t self_index, size_t executor_index) {
null_current_selector.push_back(self_index);
null_executor_selector.push_back(executor_index);
};
auto not_null_func = [&](size_t self_index, size_t executor_index) {
not_null_current_selector.push_back(self_index);
not_null_self_selector.push_back(self_index);
};
condition_view.for_each(null_func, not_null_func);
} else {
// If current_selector is not nullptr, this is not the first condition column execution,
// so self_index and current_index are different.
// We need to determine current_index based on self_index and current_selector
const auto& current_selector_data = *current_selector;
DCHECK_EQ(current_selector_data.size(), executor_count);
auto null_func = [&](size_t self_index, size_t executor_index) {
null_current_selector.push_back(current_selector_data[self_index]);
null_executor_selector.push_back(executor_index);
};
auto not_null_func = [&](size_t self_index, size_t executor_index) {
not_null_current_selector.push_back(current_selector_data[self_index]);
not_null_self_selector.push_back(self_index);
};
condition_view.for_each(null_func, not_null_func);
}
}
Status ShortCircuitCoalesceExpr::execute_column(VExprContext* context, const Block* block,
Selector* selector, size_t count,
ColumnPtr& result_column) const {
DCHECK(_open_finished || block == nullptr) << debug_string();
DCHECK(selector == nullptr || selector->size() == count);
std::vector<ColumnAndSelector> columns_and_selectors;
columns_and_selectors.resize(_children.size() +
1); // the last one represents the selector for null output
Selector* executor_selector = selector;
size_t executor_count = count;
Selector* current_selector = nullptr;
Selector left_null_executor_selector;
Selector left_null_current_selector;
int64_t executed_branches = 0;
for (int64_t i = 0; i < _children.size(); ++i) {
executed_branches++;
ColumnPtr child_column_ptr;
RETURN_IF_ERROR(try_early_return_on_empty(_children[i], context, block, executor_selector,
executor_count, child_column_ptr));
DCHECK(executor_selector == nullptr ||
executor_selector->size() == child_column_ptr->size());
Selector null_executor_selector;
Selector null_current_selector;
Selector not_null_current_selector;
Selector not_null_self_selector;
execute_coalesce_selector(child_column_ptr, executor_selector, executor_count,
current_selector, null_executor_selector, null_current_selector,
not_null_current_selector, not_null_self_selector);
// use not_null_self_selector to filter child_column_ptr
child_column_ptr = filter_column_with_selector(child_column_ptr, &not_null_self_selector,
not_null_self_selector.size());
columns_and_selectors[i].column = child_column_ptr;
columns_and_selectors[i].selector.swap(not_null_current_selector);
left_null_executor_selector.swap(null_executor_selector);
left_null_current_selector.swap(null_current_selector);
executor_selector = &left_null_executor_selector;
executor_count = left_null_executor_selector.size();
current_selector = &left_null_current_selector;
if (executor_count == 0) {
columns_and_selectors.resize(executed_branches);
// All rows have been matched; no need to process other branch
result_column = dispatch_fill_columns(columns_and_selectors, count);
return Status::OK();
}
}
// the remaining null rows at the end
columns_and_selectors.back().selector.swap(left_null_current_selector);
result_column = dispatch_fill_columns(columns_and_selectors, count);
return Status::OK();
}
} // namespace doris::vectorized