blob: b26f01cc2664c55e54499709b31371377466a08c [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "exprs/vcondition_expr.h"
#include <gen_cpp/Exprs_types.h>
#include <gen_cpp/Types_types.h>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <cmath>
#include <limits>
#include <memory>
#include <vector>
#include "core/column/column_nullable.h"
#include "core/column/column_vector.h"
#include "core/data_type/data_type_nullable.h"
#include "core/data_type/data_type_number.h"
#include "exprs/vexpr.h"
#include "exprs/vexpr_context.h"
namespace doris {
// Build a minimal TExprNode as the input of VectorizedCoalesceExpr.
// Only fields required by the VExpr base ctor (so that create_data_type works) are set.
static TExprNode make_coalesce_node(TPrimitiveType::type ptype, bool is_nullable) {
TExprNode node;
node.node_type = TExprNodeType::FUNCTION_CALL;
node.num_children = 0;
node.__set_is_nullable(is_nullable);
TTypeDesc type_desc;
TTypeNode type_node;
type_node.type = TTypeNodeType::SCALAR;
TScalarType scalar_type;
scalar_type.__set_type(ptype);
type_node.__set_scalar_type(scalar_type);
type_desc.types.push_back(type_node);
node.__set_type(type_desc);
TFunction fn;
TFunctionName fn_name;
fn_name.function_name = "coalesce";
fn.name = fn_name;
node.__set_fn(fn);
return node;
}
// Mock child expression: returns the pre-injected ColumnPtr / DataTypePtr to the parent expr.
// Behavior is modeled after MockVExprForTryCast in try_cast_expr_test.cpp.
class MockChildVExpr : public VExpr {
public:
MockChildVExpr(ColumnPtr column, DataTypePtr type)
: _column(std::move(column)), _type(std::move(type)) {}
MOCK_CONST_METHOD0(clone, VExprSPtr());
const std::string& expr_name() const override { return _name; }
Status execute(VExprContext* context, Block* block, int* result_column_id) const override {
block->insert({_column, _type, "mock_child"});
*result_column_id = static_cast<int>(block->columns()) - 1;
return Status::OK();
}
Status execute_column_impl(VExprContext* /*context*/, const Block* /*block*/,
const Selector* /*selector*/, size_t /*count*/,
ColumnPtr& result_column) const override {
result_column = _column;
return Status::OK();
}
DataTypePtr execute_type(const Block* /*block*/) const override { return _type; }
private:
ColumnPtr _column;
DataTypePtr _type;
std::string _name = "mock_child";
};
// Helper: build a nullable Float64 column from a list of (value, is_null) pairs.
static ColumnPtr make_nullable_float64_column(const std::vector<std::pair<double, bool>>& values) {
auto nested = ColumnFloat64::create();
auto null_map = ColumnUInt8::create();
for (auto& [v, is_null] : values) {
nested->insert_value(v);
null_map->insert_value(is_null ? 1 : 0);
}
return ColumnNullable::create(std::move(nested), std::move(null_map));
}
// Helper: build a non-nullable Float64 column from a list of doubles.
static ColumnPtr make_float64_column(const std::vector<double>& values) {
auto col = ColumnFloat64::create();
for (auto v : values) {
col->insert_value(v);
}
return col;
}
// Helper: extract the Float64 value at `row` from the result column,
// handling both nullable and non-nullable cases.
static double get_float64_value(const ColumnPtr& column, size_t row, bool* is_null = nullptr) {
if (const auto* nullable = check_and_get_column<ColumnNullable>(column.get())) {
if (is_null) {
*is_null = nullable->is_null_at(row);
}
const auto& nested =
assert_cast<const ColumnFloat64&>(nullable->get_nested_column()).get_data();
return nested[row];
}
if (is_null) {
*is_null = false;
}
return assert_cast<const ColumnFloat64*>(column.get())->get_data()[row];
}
class VConditionExprCoalesceTest : public ::testing::Test {};
// Targets the fix: after some rows of col0 are filled, NaN/Inf values in the
// same rows of later columns must not pollute the already-filled result via
// "value * 0" arithmetic.
// Input:
// col0 (nullable, double): [1.0, NULL]
// col1 (non-nullable, double): [NaN, 100.0]
// Expected coalesce result: [1.0, 100.0]
TEST_F(VConditionExprCoalesceTest, Float64_NaN_NotPolluteResult) {
auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true);
auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
// _data_type is already set to Nullable(Float64) by the base ctor; reassert it explicitly
// through the public data_type() accessor (the underlying field is protected).
coalesce_expr->data_type() =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>());
const double kNaN = std::numeric_limits<double>::quiet_NaN();
auto col0 = make_nullable_float64_column({{1.0, false}, {0.0, true}});
auto col1 = make_float64_column({kNaN, 100.0});
auto child0 = std::make_shared<MockChildVExpr>(
col0, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>()));
auto child1 = std::make_shared<MockChildVExpr>(col1, std::make_shared<DataTypeFloat64>());
coalesce_expr->add_child(child0);
coalesce_expr->add_child(child1);
VExprContext context(coalesce_expr);
ColumnPtr result;
auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
/*selector=*/nullptr, /*count=*/2, result);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_TRUE(result.get() != nullptr);
ASSERT_EQ(result->size(), 2);
bool is_null = false;
auto v0 = get_float64_value(result, 0, &is_null);
EXPECT_FALSE(is_null);
EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by NaN, got " << v0;
EXPECT_DOUBLE_EQ(v0, 1.0);
auto v1 = get_float64_value(result, 1, &is_null);
EXPECT_FALSE(is_null);
EXPECT_DOUBLE_EQ(v1, 100.0);
}
// Same shape as above, but the polluting value in the later column is +Inf:
// col0 (nullable, double): [2.0, NULL]
// col1 (non-nullable, double): [+Inf, 200.0]
// Expected coalesce result: [2.0, 200.0]
TEST_F(VConditionExprCoalesceTest, Float64_Inf_NotPolluteResult) {
auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true);
auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
coalesce_expr->data_type() =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>());
const double kInf = std::numeric_limits<double>::infinity();
auto col0 = make_nullable_float64_column({{2.0, false}, {0.0, true}});
auto col1 = make_float64_column({kInf, 200.0});
coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
col0, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())));
coalesce_expr->add_child(
std::make_shared<MockChildVExpr>(col1, std::make_shared<DataTypeFloat64>()));
VExprContext context(coalesce_expr);
ColumnPtr result;
auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
/*selector=*/nullptr, /*count=*/2, result);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(result->size(), 2);
bool is_null = false;
auto v0 = get_float64_value(result, 0, &is_null);
EXPECT_FALSE(is_null);
EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by Inf*0, got " << v0;
EXPECT_TRUE(std::isfinite(v0));
EXPECT_DOUBLE_EQ(v0, 2.0);
auto v1 = get_float64_value(result, 1, &is_null);
EXPECT_FALSE(is_null);
EXPECT_DOUBLE_EQ(v1, 200.0);
}
// Verify that when NaN itself is the valid non-null value picked by coalesce,
// the result must preserve the NaN (the fix should not over-correct).
// col0 (nullable, double): [NaN, NULL]
// col1 (non-nullable, double): [10.0, 20.0]
// Expected: row0 takes col0's NaN (non-null, value is NaN); row1 takes col1's 20.0.
TEST_F(VConditionExprCoalesceTest, Float64_NaN_PreservedWhenSelected) {
auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true);
auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
coalesce_expr->data_type() =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>());
const double kNaN = std::numeric_limits<double>::quiet_NaN();
auto col0 = make_nullable_float64_column({{kNaN, false}, {0.0, true}});
auto col1 = make_float64_column({10.0, 20.0});
coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
col0, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())));
coalesce_expr->add_child(
std::make_shared<MockChildVExpr>(col1, std::make_shared<DataTypeFloat64>()));
VExprContext context(coalesce_expr);
ColumnPtr result;
auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
/*selector=*/nullptr, /*count=*/2, result);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(result->size(), 2);
bool is_null = false;
auto v0 = get_float64_value(result, 0, &is_null);
EXPECT_FALSE(is_null);
EXPECT_TRUE(std::isnan(v0)) << "row0 should preserve NaN from col0";
auto v1 = get_float64_value(result, 1, &is_null);
EXPECT_FALSE(is_null);
EXPECT_DOUBLE_EQ(v1, 20.0);
}
// Float32 variant: same shape as the Float64 NaN case.
TEST_F(VConditionExprCoalesceTest, Float32_NaN_NotPolluteResult) {
auto coalesce_node = make_coalesce_node(TPrimitiveType::FLOAT, /*is_nullable=*/true);
auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
coalesce_expr->data_type() =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat32>());
const float kNaN = std::numeric_limits<float>::quiet_NaN();
auto nested0 = ColumnFloat32::create();
nested0->insert_value(1.5f);
nested0->insert_value(0.0f);
auto null_map0 = ColumnUInt8::create();
null_map0->insert_value(0);
null_map0->insert_value(1);
ColumnPtr col0 = ColumnNullable::create(std::move(nested0), std::move(null_map0));
auto nested1 = ColumnFloat32::create();
nested1->insert_value(kNaN);
nested1->insert_value(7.5f);
ColumnPtr col1 = std::move(nested1);
coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
col0, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat32>())));
coalesce_expr->add_child(
std::make_shared<MockChildVExpr>(col1, std::make_shared<DataTypeFloat32>()));
VExprContext context(coalesce_expr);
ColumnPtr result;
auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
/*selector=*/nullptr, /*count=*/2, result);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(result->size(), 2);
auto get_f32 = [&](size_t row, bool* is_null) -> float {
if (const auto* nullable = check_and_get_column<ColumnNullable>(result.get())) {
*is_null = nullable->is_null_at(row);
return assert_cast<const ColumnFloat32&>(nullable->get_nested_column()).get_data()[row];
}
*is_null = false;
return assert_cast<const ColumnFloat32*>(result.get())->get_data()[row];
};
bool is_null = false;
float v0 = get_f32(0, &is_null);
EXPECT_FALSE(is_null);
EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by NaN, got " << v0;
EXPECT_FLOAT_EQ(v0, 1.5f);
float v1 = get_f32(1, &is_null);
EXPECT_FALSE(is_null);
EXPECT_FLOAT_EQ(v1, 7.5f);
}
// Regression: verify that the non-floating (integer) path is unaffected by the Float branch change.
TEST_F(VConditionExprCoalesceTest, Int32_NormalPathStillWorks) {
auto coalesce_node = make_coalesce_node(TPrimitiveType::INT, /*is_nullable=*/true);
auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
coalesce_expr->data_type() =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
auto nested0 = ColumnInt32::create();
nested0->insert_value(11);
nested0->insert_value(0);
auto null_map0 = ColumnUInt8::create();
null_map0->insert_value(0);
null_map0->insert_value(1);
ColumnPtr col0 = ColumnNullable::create(std::move(nested0), std::move(null_map0));
auto nested1 = ColumnInt32::create();
nested1->insert_value(99);
nested1->insert_value(22);
ColumnPtr col1 = std::move(nested1);
coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
col0, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>())));
coalesce_expr->add_child(
std::make_shared<MockChildVExpr>(col1, std::make_shared<DataTypeInt32>()));
VExprContext context(coalesce_expr);
ColumnPtr result;
auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
/*selector=*/nullptr, /*count=*/2, result);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(result->size(), 2);
auto get_int = [&](size_t row, bool* is_null) -> int32_t {
if (const auto* nullable = check_and_get_column<ColumnNullable>(result.get())) {
*is_null = nullable->is_null_at(row);
return assert_cast<const ColumnInt32&>(nullable->get_nested_column()).get_data()[row];
}
*is_null = false;
return assert_cast<const ColumnInt32*>(result.get())->get_data()[row];
};
bool is_null = false;
EXPECT_EQ(get_int(0, &is_null), 11);
EXPECT_FALSE(is_null);
EXPECT_EQ(get_int(1, &is_null), 22);
EXPECT_FALSE(is_null);
}
} // namespace doris