be/test/vec/function/function_test_util.h - doris - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include <gtest/gtest-message.h>
 #include <gtest/gtest-test-part.h>
 #include <mysql/mysql.h>

 #include <cstdint>
 #include <ctime>
 #include <memory>
 #include <string>
 #include <type_traits>
 #include <utility>
 #include <vector>

 #include "common/status.h"
 #include "olap/olap_common.h"
 #include "runtime/define_primitive_type.h"
 #include "runtime/types.h"
 #include "testutil/any_type.h"
 #include "testutil/function_utils.h"
 #include "testutil/test_util.h"
 #include "udf/udf.h"
 #include "util/bitmap_value.h"
 #include "util/jsonb_utils.h"
 #include "vec/columns/column.h"
 #include "vec/columns/column_complex.h"
 #include "vec/columns/column_const.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/common/string_ref.h"
 #include "vec/core/block.h"
 #include "vec/core/column_numbers.h"
 #include "vec/core/column_with_type_and_name.h"
 #include "vec/core/field.h"
 #include "vec/core/types.h"
 #include "vec/core/wide_integer.h"
 #include "vec/data_types/data_type.h"
 #include "vec/data_types/data_type_bitmap.h"
 #include "vec/data_types/data_type_hll.h"
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_string.h"
 #include "vec/data_types/data_type_time.h"
 #include "vec/functions/simple_function_factory.h"

 namespace doris::vectorized {

 class DataTypeJsonb;
 class TableFunction;
 template <typename T>
 class DataTypeDecimal;
 using InputDataSet = std::vector<std::vector<AnyType>>; // without result
 using CellSet = std::vector<AnyType>;
 using Expect = AnyType;
 using Row = std::pair<CellSet, Expect>;
 using DataSet = std::vector<Row>;
 using InputTypeSet = std::vector<AnyType>;

 // FIXME: should use exception or expected to deal null value.
 int64_t str_to_date_time(std::string datetime_str, bool data_time = true);
 uint32_t str_to_date_v2(std::string datetime_str, std::string datetime_format);
 uint64_t str_to_datetime_v2(std::string datetime_str, std::string datetime_format);

 struct Nullable {
     TypeIndex tp;
 };

 struct Notnull {
     TypeIndex tp;
 };

 struct ConstedNotnull {
     TypeIndex tp;
 };

 namespace ut_type {
 using BOOLEAN = uint8_t;
 using TINYINT = int8_t;
 using SMALLINT = int16_t;
 using INT = int32_t;
 using BIGINT = int64_t;
 using LARGEINT = int128_t;

 using VARCHAR = std::string;
 using CHAR = std::string;
 using STRING = std::string;

 using DOUBLE = double;
 using FLOAT = float;

 using IPV4 = uint32_t;
 using IPV6 = uint128_t;

 // cell constructors. could also use from_int_frac if you'd like
 inline auto DECIMALV2 = Decimal128V2::double_to_decimal;
 inline auto DECIMAL32 = [](int32_t x, int32_t y) { return Decimal32::from_int_frac(x, y, 5); };
 inline auto DECIMAL64 = [](int64_t x, int64_t y) { return Decimal64::from_int_frac(x, y, 9); };
 inline auto DECIMAL128V3 = [](int128_t x, int128_t y) {
     return Decimal128V3::from_int_frac(x, y, 20);
 };
 inline auto DECIMAL256 = [](wide::Int256 x, wide::Int256 y) {
     return Decimal256::from_int_frac(x, y, 40);
 };

 inline auto DECIMALFIELD = [](double v) {
     return DecimalField<Decimal128V2>(Decimal128V2::double_to_decimal(v), 9);
 };

 using DATETIME = std::string;

 template <typename T>
 struct DataTypeTraits;

 template <>
 struct DataTypeTraits<DataTypeInt8> {
     using type = Int8;
 };

 template <>
 struct DataTypeTraits<DataTypeInt16> {
     using type = Int16;
 };

 template <>
 struct DataTypeTraits<DataTypeInt32> {
     using type = Int32;
 };

 template <>
 struct DataTypeTraits<DataTypeInt64> {
     using type = Int64;
 };

 template <>
 struct DataTypeTraits<DataTypeInt128> {
     using type = Int128;
 };

 template <>
 struct DataTypeTraits<DataTypeFloat32> {
     using type = Float32;
 };

 template <>
 struct DataTypeTraits<DataTypeFloat64> {
     using type = Float64;
 };

 template <typename To, typename From>
 constexpr decltype(auto) convert_to(From value) {
     using ToType = typename DataTypeTraits<To>::type;
     return ToType(value);
 }

 template <typename T>
 constexpr TypeIndex get_type_index() {
     if constexpr (std::is_same_v<T, DataTypeInt8>) {
         return TypeIndex::Int8;
     } else if constexpr (std::is_same_v<T, DataTypeInt16>) {
         return TypeIndex::Int16;
     } else if constexpr (std::is_same_v<T, DataTypeInt32>) {
         return TypeIndex::Int32;
     } else if constexpr (std::is_same_v<T, DataTypeInt64>) {
         return TypeIndex::Int64;
     } else if constexpr (std::is_same_v<T, DataTypeInt128>) {
         return TypeIndex::Int128;
     } else if constexpr (std::is_same_v<T, DataTypeFloat32>) {
         return TypeIndex::Float32;
     } else if constexpr (std::is_same_v<T, DataTypeFloat64>) {
         return TypeIndex::Float64;
     }
 }

 struct UTDataTypeDesc {
     DataTypePtr data_type;
     doris::TypeDescriptor type_desc;
     std::string col_name;
     bool is_const = false;
     bool is_nullable = true;
 };
 using UTDataTypeDescs = std::vector<UTDataTypeDesc>;

 } // namespace ut_type

 size_t type_index_to_data_type(const std::vector<AnyType>& input_types, size_t index,
                                ut_type::UTDataTypeDesc& ut_desc, DataTypePtr& type);
 bool parse_ut_data_type(const std::vector<AnyType>& input_types, ut_type::UTDataTypeDescs& descs);

 bool insert_cell(MutableColumnPtr& column, DataTypePtr type_ptr, const AnyType& cell);

 Block* create_block_from_inputset(const InputTypeSet& input_types, const InputDataSet& input_set);

 Block* process_table_function(TableFunction* fn, Block* input_block,
                               const InputTypeSet& output_types);
 void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types,
                               const InputDataSet& input_set, const InputTypeSet& output_types,
                               const InputDataSet& output_set);

 // Null values are represented by Null()
 // The type of the constant column is represented as follows: Consted {TypeIndex::String}
 // A DataSet with a constant column can only have one row of data
 template <typename ReturnType, bool nullable = false>
 Status check_function(const std::string& func_name, const InputTypeSet& input_types,
                       const DataSet& data_set, bool expect_fail = false) {
     // 1.0 create data type
     ut_type::UTDataTypeDescs descs;
     EXPECT_TRUE(parse_ut_data_type(input_types, descs));

     // 1.1 insert data and create block
     auto row_size = data_set.size();
     Block block;
     for (size_t i = 0; i < descs.size(); ++i) {
         auto& desc = descs[i];
         auto column = desc.data_type->create_column();
         column->reserve(row_size);

         auto type_ptr = desc.data_type->is_nullable()
                                 ? ((DataTypeNullable*)(desc.data_type.get()))->get_nested_type()
                                 : desc.data_type;
         for (int j = 0; j < row_size; j++) {
             EXPECT_TRUE(insert_cell(column, type_ptr, data_set[j].first[i]));
         }

         if (desc.is_const) {
             column = ColumnConst::create(std::move(column), row_size);
         }
         block.insert({std::move(column), desc.data_type, desc.col_name});
     }

     // 1.2 prepare args for function call
     ColumnNumbers arguments;
     std::vector<doris::TypeDescriptor> arg_types;
     std::vector<std::shared_ptr<ColumnPtrWrapper>> constant_col_ptrs;
     std::vector<std::shared_ptr<ColumnPtrWrapper>> constant_cols;
     for (size_t i = 0; i < descs.size(); ++i) {
         auto& desc = descs[i];
         arguments.push_back(static_cast<unsigned int>(i));
         arg_types.push_back(desc.type_desc);
         if (desc.is_const) {
             constant_col_ptrs.push_back(
                     std::make_shared<ColumnPtrWrapper>(block.get_by_position(i).column));
             constant_cols.push_back(constant_col_ptrs.back());
         } else {
             constant_cols.push_back(nullptr);
         }
     }

     // 2. execute function
     auto return_type = nullable ? make_nullable(std::make_shared<ReturnType>())
                                 : std::make_shared<ReturnType>();
     auto func = SimpleFunctionFactory::instance().get_function(
             func_name, block.get_columns_with_type_and_name(), return_type);
     EXPECT_TRUE(func != nullptr);

     doris::TypeDescriptor fn_ctx_return;
     if constexpr (std::is_same_v<ReturnType, DataTypeUInt8>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_BOOLEAN;
     } else if constexpr (std::is_same_v<ReturnType, DataTypeInt32>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_INT;
     } else if constexpr (std::is_same_v<ReturnType, DataTypeFloat64> ||
                          std::is_same_v<ReturnType, DataTypeTimeV2>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DOUBLE;
     } else if constexpr (std::is_same_v<ReturnType, DateTime>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIME;
     } else if (std::is_same_v<ReturnType, DateV2>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DATEV2;
     } else if (std::is_same_v<ReturnType, DateTimeV2>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIMEV2;
     } else if (std::is_same_v<ReturnType, Decimal128V2>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMALV2;
     } else if (std::is_same_v<ReturnType, Decimal32>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL32;
     } else if (std::is_same_v<ReturnType, Decimal64>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL64;
     } else if (std::is_same_v<ReturnType, Decimal128V3>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL128I;
     } else if (std::is_same_v<ReturnType, Decimal256>) {
         fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL256;
     } else {
         fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE;
     }

     FunctionUtils fn_utils(fn_ctx_return, arg_types, 0);
     auto* fn_ctx = fn_utils.get_fn_ctx();
     fn_ctx->set_constant_cols(constant_cols);
     static_cast<void>(func->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL));
     static_cast<void>(func->open(fn_ctx, FunctionContext::THREAD_LOCAL));

     block.insert({nullptr, return_type, "result"});

     auto result = block.columns() - 1;
     auto st = func->execute(fn_ctx, block, arguments, result, row_size);
     if (expect_fail) {
         EXPECT_NE(Status::OK(), st);
         return st;
     } else {
         EXPECT_EQ(Status::OK(), st);
     }

     static_cast<void>(func->close(fn_ctx, FunctionContext::THREAD_LOCAL));
     static_cast<void>(func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL));

     // 3. check the result of function
     ColumnPtr column = block.get_columns()[result];
     EXPECT_TRUE(column);
     if (const auto* column_str = check_and_get_column<ColumnString>(column.get())) {
         column_str->sanity_check();
     }

     for (int i = 0; i < row_size; ++i) {
         // update current line
         if (row_size > 1) {
             TestCaseInfo::cur_cast_line = i;
         }
         auto check_column_data = [&]() {
             if constexpr (std::is_same_v<ReturnType, DataTypeJsonb>) {
                 const auto& expect_data = any_cast<String>(data_set[i].second);
                 auto s = column->get_data_at(i);
                 if (expect_data.empty()) {
                     // zero size result means invalid
                     EXPECT_EQ(0, s.size) << " invalid result size should be 0 at row " << i;
                 } else {
                     // convert jsonb binary value to json string to compare with expected json text
                     EXPECT_EQ(expect_data, JsonbToJson::jsonb_to_json_string(s.data, s.size))
                             << " at row " << i;
                 }
             } else {
                 Field field;
                 column->get(i, field);

                 const auto& expect_data =
                         any_cast<typename ReturnType::FieldType>(data_set[i].second);

                 if constexpr (std::is_same_v<ReturnType, DataTypeDecimal<Decimal128V2>>) {
                     const auto& column_data = field.get<DecimalField<Decimal128V2>>().get_value();
                     EXPECT_EQ(expect_data.value, column_data.value) << " at row " << i;
                 } else if constexpr (std::is_same_v<ReturnType, DataTypeBitMap>) {
                     const ColumnBitmap* bitmap_col = nullptr;
                     if constexpr (nullable) {
                         const auto* nullable_column =
                                 assert_cast<const ColumnNullable*>(column.get());
                         bitmap_col = assert_cast<const ColumnBitmap*>(
                                 nullable_column->get_nested_column_ptr().get());
                     } else {
                         bitmap_col = assert_cast<const ColumnBitmap*>(column.get());
                     }
                     EXPECT_EQ(expect_data.to_string(), bitmap_col->get_element(i).to_string())
                             << " at row " << i;
                 } else if constexpr (std::is_same_v<ReturnType, DataTypeHLL>) {
                     const ColumnHLL* hll_col = nullptr;
                     if constexpr (nullable) {
                         const auto* nullable_column =
                                 assert_cast<const ColumnNullable*>(column.get());
                         hll_col = assert_cast<const ColumnHLL*>(
                                 nullable_column->get_nested_column_ptr().get());
                     } else {
                         hll_col = assert_cast<const ColumnHLL*>(column.get());
                     }
                     EXPECT_EQ(expect_data.to_string(), hll_col->get_element(i).to_string())
                             << " at row " << i;
                 } else if constexpr (std::is_same_v<ReturnType, DataTypeFloat32> ||
                                      std::is_same_v<ReturnType, DataTypeFloat64> ||
                                      std::is_same_v<ReturnType, DataTypeTimeV2>) {
                     const auto& column_data = field.get<DataTypeFloat64::FieldType>();
                     EXPECT_DOUBLE_EQ(expect_data, column_data) << " at row " << i;
                 } else {
                     const auto& column_data = field.get<typename ReturnType::FieldType>();
                     EXPECT_EQ(expect_data, column_data) << " at row " << i;
                 }
             }
         };

         if constexpr (nullable) {
             bool is_null = data_set[i].second.type() == &typeid(Null);
             EXPECT_EQ(is_null, column->is_null_at(i)) << " at row " << i;
             if (!is_null) {
                 check_column_data();
             }
         } else {
             check_column_data();
         }
     }

     return Status::OK();
 }

 using BaseInputTypeSet = std::vector<TypeIndex>;

 // Each parameter may be decorated with 'const', but each invocation of 'check_function' can only handle one state of the parameters.
 // If there are 'n' parameters, it would require manually calling 'check_function' 2^n times, whereas through this function, only one
 // invocation is needed.
 template <typename ReturnType, bool nullable = false>
 void check_function_all_arg_comb(const std::string& func_name, const BaseInputTypeSet& base_set,
                                  const DataSet& data_set) {
     size_t arg_cnt = base_set.size();
     TestCaseInfo::arg_size = static_cast<int>(arg_cnt);
     // Consider each parameter as a bit, if the j-th bit is 1, the j-th parameter is const; otherwise, it is not.
     for (int i = 0; i < (1 << arg_cnt); i++) {
         InputTypeSet input_types {};
         for (int j = 0; j < arg_cnt; j++) {
             if ((1 << j) & i) {
                 input_types.emplace_back(Consted {static_cast<TypeIndex>(base_set[j])});
             } else {
                 input_types.emplace_back(static_cast<TypeIndex>(base_set[j]));
             }
         }

         TestCaseInfo::arg_const_info = i, TestCaseInfo::cur_cast_line = -1;
         // exists parameter are const
         if (i != 0) {
             for (const auto& line : data_set) {
                 DataSet tmp_set {line};
                 static_cast<void>(check_function<ReturnType, nullable>(func_name, input_types,
                                                                        tmp_set, false));
             }
         } else {
             static_cast<void>(
                     check_function<ReturnType, nullable>(func_name, input_types, data_set));
         }
     }
 }
 } // namespace doris::vectorized
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#include <gtest/gtest-message.h>
	#include <gtest/gtest-test-part.h>
	#include <mysql/mysql.h>

	#include <cstdint>
	#include <ctime>
	#include <memory>
	#include <string>
	#include <type_traits>
	#include <utility>
	#include <vector>

	#include "common/status.h"
	#include "olap/olap_common.h"
	#include "runtime/define_primitive_type.h"
	#include "runtime/types.h"
	#include "testutil/any_type.h"
	#include "testutil/function_utils.h"
	#include "testutil/test_util.h"
	#include "udf/udf.h"
	#include "util/bitmap_value.h"
	#include "util/jsonb_utils.h"
	#include "vec/columns/column.h"
	#include "vec/columns/column_complex.h"
	#include "vec/columns/column_const.h"
	#include "vec/columns/column_nullable.h"
	#include "vec/common/string_ref.h"
	#include "vec/core/block.h"
	#include "vec/core/column_numbers.h"
	#include "vec/core/column_with_type_and_name.h"
	#include "vec/core/field.h"
	#include "vec/core/types.h"
	#include "vec/core/wide_integer.h"
	#include "vec/data_types/data_type.h"
	#include "vec/data_types/data_type_bitmap.h"
	#include "vec/data_types/data_type_hll.h"
	#include "vec/data_types/data_type_nullable.h"
	#include "vec/data_types/data_type_number.h"
	#include "vec/data_types/data_type_string.h"
	#include "vec/data_types/data_type_time.h"
	#include "vec/functions/simple_function_factory.h"

	namespace doris::vectorized {

	class DataTypeJsonb;
	class TableFunction;
	template <typename T>
	class DataTypeDecimal;
	using InputDataSet = std::vector<std::vector<AnyType>>; // without result
	using CellSet = std::vector<AnyType>;
	using Expect = AnyType;
	using Row = std::pair<CellSet, Expect>;
	using DataSet = std::vector<Row>;
	using InputTypeSet = std::vector<AnyType>;

	// FIXME: should use exception or expected to deal null value.
	int64_t str_to_date_time(std::string datetime_str, bool data_time = true);
	uint32_t str_to_date_v2(std::string datetime_str, std::string datetime_format);
	uint64_t str_to_datetime_v2(std::string datetime_str, std::string datetime_format);

	struct Nullable {
	TypeIndex tp;
	};

	struct Notnull {
	TypeIndex tp;
	};

	struct ConstedNotnull {
	TypeIndex tp;
	};

	namespace ut_type {
	using BOOLEAN = uint8_t;
	using TINYINT = int8_t;
	using SMALLINT = int16_t;
	using INT = int32_t;
	using BIGINT = int64_t;
	using LARGEINT = int128_t;

	using VARCHAR = std::string;
	using CHAR = std::string;
	using STRING = std::string;

	using DOUBLE = double;
	using FLOAT = float;

	using IPV4 = uint32_t;
	using IPV6 = uint128_t;

	// cell constructors. could also use from_int_frac if you'd like
	inline auto DECIMALV2 = Decimal128V2::double_to_decimal;
	inline auto DECIMAL32 = [](int32_t x, int32_t y) { return Decimal32::from_int_frac(x, y, 5); };
	inline auto DECIMAL64 = [](int64_t x, int64_t y) { return Decimal64::from_int_frac(x, y, 9); };
	inline auto DECIMAL128V3 = [](int128_t x, int128_t y) {
	return Decimal128V3::from_int_frac(x, y, 20);
	};
	inline auto DECIMAL256 = [](wide::Int256 x, wide::Int256 y) {
	return Decimal256::from_int_frac(x, y, 40);
	};

	inline auto DECIMALFIELD = [](double v) {
	return DecimalField<Decimal128V2>(Decimal128V2::double_to_decimal(v), 9);
	};

	using DATETIME = std::string;

	template <typename T>
	struct DataTypeTraits;

	template <>
	struct DataTypeTraits<DataTypeInt8> {
	using type = Int8;
	};

	template <>
	struct DataTypeTraits<DataTypeInt16> {
	using type = Int16;
	};

	template <>
	struct DataTypeTraits<DataTypeInt32> {
	using type = Int32;
	};

	template <>
	struct DataTypeTraits<DataTypeInt64> {
	using type = Int64;
	};

	template <>
	struct DataTypeTraits<DataTypeInt128> {
	using type = Int128;
	};

	template <>
	struct DataTypeTraits<DataTypeFloat32> {
	using type = Float32;
	};

	template <>
	struct DataTypeTraits<DataTypeFloat64> {
	using type = Float64;
	};

	template <typename To, typename From>
	constexpr decltype(auto) convert_to(From value) {
	using ToType = typename DataTypeTraits<To>::type;
	return ToType(value);
	}

	template <typename T>
	constexpr TypeIndex get_type_index() {
	if constexpr (std::is_same_v<T, DataTypeInt8>) {
	return TypeIndex::Int8;
	} else if constexpr (std::is_same_v<T, DataTypeInt16>) {
	return TypeIndex::Int16;
	} else if constexpr (std::is_same_v<T, DataTypeInt32>) {
	return TypeIndex::Int32;
	} else if constexpr (std::is_same_v<T, DataTypeInt64>) {
	return TypeIndex::Int64;
	} else if constexpr (std::is_same_v<T, DataTypeInt128>) {
	return TypeIndex::Int128;
	} else if constexpr (std::is_same_v<T, DataTypeFloat32>) {
	return TypeIndex::Float32;
	} else if constexpr (std::is_same_v<T, DataTypeFloat64>) {
	return TypeIndex::Float64;
	}
	}

	struct UTDataTypeDesc {
	DataTypePtr data_type;
	doris::TypeDescriptor type_desc;
	std::string col_name;
	bool is_const = false;
	bool is_nullable = true;
	};
	using UTDataTypeDescs = std::vector<UTDataTypeDesc>;

	} // namespace ut_type

	size_t type_index_to_data_type(const std::vector<AnyType>& input_types, size_t index,
	ut_type::UTDataTypeDesc& ut_desc, DataTypePtr& type);
	bool parse_ut_data_type(const std::vector<AnyType>& input_types, ut_type::UTDataTypeDescs& descs);

	bool insert_cell(MutableColumnPtr& column, DataTypePtr type_ptr, const AnyType& cell);

	Block* create_block_from_inputset(const InputTypeSet& input_types, const InputDataSet& input_set);

	Block* process_table_function(TableFunction* fn, Block* input_block,
	const InputTypeSet& output_types);
	void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types,
	const InputDataSet& input_set, const InputTypeSet& output_types,
	const InputDataSet& output_set);

	// Null values are represented by Null()
	// The type of the constant column is represented as follows: Consted {TypeIndex::String}
	// A DataSet with a constant column can only have one row of data
	template <typename ReturnType, bool nullable = false>
	Status check_function(const std::string& func_name, const InputTypeSet& input_types,
	const DataSet& data_set, bool expect_fail = false) {
	// 1.0 create data type
	ut_type::UTDataTypeDescs descs;
	EXPECT_TRUE(parse_ut_data_type(input_types, descs));

	// 1.1 insert data and create block
	auto row_size = data_set.size();
	Block block;
	for (size_t i = 0; i < descs.size(); ++i) {
	auto& desc = descs[i];
	auto column = desc.data_type->create_column();
	column->reserve(row_size);

	auto type_ptr = desc.data_type->is_nullable()
	? ((DataTypeNullable*)(desc.data_type.get()))->get_nested_type()
	: desc.data_type;
	for (int j = 0; j < row_size; j++) {
	EXPECT_TRUE(insert_cell(column, type_ptr, data_set[j].first[i]));
	}

	if (desc.is_const) {
	column = ColumnConst::create(std::move(column), row_size);
	}
	block.insert({std::move(column), desc.data_type, desc.col_name});
	}

	// 1.2 prepare args for function call
	ColumnNumbers arguments;
	std::vector<doris::TypeDescriptor> arg_types;
	std::vector<std::shared_ptr<ColumnPtrWrapper>> constant_col_ptrs;
	std::vector<std::shared_ptr<ColumnPtrWrapper>> constant_cols;
	for (size_t i = 0; i < descs.size(); ++i) {
	auto& desc = descs[i];
	arguments.push_back(static_cast<unsigned int>(i));
	arg_types.push_back(desc.type_desc);
	if (desc.is_const) {
	constant_col_ptrs.push_back(
	std::make_shared<ColumnPtrWrapper>(block.get_by_position(i).column));
	constant_cols.push_back(constant_col_ptrs.back());
	} else {
	constant_cols.push_back(nullptr);
	}
	}

	// 2. execute function
	auto return_type = nullable ? make_nullable(std::make_shared<ReturnType>())
	: std::make_shared<ReturnType>();
	auto func = SimpleFunctionFactory::instance().get_function(
	func_name, block.get_columns_with_type_and_name(), return_type);
	EXPECT_TRUE(func != nullptr);

	doris::TypeDescriptor fn_ctx_return;
	if constexpr (std::is_same_v<ReturnType, DataTypeUInt8>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_BOOLEAN;
	} else if constexpr (std::is_same_v<ReturnType, DataTypeInt32>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_INT;
	} else if constexpr (std::is_same_v<ReturnType, DataTypeFloat64> \|\|
	std::is_same_v<ReturnType, DataTypeTimeV2>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_DOUBLE;
	} else if constexpr (std::is_same_v<ReturnType, DateTime>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIME;
	} else if (std::is_same_v<ReturnType, DateV2>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_DATEV2;
	} else if (std::is_same_v<ReturnType, DateTimeV2>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIMEV2;
	} else if (std::is_same_v<ReturnType, Decimal128V2>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMALV2;
	} else if (std::is_same_v<ReturnType, Decimal32>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL32;
	} else if (std::is_same_v<ReturnType, Decimal64>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL64;
	} else if (std::is_same_v<ReturnType, Decimal128V3>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL128I;
	} else if (std::is_same_v<ReturnType, Decimal256>) {
	fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL256;
	} else {
	fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE;
	}

	FunctionUtils fn_utils(fn_ctx_return, arg_types, 0);
	auto* fn_ctx = fn_utils.get_fn_ctx();
	fn_ctx->set_constant_cols(constant_cols);
	static_cast<void>(func->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL));
	static_cast<void>(func->open(fn_ctx, FunctionContext::THREAD_LOCAL));

	block.insert({nullptr, return_type, "result"});

	auto result = block.columns() - 1;
	auto st = func->execute(fn_ctx, block, arguments, result, row_size);
	if (expect_fail) {
	EXPECT_NE(Status::OK(), st);
	return st;
	} else {
	EXPECT_EQ(Status::OK(), st);
	}

	static_cast<void>(func->close(fn_ctx, FunctionContext::THREAD_LOCAL));
	static_cast<void>(func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL));

	// 3. check the result of function
	ColumnPtr column = block.get_columns()[result];
	EXPECT_TRUE(column);
	if (const auto* column_str = check_and_get_column<ColumnString>(column.get())) {
	column_str->sanity_check();
	}

	for (int i = 0; i < row_size; ++i) {
	// update current line
	if (row_size > 1) {
	TestCaseInfo::cur_cast_line = i;
	}
	auto check_column_data = [&]() {
	if constexpr (std::is_same_v<ReturnType, DataTypeJsonb>) {
	const auto& expect_data = any_cast<String>(data_set[i].second);
	auto s = column->get_data_at(i);
	if (expect_data.empty()) {
	// zero size result means invalid
	EXPECT_EQ(0, s.size) << " invalid result size should be 0 at row " << i;
	} else {
	// convert jsonb binary value to json string to compare with expected json text
	EXPECT_EQ(expect_data, JsonbToJson::jsonb_to_json_string(s.data, s.size))
	<< " at row " << i;
	}
	} else {
	Field field;
	column->get(i, field);

	const auto& expect_data =
	any_cast<typename ReturnType::FieldType>(data_set[i].second);

	if constexpr (std::is_same_v<ReturnType, DataTypeDecimal<Decimal128V2>>) {
	const auto& column_data = field.get<DecimalField<Decimal128V2>>().get_value();
	EXPECT_EQ(expect_data.value, column_data.value) << " at row " << i;
	} else if constexpr (std::is_same_v<ReturnType, DataTypeBitMap>) {
	const ColumnBitmap* bitmap_col = nullptr;
	if constexpr (nullable) {
	const auto* nullable_column =
	assert_cast<const ColumnNullable*>(column.get());
	bitmap_col = assert_cast<const ColumnBitmap*>(
	nullable_column->get_nested_column_ptr().get());
	} else {
	bitmap_col = assert_cast<const ColumnBitmap*>(column.get());
	}
	EXPECT_EQ(expect_data.to_string(), bitmap_col->get_element(i).to_string())
	<< " at row " << i;
	} else if constexpr (std::is_same_v<ReturnType, DataTypeHLL>) {
	const ColumnHLL* hll_col = nullptr;
	if constexpr (nullable) {
	const auto* nullable_column =
	assert_cast<const ColumnNullable*>(column.get());
	hll_col = assert_cast<const ColumnHLL*>(
	nullable_column->get_nested_column_ptr().get());
	} else {
	hll_col = assert_cast<const ColumnHLL*>(column.get());
	}
	EXPECT_EQ(expect_data.to_string(), hll_col->get_element(i).to_string())
	<< " at row " << i;
	} else if constexpr (std::is_same_v<ReturnType, DataTypeFloat32> \|\|
	std::is_same_v<ReturnType, DataTypeFloat64> \|\|
	std::is_same_v<ReturnType, DataTypeTimeV2>) {
	const auto& column_data = field.get<DataTypeFloat64::FieldType>();
	EXPECT_DOUBLE_EQ(expect_data, column_data) << " at row " << i;
	} else {
	const auto& column_data = field.get<typename ReturnType::FieldType>();
	EXPECT_EQ(expect_data, column_data) << " at row " << i;
	}
	}
	};

	if constexpr (nullable) {
	bool is_null = data_set[i].second.type() == &typeid(Null);
	EXPECT_EQ(is_null, column->is_null_at(i)) << " at row " << i;
	if (!is_null) {
	check_column_data();
	}
	} else {
	check_column_data();
	}
	}

	return Status::OK();
	}

	using BaseInputTypeSet = std::vector<TypeIndex>;

	// Each parameter may be decorated with 'const', but each invocation of 'check_function' can only handle one state of the parameters.
	// If there are 'n' parameters, it would require manually calling 'check_function' 2^n times, whereas through this function, only one
	// invocation is needed.
	template <typename ReturnType, bool nullable = false>
	void check_function_all_arg_comb(const std::string& func_name, const BaseInputTypeSet& base_set,
	const DataSet& data_set) {
	size_t arg_cnt = base_set.size();
	TestCaseInfo::arg_size = static_cast<int>(arg_cnt);
	// Consider each parameter as a bit, if the j-th bit is 1, the j-th parameter is const; otherwise, it is not.
	for (int i = 0; i < (1 << arg_cnt); i++) {
	InputTypeSet input_types {};
	for (int j = 0; j < arg_cnt; j++) {
	if ((1 << j) & i) {
	input_types.emplace_back(Consted {static_cast<TypeIndex>(base_set[j])});
	} else {
	input_types.emplace_back(static_cast<TypeIndex>(base_set[j]));
	}
	}

	TestCaseInfo::arg_const_info = i, TestCaseInfo::cur_cast_line = -1;
	// exists parameter are const
	if (i != 0) {
	for (const auto& line : data_set) {
	DataSet tmp_set {line};
	static_cast<void>(check_function<ReturnType, nullable>(func_name, input_types,
	tmp_set, false));
	}
	} else {
	static_cast<void>(
	check_function<ReturnType, nullable>(func_name, input_types, data_set));
	}
	}
	}
	} // namespace doris::vectorized