// cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h - incubator-gluten - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 #pragma once

 #include <Functions/FunctionsRound.h>

 namespace DB::ErrorCodes
 {
 /// Every error code thrown from this header is declared here, so the header
 /// does not rely on another include happening to declare them.
 extern const int ARGUMENT_OUT_OF_BOUND;
 extern const int ILLEGAL_COLUMN;
 extern const int ILLEGAL_TYPE_OF_ARGUMENT;
 extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }

 namespace local_engine
 {

 /// Per-type primitives (load/store, multiply/divide by the scale, rounding) used by the
 /// floating-point round-half-up kernels. The primary template is only declared here;
 /// the usable definitions are the specializations below.
 template <typename T, DB::Vectorize vectorize>
 class BaseFloatRoundingHalfUpComputation;

 #ifdef __SSE4_1__

 /// vectorized implementation for x86

 /// Float32 primitives operating on one __m128 register (four values at a time).
 template <>
 class BaseFloatRoundingHalfUpComputation<Float32, DB::Vectorize::Yes>
 {
 public:
     using ScalarType = Float32;
     using VectorType = __m128;
     /// Number of Float32 values processed per register.
     static const size_t data_count = 4;

     /// Unaligned load of data_count consecutive values.
     static VectorType load(const ScalarType * in) { return _mm_loadu_ps(in); }
     /// Broadcast one scalar into every lane.
     static VectorType load1(const ScalarType in) { return _mm_load1_ps(&in); }
     /// Unaligned store of all lanes.
     static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out, val); }
     static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_ps(val, scale); }
     static VectorType divide(VectorType val, VectorType scale) { return _mm_div_ps(val, scale); }

     /// Rounds every lane with std::roundf. The `mode` parameter is not consulted.
     template <DB::RoundingMode mode>
     static VectorType apply(VectorType val)
     {
         /// Spill the register to memory, round lane by lane, then reload.
         ScalarType lanes_in[data_count];
         store(lanes_in, val);

         ScalarType lanes_out[data_count];
         size_t lane = 0;
         while (lane < data_count)
         {
             lanes_out[lane] = std::roundf(lanes_in[lane]);
             ++lane;
         }

         return load(lanes_out);
     }

     /// Converts the integer scale to Float32 and broadcasts it.
     static VectorType prepare(size_t scale) { return load1(static_cast<ScalarType>(scale)); }
 };

 /// Float64 primitives operating on one __m128d register (two values at a time).
 template <>
 class BaseFloatRoundingHalfUpComputation<Float64, DB::Vectorize::Yes>
 {
 public:
     using ScalarType = Float64;
     using VectorType = __m128d;
     /// Number of Float64 values processed per register.
     static const size_t data_count = 2;

     /// Unaligned load of data_count consecutive values.
     static VectorType load(const ScalarType * in) { return _mm_loadu_pd(in); }
     /// Broadcast one scalar into every lane.
     static VectorType load1(const ScalarType in) { return _mm_load1_pd(&in); }
     /// Unaligned store of all lanes.
     static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out, val); }
     static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_pd(val, scale); }
     static VectorType divide(VectorType val, VectorType scale) { return _mm_div_pd(val, scale); }

     /// Rounds every lane with std::round. The `mode` parameter is not consulted.
     template <DB::RoundingMode mode>
     static VectorType apply(VectorType val)
     {
         /// Spill the register to memory, round lane by lane, then reload.
         ScalarType lanes_in[data_count];
         store(lanes_in, val);

         ScalarType lanes_out[data_count];
         size_t lane = 0;
         while (lane < data_count)
         {
             lanes_out[lane] = std::round(lanes_in[lane]);
             ++lane;
         }

         return load(lanes_out);
     }

     /// Converts the integer scale to Float64 and broadcasts it.
     static VectorType prepare(size_t scale) { return load1(static_cast<ScalarType>(scale)); }
 };

 /// end __SSE4_1__
 #endif

 /// Sequential implementation for ARM. Also used for scalar arguments

 /// Scalar primitives: one value per step, the "vector" type is the scalar itself.
 template <typename T>
 class BaseFloatRoundingHalfUpComputation<T, DB::Vectorize::No>
 {
 public:
     using ScalarType = T;
     using VectorType = T;
     static const size_t data_count = 1;

     static VectorType load(const ScalarType * in) { return *in; }
     static VectorType load1(const ScalarType in) { return in; }

     /// Writes `val` to `*out` and returns the stored value.
     static VectorType store(ScalarType * out, ScalarType val)
     {
         *out = val;
         return *out;
     }

     static VectorType multiply(VectorType val, VectorType scale) { return val * scale; }
     static VectorType divide(VectorType val, VectorType scale) { return val / scale; }

     /// Rounds with std::roundf for Float32 and std::round otherwise. `mode` is not consulted.
     template <DB::RoundingMode mode>
     static VectorType apply(VectorType val)
     {
         if constexpr (!std::is_same_v<ScalarType, Float32>)
             return std::round(val);
         else
             return std::roundf(val);
     }

     /// Converts the integer scale to the scalar type.
     static VectorType prepare(size_t scale) { return load1(static_cast<ScalarType>(scale)); }
 };

 /// Scalar primitives for BFloat16; rounding is carried out in Float32.
 template <>
 class BaseFloatRoundingHalfUpComputation<BFloat16, DB::Vectorize::No>
 {
 public:
     using ScalarType = BFloat16;
     using VectorType = BFloat16;
     static const size_t data_count = 1;

     static VectorType load(const ScalarType * in) { return *in; }
     static VectorType load1(const ScalarType in) { return in; }

     /// Writes `val` to `*out` and returns the stored value.
     static VectorType store(ScalarType * out, ScalarType val)
     {
         *out = val;
         return *out;
     }

     static VectorType multiply(VectorType val, VectorType scale) { return val * scale; }
     static VectorType divide(VectorType val, VectorType scale) { return val / scale; }

     /// Widens to Float32, rounds with std::roundf, narrows back. `mode` is not consulted.
     template <DB::RoundingMode mode>
     static VectorType apply(VectorType val)
     {
         const Float32 widened = static_cast<Float32>(val);
         const Float32 rounded = std::roundf(widened);
         return BFloat16(rounded);
     }

     /// Converts the integer scale to BFloat16 via Float32.
     static VectorType prepare(size_t scale)
     {
         const Float32 scale_as_float = static_cast<Float32>(scale);
         return load1(BFloat16(scale_as_float));
     }
 };


 /** Low-level rounding kernel for floating-point values.
   * Processes Base::data_count values per call: optionally rescales, rounds, then undoes the rescale.
   */
 template <typename T, DB::RoundingMode rounding_mode, DB::ScaleMode scale_mode, DB::Vectorize vectorize>
 class FloatRoundingHalfUpComputation : public BaseFloatRoundingHalfUpComputation<T, vectorize>
 {
     using Base = BaseFloatRoundingHalfUpComputation<T, vectorize>;

 public:
     /// Reads from `in`, writes the rounded values to `out`; `scale` comes from Base::prepare.
     static inline void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out)
     {
         constexpr bool positive_scale = scale_mode == DB::ScaleMode::Positive;
         constexpr bool negative_scale = scale_mode == DB::ScaleMode::Negative;

         auto val = Base::load(in);

         /// Move the digit to be rounded next to the decimal point.
         if constexpr (positive_scale)
             val = Base::multiply(val, scale);
         else if constexpr (negative_scale)
             val = Base::divide(val, scale);

         val = Base::template apply<rounding_mode>(val);

         /// Undo the rescaling applied before rounding.
         if constexpr (positive_scale)
             val = Base::divide(val, scale);
         else if constexpr (negative_scale)
             val = Base::multiply(val, scale);

         Base::store(out, val);
     }
 };


 /** High-level driver: rounds every element of a column container
   * using FloatRoundingHalfUpComputation.
   */
 template <typename T, DB::RoundingMode rounding_mode, DB::ScaleMode scale_mode>
 struct FloatRoundingHalfUpImpl
 {
 private:
     static_assert(!DB::is_decimal<T>);

     /// With SSE4.1 the vectorized kernel is the default (except for BFloat16); otherwise the scalar one.
     template <DB::Vectorize vectorize =
 #ifdef __SSE4_1__
     std::is_same_v<T, BFloat16> ? DB::Vectorize::No : DB::Vectorize::Yes
 #else
     DB::Vectorize::No
 #endif
     >
     using Op = FloatRoundingHalfUpComputation<T, rounding_mode, scale_mode, vectorize>;
     /// Scratch buffer holding exactly one kernel step worth of values.
     using Data = std::array<T, Op<>::data_count>;
     using ColumnType = DB::ColumnVector<T>;
     using Container = typename ColumnType::Container;

 public:
     /// Rounds `in` into `out`; `out` is indexed in lockstep with `in`, so it must hold at least in.size() elements.
     static NO_INLINE void apply(const Container & in, size_t scale, Container & out)
     {
         const size_t step = std::tuple_size<Data>();
         auto packed_scale = Op<>::prepare(scale);

         const T * const src_end = in.data() + in.size();
         const T * const whole_steps_end = in.data() + in.size() / step * step;

         const T * __restrict src = in.data();
         T * __restrict dst = out.data();

         /// Main part: whole steps straight from input to output.
         for (; src < whole_steps_end; src += step, dst += step)
             Op<>::compute(src, packed_scale, dst);

         if (!(src < src_end))
             return;

         /// Tail shorter than one step: run the kernel on zero-padded scratch buffers
         /// and copy back only the bytes that belong to real elements.
         Data padded_src{{}};
         Data padded_dst;

         const size_t tail_bytes = (src_end - src) * sizeof(T);

         memcpy(padded_src.data(), src, tail_bytes);
         Op<>::compute(padded_src.data(), packed_scale, padded_dst.data());
         memcpy(dst, padded_dst.data(), tail_bytes);
     }
 };


 /** Picks the rounding implementation from the element type and the sign of the scale.
   * Floating-point types (and BFloat16) use FloatRoundingHalfUpImpl; everything else
   * goes to DB::IntegerRoundingImpl.
   */
 template <typename T, DB::RoundingMode rounding_mode, DB::TieBreakingMode tie_breaking_mode>
 struct DispatcherRoundingHalfUp
 {
     template <DB::ScaleMode scale_mode>
     using FunctionRoundingImpl = std::conditional_t<
         std::is_floating_point_v<T> || std::is_same_v<T, BFloat16>,
         FloatRoundingHalfUpImpl<T, rounding_mode, scale_mode>,
         DB::IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;

     /// Rounds the ColumnVector<T> behind `col_general` to `scale_arg` digits and returns a new column.
     static DB::ColumnPtr apply(const DB::IColumn * col_general, DB::Scale scale_arg)
     {
         const auto * const col = checkAndGetColumn<DB::ColumnVector<T>>(col_general);
         auto col_res = DB::ColumnVector<T>::create();

         typename DB::ColumnVector<T>::Container & result_data = col_res->getData();
         result_data.resize_exact(col->getData().size());

         /// Nothing to round for an empty column.
         if (result_data.empty())
             return col_res;

         if (scale_arg > 0)
         {
             const size_t multiplier = intExp10(scale_arg);
             FunctionRoundingImpl<DB::ScaleMode::Positive>::apply(col->getData(), multiplier, result_data);
         }
         else if (scale_arg < 0)
         {
             const size_t multiplier = intExp10(-scale_arg);
             FunctionRoundingImpl<DB::ScaleMode::Negative>::apply(col->getData(), multiplier, result_data);
         }
         else
         {
             const size_t multiplier = 1;
             FunctionRoundingImpl<DB::ScaleMode::Zero>::apply(col->getData(), multiplier, result_data);
         }

         return col_res;
     }
 };

 /// Decimal columns: delegate to DB::DecimalRoundingImpl and keep the column's own scale.
 template <DB::is_decimal T, DB::RoundingMode rounding_mode, DB::TieBreakingMode tie_breaking_mode>
 struct DispatcherRoundingHalfUp<T, rounding_mode, tie_breaking_mode>
 {
 public:
     /// Rounds the ColumnDecimal<T> behind `col_general` to `scale_arg` digits and returns a new column.
     static DB::ColumnPtr apply(const DB::IColumn * col_general, DB::Scale scale_arg)
     {
         using DecimalColumn = DB::ColumnDecimal<T>;

         const auto * const col = checkAndGetColumn<DecimalColumn>(col_general);
         const typename DecimalColumn::Container & source = col->getData();

         /// Result has the same row count and decimal scale as the input.
         auto col_res = DecimalColumn::create(source.size(), col->getScale());
         auto & target = col_res->getData();

         if (target.empty())
             return col_res;

         DB::DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(source, col->getScale(), target, scale_arg);
         return col_res;
     }
 };

 /** A template for functions that round the value of an input parameter of type
   * (U)Int8/16/32/64, Float32/64 or Decimal32/64/128, and accept an additional optional
   * constant integer parameter, the scale (default is 0).
   * The function name is taken from Name::name; the result type equals the type of the first argument.
   */
 template <typename Name, DB::RoundingMode rounding_mode, DB::TieBreakingMode tie_breaking_mode>
 class FunctionRoundingHalfUp : public DB::IFunction
 {
 public:
     /// Taken from the Name tag so that every instantiation reports its own name.
     static constexpr auto name = Name::name;
     static DB::FunctionPtr create(DB::ContextPtr) { return std::make_shared<FunctionRoundingHalfUp>(); }

     String getName() const override { return name; }

     /// One or two arguments are accepted; the count is validated in getReturnTypeImpl.
     bool isVariadic() const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
     bool isSuitableForShortCircuitArgumentsExecution(const DB::DataTypesWithConstInfo & /*arguments*/) const override { return false; }

     /// Get result types by argument types. If the function does not apply to these arguments, throw an exception.
     /// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH unless there are 1 or 2 arguments, and
     /// ILLEGAL_TYPE_OF_ARGUMENT when any argument does not satisfy isNumber.
     DB::DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const override
     {
         if ((arguments.empty()) || (arguments.size() > 2))
             throw DB::Exception(
                 DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                 "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.",
                 getName(),
                 arguments.size());

         for (const auto & type : arguments)
             if (!isNumber(type))
                 /// Report the argument that actually failed the check, not always the first one.
                 throw DB::Exception(
                     DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", type->getName(), getName());

         return arguments[0];
     }

     /// Extracts the scale from the optional second argument; returns 0 when it is absent.
     /// Throws ILLEGAL_COLUMN if the argument is not a constant integer and
     /// ARGUMENT_OUT_OF_BOUND if the value does not fit into DB::Scale.
     static DB::Scale getScaleArg(const DB::ColumnsWithTypeAndName & arguments)
     {
         if (arguments.size() == 2)
         {
             const DB::IColumn & scale_column = *arguments[1].column;
             if (!isColumnConst(scale_column))
                 throw DB::Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "DB::Scale argument for rounding functions must be constant");

             DB::Field scale_field = assert_cast<const DB::ColumnConst &>(scale_column).getField();
             if (scale_field.getType() != DB::Field::Types::UInt64 && scale_field.getType() != DB::Field::Types::Int64)
                 throw DB::Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "DB::Scale argument for rounding functions must have integer type");

             Int64 scale64 = scale_field.safeGet<Int64>();
             if (scale64 > std::numeric_limits<DB::Scale>::max() || scale64 < std::numeric_limits<DB::Scale>::min())
                 throw DB::Exception(DB::ErrorCodes::ARGUMENT_OUT_OF_BOUND, "DB::Scale argument for rounding function is too large");

             /// The range check above makes this narrowing conversion lossless.
             return static_cast<DB::Scale>(scale64);
         }
         return 0;
     }

     bool useDefaultImplementationForConstants() const override { return true; }
     /// The scale (argument index 1) is declared as always constant.
     DB::ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

     /// Dispatches on the type id of the first argument and rounds its column with DispatcherRoundingHalfUp.
     /// Throws ILLEGAL_COLUMN when the type is neither a number nor a decimal data type.
     DB::ColumnPtr executeImpl(const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         const DB::ColumnWithTypeAndName & column = arguments[0];
         DB::Scale scale_arg = getScaleArg(arguments);

         DB::ColumnPtr res;
         auto call = [&](const auto & types) -> bool
         {
             using Types = std::decay_t<decltype(types)>;
             using DataType = typename Types::LeftType;

             if constexpr (DB::IsDataTypeNumber<DataType> || DB::IsDataTypeDecimal<DataType>)
             {
                 using FieldType = typename DataType::FieldType;
                 res = DispatcherRoundingHalfUp<FieldType, rounding_mode, tie_breaking_mode>::apply(column.column.get(), scale_arg);
                 return true;
             }
             return false;
         };

         if (!callOnIndexAndDataType<void>(column.type->getTypeId(), call))
             throw DB::Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", column.name, getName());

         return res;
     }

     bool hasInformationAboutMonotonicity() const override { return true; }

     /// Reported as monotonic for every range.
     Monotonicity getMonotonicityForRange(const DB::IDataType &, const DB::Field &, const DB::Field &) const override
     {
         return {.is_monotonic = true, .is_always_monotonic = true};
     }
 };


 /// Name tag carrying the function name "roundHalfUp".
 struct NameRoundHalfUp
 {
     static constexpr const char * name = "roundHalfUp";
 };

 /// The roundHalfUp function: FunctionRoundingHalfUp instantiated with the NameRoundHalfUp tag,
 /// RoundingMode::Round and TieBreakingMode::Auto.
 using FunctionRoundHalfUp = FunctionRoundingHalfUp<NameRoundHalfUp, DB::RoundingMode::Round, DB::TieBreakingMode::Auto>;

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	#pragma once

	#include <Functions/FunctionsRound.h>

	namespace DB::ErrorCodes
	{
	/// Every error code thrown from this header is declared here, so the header
	/// does not rely on another include happening to declare them.
	extern const int ARGUMENT_OUT_OF_BOUND;
	extern const int ILLEGAL_COLUMN;
	extern const int ILLEGAL_TYPE_OF_ARGUMENT;
	extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
	}

	namespace local_engine
	{

	/// Per-type primitives (load/store, multiply/divide by the scale, rounding) used by the
	/// floating-point round-half-up kernels. The primary template is only declared here;
	/// the usable definitions are the specializations below.
	template <typename T, DB::Vectorize vectorize>
	class BaseFloatRoundingHalfUpComputation;

	#ifdef __SSE4_1__

	/// vectorized implementation for x86

	/// Float32 primitives operating on one __m128 register (four values at a time).
	template <>
	class BaseFloatRoundingHalfUpComputation<Float32, DB::Vectorize::Yes>
	{
	public:
	    using ScalarType = Float32;
	    using VectorType = __m128;
	    /// Number of Float32 values processed per register.
	    static const size_t data_count = 4;

	    /// Unaligned load of data_count consecutive values.
	    static VectorType load(const ScalarType * in) { return _mm_loadu_ps(in); }
	    /// Broadcast one scalar into every lane.
	    static VectorType load1(const ScalarType in) { return _mm_load1_ps(&in); }
	    /// Unaligned store of all lanes.
	    static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out, val); }
	    static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_ps(val, scale); }
	    static VectorType divide(VectorType val, VectorType scale) { return _mm_div_ps(val, scale); }

	    /// Rounds every lane with std::roundf. The `mode` parameter is not consulted.
	    template <DB::RoundingMode mode>
	    static VectorType apply(VectorType val)
	    {
	        /// Spill the register to memory, round lane by lane, then reload.
	        ScalarType lanes_in[data_count];
	        store(lanes_in, val);

	        ScalarType lanes_out[data_count];
	        size_t lane = 0;
	        while (lane < data_count)
	        {
	            lanes_out[lane] = std::roundf(lanes_in[lane]);
	            ++lane;
	        }

	        return load(lanes_out);
	    }

	    /// Converts the integer scale to Float32 and broadcasts it.
	    static VectorType prepare(size_t scale) { return load1(static_cast<ScalarType>(scale)); }
	};

	/// Float64 primitives operating on one __m128d register (two values at a time).
	template <>
	class BaseFloatRoundingHalfUpComputation<Float64, DB::Vectorize::Yes>
	{
	public:
	    using ScalarType = Float64;
	    using VectorType = __m128d;
	    /// Number of Float64 values processed per register.
	    static const size_t data_count = 2;

	    /// Unaligned load of data_count consecutive values.
	    static VectorType load(const ScalarType * in) { return _mm_loadu_pd(in); }
	    /// Broadcast one scalar into every lane.
	    static VectorType load1(const ScalarType in) { return _mm_load1_pd(&in); }
	    /// Unaligned store of all lanes.
	    static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out, val); }
	    static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_pd(val, scale); }
	    static VectorType divide(VectorType val, VectorType scale) { return _mm_div_pd(val, scale); }

	    /// Rounds every lane with std::round. The `mode` parameter is not consulted.
	    template <DB::RoundingMode mode>
	    static VectorType apply(VectorType val)
	    {
	        /// Spill the register to memory, round lane by lane, then reload.
	        ScalarType lanes_in[data_count];
	        store(lanes_in, val);

	        ScalarType lanes_out[data_count];
	        size_t lane = 0;
	        while (lane < data_count)
	        {
	            lanes_out[lane] = std::round(lanes_in[lane]);
	            ++lane;
	        }

	        return load(lanes_out);
	    }

	    /// Converts the integer scale to Float64 and broadcasts it.
	    static VectorType prepare(size_t scale) { return load1(static_cast<ScalarType>(scale)); }
	};

	/// end __SSE4_1__
	#endif

	/// Sequential implementation for ARM. Also used for scalar arguments

	/// Scalar primitives: one value per step, the "vector" type is the scalar itself.
	template <typename T>
	class BaseFloatRoundingHalfUpComputation<T, DB::Vectorize::No>
	{
	public:
	    using ScalarType = T;
	    using VectorType = T;
	    static const size_t data_count = 1;

	    static VectorType load(const ScalarType * in) { return *in; }
	    static VectorType load1(const ScalarType in) { return in; }

	    /// Writes `val` to `*out` and returns the stored value.
	    static VectorType store(ScalarType * out, ScalarType val)
	    {
	        *out = val;
	        return *out;
	    }

	    static VectorType multiply(VectorType val, VectorType scale) { return val * scale; }
	    static VectorType divide(VectorType val, VectorType scale) { return val / scale; }

	    /// Rounds with std::roundf for Float32 and std::round otherwise. `mode` is not consulted.
	    template <DB::RoundingMode mode>
	    static VectorType apply(VectorType val)
	    {
	        if constexpr (!std::is_same_v<ScalarType, Float32>)
	            return std::round(val);
	        else
	            return std::roundf(val);
	    }

	    /// Converts the integer scale to the scalar type.
	    static VectorType prepare(size_t scale) { return load1(static_cast<ScalarType>(scale)); }
	};

	/// Scalar primitives for BFloat16; rounding is carried out in Float32.
	template <>
	class BaseFloatRoundingHalfUpComputation<BFloat16, DB::Vectorize::No>
	{
	public:
	    using ScalarType = BFloat16;
	    using VectorType = BFloat16;
	    static const size_t data_count = 1;

	    static VectorType load(const ScalarType * in) { return *in; }
	    static VectorType load1(const ScalarType in) { return in; }

	    /// Writes `val` to `*out` and returns the stored value.
	    static VectorType store(ScalarType * out, ScalarType val)
	    {
	        *out = val;
	        return *out;
	    }

	    static VectorType multiply(VectorType val, VectorType scale) { return val * scale; }
	    static VectorType divide(VectorType val, VectorType scale) { return val / scale; }

	    /// Widens to Float32, rounds with std::roundf, narrows back. `mode` is not consulted.
	    template <DB::RoundingMode mode>
	    static VectorType apply(VectorType val)
	    {
	        const Float32 widened = static_cast<Float32>(val);
	        const Float32 rounded = std::roundf(widened);
	        return BFloat16(rounded);
	    }

	    /// Converts the integer scale to BFloat16 via Float32.
	    static VectorType prepare(size_t scale)
	    {
	        const Float32 scale_as_float = static_cast<Float32>(scale);
	        return load1(BFloat16(scale_as_float));
	    }
	};


	/** Low-level rounding kernel for floating-point values.
	  * Processes Base::data_count values per call: optionally rescales, rounds, then undoes the rescale.
	  */
	template <typename T, DB::RoundingMode rounding_mode, DB::ScaleMode scale_mode, DB::Vectorize vectorize>
	class FloatRoundingHalfUpComputation : public BaseFloatRoundingHalfUpComputation<T, vectorize>
	{
	    using Base = BaseFloatRoundingHalfUpComputation<T, vectorize>;

	public:
	    /// Reads from `in`, writes the rounded values to `out`; `scale` comes from Base::prepare.
	    static inline void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out)
	    {
	        constexpr bool positive_scale = scale_mode == DB::ScaleMode::Positive;
	        constexpr bool negative_scale = scale_mode == DB::ScaleMode::Negative;

	        auto val = Base::load(in);

	        /// Move the digit to be rounded next to the decimal point.
	        if constexpr (positive_scale)
	            val = Base::multiply(val, scale);
	        else if constexpr (negative_scale)
	            val = Base::divide(val, scale);

	        val = Base::template apply<rounding_mode>(val);

	        /// Undo the rescaling applied before rounding.
	        if constexpr (positive_scale)
	            val = Base::divide(val, scale);
	        else if constexpr (negative_scale)
	            val = Base::multiply(val, scale);

	        Base::store(out, val);
	    }
	};


	/** High-level driver: rounds every element of a column container
	  * using FloatRoundingHalfUpComputation.
	  */
	template <typename T, DB::RoundingMode rounding_mode, DB::ScaleMode scale_mode>
	struct FloatRoundingHalfUpImpl
	{
	private:
	    static_assert(!DB::is_decimal<T>);

	    /// With SSE4.1 the vectorized kernel is the default (except for BFloat16); otherwise the scalar one.
	    template <DB::Vectorize vectorize =
	#ifdef __SSE4_1__
	    std::is_same_v<T, BFloat16> ? DB::Vectorize::No : DB::Vectorize::Yes
	#else
	    DB::Vectorize::No
	#endif
	    >
	    using Op = FloatRoundingHalfUpComputation<T, rounding_mode, scale_mode, vectorize>;
	    /// Scratch buffer holding exactly one kernel step worth of values.
	    using Data = std::array<T, Op<>::data_count>;
	    using ColumnType = DB::ColumnVector<T>;
	    using Container = typename ColumnType::Container;

	public:
	    /// Rounds `in` into `out`; `out` is indexed in lockstep with `in`, so it must hold at least in.size() elements.
	    static NO_INLINE void apply(const Container & in, size_t scale, Container & out)
	    {
	        auto mm_scale = Op<>::prepare(scale);

	        const size_t data_count = std::tuple_size<Data>();

	        const T * end_in = in.data() + in.size();
	        const T * limit = in.data() + in.size() / data_count * data_count;

	        const T * __restrict p_in = in.data();
	        T * __restrict p_out = out.data();

	        /// Main part: whole steps straight from input to output.
	        while (p_in < limit)
	        {
	            Op<>::compute(p_in, mm_scale, p_out);
	            p_in += data_count;
	            p_out += data_count;
	        }

	        /// Tail shorter than one step: run the kernel on zero-padded scratch buffers
	        /// and copy back only the bytes that belong to real elements.
	        if (p_in < end_in)
	        {
	            Data tmp_src{{}};
	            Data tmp_dst;

	            size_t tail_size_bytes = (end_in - p_in) * sizeof(*p_in);

	            memcpy(&tmp_src, p_in, tail_size_bytes);
	            /// The kernel takes element pointers, so the scratch arrays are passed as T *.
	            Op<>::compute(reinterpret_cast<T *>(&tmp_src), mm_scale, reinterpret_cast<T *>(&tmp_dst));
	            memcpy(p_out, &tmp_dst, tail_size_bytes);
	        }
	    }
	};


	/** Select the appropriate processing algorithm depending on the scale.
	*/
	template <typename T, DB::RoundingMode rounding_mode, DB::TieBreakingMode tie_breaking_mode>
	struct DispatcherRoundingHalfUp
	{
	template <DB::ScaleMode scale_mode>
	using FunctionRoundingImpl = std::conditional_t<
	std::is_floating_point_v<T> \|\| std::is_same_v<T, BFloat16>,
	FloatRoundingHalfUpImpl<T, rounding_mode, scale_mode>,
	DB::IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;

	static DB::ColumnPtr apply(const DB::IColumn * col_general, DB::Scale scale_arg)
	{
	const auto * const col = checkAndGetColumn<DB::ColumnVector<T>>(col_general);
	auto col_res = DB::ColumnVector<T>::create();

	typename DB::ColumnVector<T>::Container & vec_res = col_res->getData();
	vec_res.resize_exact(col->getData().size());

	if (!vec_res.empty())
	{
	if (scale_arg == 0)
	{
	size_t scale = 1;
	FunctionRoundingImpl<DB::ScaleMode::Zero>::apply(col->getData(), scale, vec_res);
	}
	else if (scale_arg > 0)
	{
	size_t scale = intExp10(scale_arg);
	FunctionRoundingImpl<DB::ScaleMode::Positive>::apply(col->getData(), scale, vec_res);
	}
	else
	{
	size_t scale = intExp10(-scale_arg);
	FunctionRoundingImpl<DB::ScaleMode::Negative>::apply(col->getData(), scale, vec_res);
	}
	}

	return col_res;
	}
	};

	/// Decimal columns: delegate to DB::DecimalRoundingImpl and keep the column's own scale.
	template <DB::is_decimal T, DB::RoundingMode rounding_mode, DB::TieBreakingMode tie_breaking_mode>
	struct DispatcherRoundingHalfUp<T, rounding_mode, tie_breaking_mode>
	{
	public:
	    /// Rounds the ColumnDecimal<T> behind `col_general` to `scale_arg` digits and returns a new column.
	    static DB::ColumnPtr apply(const DB::IColumn * col_general, DB::Scale scale_arg)
	    {
	        using DecimalColumn = DB::ColumnDecimal<T>;

	        const auto * const col = checkAndGetColumn<DecimalColumn>(col_general);
	        const typename DecimalColumn::Container & source = col->getData();

	        /// Result has the same row count and decimal scale as the input.
	        auto col_res = DecimalColumn::create(source.size(), col->getScale());
	        auto & target = col_res->getData();

	        if (target.empty())
	            return col_res;

	        DB::DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(source, col->getScale(), target, scale_arg);
	        return col_res;
	    }
	};

	/** A template for functions that round the value of an input parameter of type
	  * (U)Int8/16/32/64, Float32/64 or Decimal32/64/128, and accept an additional optional
	  * constant integer parameter, the scale (default is 0).
	  * The function name is taken from Name::name; the result type equals the type of the first argument.
	  */
	template <typename Name, DB::RoundingMode rounding_mode, DB::TieBreakingMode tie_breaking_mode>
	class FunctionRoundingHalfUp : public DB::IFunction
	{
	public:
	    /// Taken from the Name tag so that every instantiation reports its own name.
	    static constexpr auto name = Name::name;
	    static DB::FunctionPtr create(DB::ContextPtr) { return std::make_shared<FunctionRoundingHalfUp>(); }

	    String getName() const override { return name; }

	    /// One or two arguments are accepted; the count is validated in getReturnTypeImpl.
	    bool isVariadic() const override { return true; }
	    size_t getNumberOfArguments() const override { return 0; }
	    bool isSuitableForShortCircuitArgumentsExecution(const DB::DataTypesWithConstInfo & /*arguments*/) const override { return false; }

	    /// Get result types by argument types. If the function does not apply to these arguments, throw an exception.
	    /// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH unless there are 1 or 2 arguments, and
	    /// ILLEGAL_TYPE_OF_ARGUMENT when any argument does not satisfy isNumber.
	    DB::DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const override
	    {
	        if ((arguments.empty()) || (arguments.size() > 2))
	            throw DB::Exception(
	                DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
	                "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.",
	                getName(),
	                arguments.size());

	        for (const auto & type : arguments)
	            if (!isNumber(type))
	                /// Report the argument that actually failed the check, not always the first one.
	                throw DB::Exception(
	                    DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", type->getName(), getName());

	        return arguments[0];
	    }

	    /// Extracts the scale from the optional second argument; returns 0 when it is absent.
	    /// Throws ILLEGAL_COLUMN if the argument is not a constant integer and
	    /// ARGUMENT_OUT_OF_BOUND if the value does not fit into DB::Scale.
	    static DB::Scale getScaleArg(const DB::ColumnsWithTypeAndName & arguments)
	    {
	        if (arguments.size() == 2)
	        {
	            const DB::IColumn & scale_column = *arguments[1].column;
	            if (!isColumnConst(scale_column))
	                throw DB::Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "DB::Scale argument for rounding functions must be constant");

	            DB::Field scale_field = assert_cast<const DB::ColumnConst &>(scale_column).getField();
	            if (scale_field.getType() != DB::Field::Types::UInt64 && scale_field.getType() != DB::Field::Types::Int64)
	                throw DB::Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "DB::Scale argument for rounding functions must have integer type");

	            Int64 scale64 = scale_field.safeGet<Int64>();
	            if (scale64 > std::numeric_limits<DB::Scale>::max() || scale64 < std::numeric_limits<DB::Scale>::min())
	                throw DB::Exception(DB::ErrorCodes::ARGUMENT_OUT_OF_BOUND, "DB::Scale argument for rounding function is too large");

	            /// The range check above makes this narrowing conversion lossless.
	            return static_cast<DB::Scale>(scale64);
	        }
	        return 0;
	    }

	    bool useDefaultImplementationForConstants() const override { return true; }
	    /// The scale (argument index 1) is declared as always constant.
	    DB::ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

	    /// Dispatches on the type id of the first argument and rounds its column with DispatcherRoundingHalfUp.
	    /// Throws ILLEGAL_COLUMN when the type is neither a number nor a decimal data type.
	    DB::ColumnPtr executeImpl(const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr &, size_t /*input_rows_count*/) const override
	    {
	        const DB::ColumnWithTypeAndName & column = arguments[0];
	        DB::Scale scale_arg = getScaleArg(arguments);

	        DB::ColumnPtr res;
	        auto call = [&](const auto & types) -> bool
	        {
	            using Types = std::decay_t<decltype(types)>;
	            using DataType = typename Types::LeftType;

	            if constexpr (DB::IsDataTypeNumber<DataType> || DB::IsDataTypeDecimal<DataType>)
	            {
	                using FieldType = typename DataType::FieldType;
	                res = DispatcherRoundingHalfUp<FieldType, rounding_mode, tie_breaking_mode>::apply(column.column.get(), scale_arg);
	                return true;
	            }
	            return false;
	        };

	        if (!callOnIndexAndDataType<void>(column.type->getTypeId(), call))
	            throw DB::Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", column.name, getName());

	        return res;
	    }

	    bool hasInformationAboutMonotonicity() const override { return true; }

	    /// Reported as monotonic for every range.
	    Monotonicity getMonotonicityForRange(const DB::IDataType &, const DB::Field &, const DB::Field &) const override
	    {
	        return {.is_monotonic = true, .is_always_monotonic = true};
	    }
	};


	/// Name tag carrying the function name "roundHalfUp".
	struct NameRoundHalfUp
	{
	    static constexpr const char * name = "roundHalfUp";
	};

	/// The roundHalfUp function: FunctionRoundingHalfUp instantiated with the NameRoundHalfUp tag,
	/// RoundingMode::Round and TieBreakingMode::Auto.
	using FunctionRoundHalfUp = FunctionRoundingHalfUp<NameRoundHalfUp, DB::RoundingMode::Round, DB::TieBreakingMode::Auto>;

	}