blob: e37b618525cb26b0b87470644483c58981caea8a [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <cstdint>
#include <cstring>
// IWYU pragma: no_include <bits/std_abs.h>
#include <dlfcn.h>
#include <cmath>
#include <string>
#include <type_traits>
#include "common/status.h"
#include "util/debug/leak_annotations.h"
#include "vec/columns/column.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/number_traits.h"
#include "vec/functions/function_binary_arithmetic.h"
#include "vec/functions/function_const.h"
#include "vec/functions/function_math_log.h"
#include "vec/functions/function_math_unary.h"
#include "vec/functions/function_math_unary_alway_nullable.h"
#include "vec/functions/function_totype.h"
#include "vec/functions/function_unary_arithmetic.h"
#include "vec/functions/simple_function_factory.h"
#include "vec/utils/stringop_substring.h"
namespace doris::vectorized {
// Forward declarations of logarithm implementation tags.
// NOTE(review): none of these three names is referenced again in this file —
// the registrations at the bottom use ImplLn / ImplLog2 / ImplLog10 instead —
// so these look like leftovers; confirm and remove if they are unused.
struct LnImpl;
struct Log10Impl;
struct Log2Impl;
/// Name tag and domain predicate for `acos`.
struct AcosName {
    static constexpr const char* name = "acos";

    /// Flags arguments that lie outside the closed interval [-1, 1].
    // https://dev.mysql.com/doc/refman/8.4/en/mathematical-functions.html#function_acos
    static constexpr bool is_invalid_input(Float64 x) { return x > 1 || x < -1; }
};

/// `acos`, backed by std::acos; inputs rejected by AcosName::is_invalid_input are
/// presumably reported as NULL by the always-nullable wrapper (confirm in its header).
using FunctionAcos =
        FunctionMathUnaryAlwayNullable<UnaryFunctionPlainAlwayNullable<AcosName, std::acos>>;
/// Name tag and domain predicate for `acosh`.
struct AcoshName {
    static constexpr const char* name = "acosh";

    /// Flags arguments strictly below 1.
    static constexpr bool is_invalid_input(Float64 x) {
        return x < 1;
    }
};

/// `acosh`, backed by std::acosh and wrapped in the always-nullable unary adapter.
using FunctionAcosh =
        FunctionMathUnaryAlwayNullable<UnaryFunctionPlainAlwayNullable<AcoshName, std::acosh>>;
/// Name tag and domain predicate for `asin`.
struct AsinName {
    static constexpr const char* name = "asin";

    /// Flags arguments that lie outside the closed interval [-1, 1].
    // https://dev.mysql.com/doc/refman/8.4/en/mathematical-functions.html#function_asin
    static constexpr bool is_invalid_input(Float64 x) { return x > 1 || x < -1; }
};

/// `asin`, backed by std::asin and wrapped in the always-nullable unary adapter.
using FunctionAsin =
        FunctionMathUnaryAlwayNullable<UnaryFunctionPlainAlwayNullable<AsinName, std::asin>>;
/// Name tag for `asinh`; no domain predicate is declared for it.
struct AsinhName {
    static constexpr const char* name = "asinh";
};

/// `asinh`, backed by std::asinh.
using FunctionAsinh = FunctionMathUnary<UnaryFunctionPlain<AsinhName, std::asinh>>;
/// Name tag for `atan`; no domain predicate is declared for it.
struct AtanName {
    static constexpr const char* name = "atan";
};

/// `atan`, backed by std::atan.
using FunctionAtan = FunctionMathUnary<UnaryFunctionPlain<AtanName, std::atan>>;
/// Name tag and domain predicate for `atanh`.
struct AtanhName {
    static constexpr const char* name = "atanh";

    /// Flags arguments outside the open interval (-1, 1); both endpoints are rejected.
    static constexpr bool is_invalid_input(Float64 x) { return x >= 1 || x <= -1; }
};

/// `atanh`, backed by std::atanh and wrapped in the always-nullable unary adapter.
using FunctionAtanh =
        FunctionMathUnaryAlwayNullable<UnaryFunctionPlainAlwayNullable<AtanhName, std::atanh>>;
/// Binary kernel for `atan2`: both operands are converted to double and passed to
/// std::atan2 in the order (a, b). Decimal operands are disabled via `allow_decimal`.
template <PrimitiveType AType, PrimitiveType BType>
struct Atan2Impl {
    using A = typename PrimitiveTypeTraits<AType>::ColumnItemType;
    using B = typename PrimitiveTypeTraits<BType>::ColumnItemType;

    static constexpr PrimitiveType ResultType = TYPE_DOUBLE;
    static constexpr bool allow_decimal = false;

    /// @return std::atan2(a, b) evaluated in double precision.
    template <PrimitiveType type>
    static inline double apply(A a, B b) {
        const auto y = static_cast<double>(a);
        const auto x = static_cast<double>(b);
        return std::atan2(y, x);
    }
};
/// Name tag for `atan2`.
struct Atan2Name {
    static constexpr const char* name = "atan2";
};

/// `atan2` assembled from Atan2Impl; the trailing `false` is forwarded unchanged to
/// FunctionBinaryArithmetic (its meaning is defined in that header).
using FunctionAtan2 = FunctionBinaryArithmetic<Atan2Impl, Atan2Name, false>;
/// Name tag for `cos`.
struct CosName {
    static constexpr const char* name = "cos";
};

/// `cos`, backed by std::cos.
using FunctionCos = FunctionMathUnary<UnaryFunctionPlain<CosName, std::cos>>;
/// Name tag for `cosh`.
struct CoshName {
    static constexpr const char* name = "cosh";
};

/// `cosh`, backed by std::cosh.
using FunctionCosh = FunctionMathUnary<UnaryFunctionPlain<CoshName, std::cosh>>;
/// Constant provider for `e` (Euler's number). The literal carries more digits than a
/// double can hold; the compiler rounds it to the nearest representable value.
struct EImpl {
    static constexpr const char* name = "e";
    static constexpr double value = 2.7182818284590452353602874713526624977572470;
};

/// Zero-argument `e()` function built on the constant above.
using FunctionE = FunctionMathConstFloat64<EImpl>;
/// Constant provider for `pi`. Also used by RadiansImpl and DegreesImpl further down
/// in this file for the degree/radian conversion factor.
struct PiImpl {
    static constexpr const char* name = "pi";
    static constexpr double value = 3.1415926535897932384626433832795028841971693;
};

/// Zero-argument `pi()` function built on the constant above.
using FunctionPi = FunctionMathConstFloat64<PiImpl>;
/// Name tag for `exp`.
struct ExpName {
    static constexpr const char* name = "exp";
};

/// `exp`, backed by std::exp.
using FunctionExp = FunctionMathUnary<UnaryFunctionPlain<ExpName, std::exp>>;
/// Name tag for the two-argument `log` function.
struct LogName {
    static constexpr const char* name = "log";
};
/// Binary kernel for `log(a, b)`: the logarithm of `b` in base `a`, computed as
/// ln(b) / ln(a).
///
/// A row is flagged NULL when either operand is <= 0 or when the base `a` is within
/// EPSILON of 1 (where ln(a) would be ~0 and the quotient meaningless).
template <PrimitiveType AType, PrimitiveType BType>
struct LogImpl {
    using A = typename PrimitiveTypeTraits<AType>::CppNativeType;
    using B = typename PrimitiveTypeTraits<BType>::CppNativeType;
    static constexpr PrimitiveType ResultType = TYPE_DOUBLE;
    using Traits = NumberTraits::BinaryOperatorTraits<AType, BType>;
    static constexpr bool allow_decimal = false;
    /// Tolerance used to treat a base as "equal to 1".
    static constexpr double EPSILON = 1e-9;

    /// Vector-base / constant-value kernel.
    ///
    /// @param a        one base per row
    /// @param b        the single value shared by all rows
    /// @param c        output container; its size determines how many rows are processed
    /// @param null_map output null flags, fully overwritten for the first c.size() rows
    template <PrimitiveType Result = ResultType>
    static void apply(const typename Traits::ArrayA& a, B b,
                      typename PrimitiveTypeTraits<Result>::ColumnType::Container& c,
                      typename Traits::ArrayNull& null_map) {
        const size_t size = c.size();
        const UInt8 is_null = b <= 0;
        // A non-positive value makes every row NULL; otherwise start with all rows valid.
        memset(null_map.data(), is_null, size);
        if (is_null) {
            return;
        }
        // ln(b) does not depend on the row, so evaluate it once instead of once per row.
        const Float64 log_b = std::log(static_cast<Float64>(b));
        for (size_t i = 0; i < size; i++) {
            if (a[i] <= 0 || std::fabs(a[i] - 1.0) < EPSILON) {
                null_map[i] = 1;
            } else {
                c[i] = static_cast<Float64>(log_b / std::log(static_cast<Float64>(a[i])));
            }
        }
    }

    /// Scalar kernel.
    ///
    /// @param is_null set to true when the inputs are outside the domain described on
    ///                the struct; the returned value is still computed in that case and
    ///                is only meaningful when `is_null` is false.
    template <PrimitiveType Result>
    static inline typename PrimitiveTypeTraits<Result>::CppNativeType apply(A a, B b,
                                                                            UInt8& is_null) {
        is_null = a <= 0 || b <= 0 || std::fabs(a - 1.0) < EPSILON;
        return static_cast<Float64>(std::log(static_cast<Float64>(b)) /
                                    std::log(static_cast<Float64>(a)));
    }
};

/// Two-argument `log`; the trailing `true` is forwarded unchanged to
/// FunctionBinaryArithmetic (its meaning is defined in that header).
using FunctionLog = FunctionBinaryArithmetic<LogImpl, LogName, true>;
template <typename A>
struct SignImpl {
static constexpr PrimitiveType ResultType = TYPE_TINYINT;
static inline UInt8 apply(A a) {
if constexpr (IsDecimalNumber<A> || std::is_floating_point_v<A>)
return static_cast<UInt8>(a < A(0) ? -1 : a == A(0) ? 0 : 1);
else if constexpr (std::is_signed_v<A>)
return static_cast<UInt8>(a < 0 ? -1 : a == 0 ? 0 : 1);
else if constexpr (std::is_unsigned_v<A>)
return static_cast<UInt8>(a == 0 ? 0 : 1);
}
};
/// Name tag for `sign`.
struct NameSign {
    static constexpr const char* name = "sign";
};

/// `sign` as a unary arithmetic function built from SignImpl.
using FunctionSign = FunctionUnaryArithmetic<SignImpl, NameSign>;
/// Unary kernel for `abs`. The result type comes from NumberTraits::ResultOfAbs<A>.
template <typename A>
struct AbsImpl {
    static constexpr PrimitiveType ResultType = NumberTraits::ResultOfAbs<A>::Type;

    static inline typename PrimitiveTypeTraits<ResultType>::ColumnItemType apply(A a) {
        using Result = typename PrimitiveTypeTraits<ResultType>::ColumnItemType;
        if constexpr (IsDecimalNumber<A>) {
            if (a < A(0)) {
                return A(-a);
            }
            return a;
        } else if constexpr (std::is_integral_v<A> && std::is_signed_v<A>) {
            if (a < A(0)) {
                // Two's-complement negation (~a + 1) carried out after converting to the
                // result type rather than negating `a` in its own type.
                return static_cast<Result>(~a) + 1;
            }
            return a;
        } else if constexpr (std::is_integral_v<A> && std::is_unsigned_v<A>) {
            return static_cast<Result>(a);
        } else if constexpr (std::is_floating_point_v<A>) {
            return static_cast<Result>(std::abs(a));
        }
    }
};
/// Name tag for `abs`.
struct NameAbs {
    static constexpr const char* name = "abs";
};
template <typename A>
struct ResultOfUnaryFunc;
template <>
struct ResultOfUnaryFunc<UInt8> {
static constexpr PrimitiveType ResultType = TYPE_BOOLEAN;
};
template <>
struct ResultOfUnaryFunc<Int8> {
static constexpr PrimitiveType ResultType = TYPE_TINYINT;
};
template <>
struct ResultOfUnaryFunc<Int16> {
static constexpr PrimitiveType ResultType = TYPE_SMALLINT;
};
template <>
struct ResultOfUnaryFunc<Int32> {
static constexpr PrimitiveType ResultType = TYPE_INT;
};
template <>
struct ResultOfUnaryFunc<Int64> {
static constexpr PrimitiveType ResultType = TYPE_BIGINT;
};
template <>
struct ResultOfUnaryFunc<Int128> {
static constexpr PrimitiveType ResultType = TYPE_LARGEINT;
};
template <>
struct ResultOfUnaryFunc<Decimal32> {
static constexpr PrimitiveType ResultType = TYPE_DECIMAL32;
};
template <>
struct ResultOfUnaryFunc<Decimal64> {
static constexpr PrimitiveType ResultType = TYPE_DECIMAL64;
};
template <>
struct ResultOfUnaryFunc<Decimal128V3> {
static constexpr PrimitiveType ResultType = TYPE_DECIMAL128I;
};
template <>
struct ResultOfUnaryFunc<Decimal128V2> {
static constexpr PrimitiveType ResultType = TYPE_DECIMALV2;
};
template <>
struct ResultOfUnaryFunc<Decimal256> {
static constexpr PrimitiveType ResultType = TYPE_DECIMAL256;
};
template <>
struct ResultOfUnaryFunc<float> {
static constexpr PrimitiveType ResultType = TYPE_FLOAT;
};
template <>
struct ResultOfUnaryFunc<double> {
static constexpr PrimitiveType ResultType = TYPE_DOUBLE;
};
// `abs` as a unary arithmetic function built from AbsImpl and the NameAbs tag.
using FunctionAbs = FunctionUnaryArithmetic<AbsImpl, NameAbs>;
/// Unary kernel for `negative`: returns `-a` in the column type mapped from A by
/// ResultOfUnaryFunc.
template <typename A>
struct NegativeImpl {
    static constexpr PrimitiveType ResultType = ResultOfUnaryFunc<A>::ResultType;

    static inline typename PrimitiveTypeTraits<ResultType>::ColumnItemType apply(A a) {
        using Result = typename PrimitiveTypeTraits<ResultType>::ColumnItemType;
        // Copy-initialization keeps the same implicit conversion as returning `-a` directly.
        Result negated = -a;
        return negated;
    }
};
/// Name tag for `negative`.
struct NameNegative {
    static constexpr const char* name = "negative";
};

/// `negative` as a unary arithmetic function built from NegativeImpl.
using FunctionNegative = FunctionUnaryArithmetic<NegativeImpl, NameNegative>;
/// Unary kernel for `positive`: returns the argument unchanged, converted to the column
/// type mapped from A by ResultOfUnaryFunc.
template <typename A>
struct PositiveImpl {
    static constexpr PrimitiveType ResultType = ResultOfUnaryFunc<A>::ResultType;

    static inline typename PrimitiveTypeTraits<ResultType>::ColumnItemType apply(A a) {
        using Result = typename PrimitiveTypeTraits<ResultType>::ColumnItemType;
        return static_cast<Result>(a);
    }
};
/// Name tag for `positive`.
struct NamePositive {
    static constexpr const char* name = "positive";
};

/// `positive` as a unary arithmetic function built from PositiveImpl.
using FunctionPositive = FunctionUnaryArithmetic<PositiveImpl, NamePositive>;
/// Unary kernel for `sin` that resolves the implementation at run time.
///
/// Outside of BE_TEST builds the `sin` symbol is looked up in "libm.so.6" via
/// dlopen/dlsym; if either step fails, or in BE_TEST builds, std::sin is used.
/// NOTE(review): the reason for preferring the dynamically loaded symbol is not stated
/// in this file — confirm and document it.
struct UnaryFunctionPlainSin {
    using Type = DataTypeFloat64;
    static constexpr const char* name = "sin";
    using FuncType = double (*)(double);

    /// Picks the `sin` implementation described on the struct.
    static FuncType get_sin_func() {
#ifndef BE_TEST
        if (void* libm = dlopen("libm.so.6", RTLD_LAZY); libm != nullptr) {
            auto resolved = reinterpret_cast<FuncType>(dlsym(libm, "sin"));
            if (resolved != nullptr) {
                // The handle is deliberately not closed here: the returned pointer
                // refers into the loaded library.
                return resolved;
            }
            dlclose(libm);
        }
#endif
        return std::sin;
    }

    /// Computes sin(*src) into *dst. The implementation is resolved once, on first use,
    /// and cached in a function-local static.
    static void execute(const double* src, double* dst) {
        static auto resolved_sin = get_sin_func();
        dst[0] = resolved_sin(src[0]);
    }
};

/// `sin` built on the run-time resolved kernel above.
using FunctionSin = FunctionMathUnary<UnaryFunctionPlainSin>;
/// Name tag for `sinh`.
struct SinhName {
    static constexpr const char* name = "sinh";
};

/// `sinh`, backed by std::sinh.
using FunctionSinh = FunctionMathUnary<UnaryFunctionPlain<SinhName, std::sinh>>;
/// Name tag and domain predicate for `sqrt`.
struct SqrtName {
    static constexpr const char* name = "sqrt";

    /// Flags strictly negative arguments.
    // https://dev.mysql.com/doc/refman/8.4/en/mathematical-functions.html#function_sqrt
    static constexpr bool is_invalid_input(Float64 x) {
        return x < 0;
    }
};

/// `sqrt`, backed by std::sqrt and wrapped in the always-nullable unary adapter.
using FunctionSqrt =
        FunctionMathUnaryAlwayNullable<UnaryFunctionPlainAlwayNullable<SqrtName, std::sqrt>>;
/// Name tag for `cbrt`.
struct CbrtName {
    static constexpr const char* name = "cbrt";
};

/// `cbrt`, backed by std::cbrt.
using FunctionCbrt = FunctionMathUnary<UnaryFunctionPlain<CbrtName, std::cbrt>>;
/// Name tag for `tan`.
struct TanName {
    static constexpr const char* name = "tan";
};

/// `tan`, backed by std::tan.
using FunctionTan = FunctionMathUnary<UnaryFunctionPlain<TanName, std::tan>>;
/// Name tag for `tanh`.
struct TanhName {
    static constexpr const char* name = "tanh";
};

/// `tanh`, backed by std::tanh.
using FunctionTanh = FunctionMathUnary<UnaryFunctionPlain<TanhName, std::tanh>>;
/// Unary kernel for `radians`: converts degrees to radians as (a * pi) / 180.
///
/// NOTE(review): the result is cast back to the column type mapped from A, so for an
/// integral A the fractional part would be dropped — confirm that only floating-point
/// arguments reach this kernel.
template <typename A>
struct RadiansImpl {
    static constexpr PrimitiveType ResultType = ResultOfUnaryFunc<A>::ResultType;

    static inline typename PrimitiveTypeTraits<ResultType>::ColumnItemType apply(A a) {
        using Result = typename PrimitiveTypeTraits<ResultType>::ColumnItemType;
        // Multiply first, then divide, to keep the original evaluation order.
        const auto scaled = a * PiImpl::value;
        return static_cast<Result>(scaled / 180.0);
    }
};
/// Name tag for `radians`.
struct NameRadians {
    static constexpr const char* name = "radians";
};

/// `radians` as a unary arithmetic function built from RadiansImpl.
using FunctionRadians = FunctionUnaryArithmetic<RadiansImpl, NameRadians>;
/// Unary kernel for `degrees`: converts radians to degrees as (a * 180) / pi.
///
/// NOTE(review): the result is cast back to the column type mapped from A, so for an
/// integral A the fractional part would be dropped — confirm that only floating-point
/// arguments reach this kernel.
template <typename A>
struct DegreesImpl {
    static constexpr PrimitiveType ResultType = ResultOfUnaryFunc<A>::ResultType;

    static inline typename PrimitiveTypeTraits<ResultType>::ColumnItemType apply(A a) {
        using Result = typename PrimitiveTypeTraits<ResultType>::ColumnItemType;
        // Multiply first, then divide, to keep the original evaluation order.
        const auto scaled = a * 180.0;
        return static_cast<Result>(scaled / PiImpl::value);
    }
};
/// Name tag for `degrees`.
struct NameDegrees {
    static constexpr const char* name = "degrees";
};

/// `degrees` as a unary arithmetic function built from DegreesImpl.
using FunctionDegrees = FunctionUnaryArithmetic<DegreesImpl, NameDegrees>;
/// Name tag for `bin`.
struct NameBin {
    static constexpr const char* name = "bin";
};
struct BinImpl {
using ReturnType = DataTypeString;
static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_BIGINT;
using Type = Int64;
using ReturnColumnType = ColumnString;
static std::string bin_impl(Int64 value) {
uint64_t n = static_cast<uint64_t>(value);
const size_t max_bits = sizeof(uint64_t) * 8;
char result[max_bits];
uint32_t index = max_bits;
do {
result[--index] = '0' + (n & 1);
} while (n >>= 1);
return std::string(result + index, max_bits - index);
}
static Status vector(const ColumnInt64::Container& data, ColumnString::Chars& res_data,
ColumnString::Offsets& res_offsets) {
res_offsets.resize(data.size());
size_t input_size = res_offsets.size();
for (size_t i = 0; i < input_size; ++i) {
StringOP::push_value_string(bin_impl(data[i]), i, res_data, res_offsets);
}
return Status::OK();
}
};
using FunctionBin = FunctionUnaryToType<BinImpl, NameBin>;
/// Binary kernel for `pow`: both operands are converted to double and passed to
/// std::pow as (base, exponent). Decimal operands are disabled via `allow_decimal`.
template <PrimitiveType AType, PrimitiveType BType>
struct PowImpl {
    using A = typename PrimitiveTypeTraits<AType>::ColumnItemType;
    using B = typename PrimitiveTypeTraits<BType>::ColumnItemType;

    static constexpr PrimitiveType ResultType = TYPE_DOUBLE;
    static constexpr bool allow_decimal = false;

    /// @return std::pow(a, b) evaluated in double precision.
    template <PrimitiveType type>
    static inline double apply(A a, B b) {
        // Convert explicitly so the computation never happens in a narrower or
        // mixed-signedness integer type.
        const auto base = static_cast<double>(a);
        const auto exponent = static_cast<double>(b);
        return std::pow(base, exponent);
    }
};
/// Name tag for `pow`.
struct PowName {
    static constexpr const char* name = "pow";
};

/// `pow` assembled from PowImpl; the trailing `false` is forwarded unchanged to
/// FunctionBinaryArithmetic (its meaning is defined in that header).
using FunctionPow = FunctionBinaryArithmetic<PowImpl, PowName, false>;
/// `normal_cdf(mean, sd, v)`: cumulative distribution function of the normal
/// distribution with the given mean and standard deviation, evaluated at `v`.
///
/// The result is a nullable Float64 column; a row is NULL when `sd > 0` does not hold
/// (which also covers a NaN standard deviation).
class FunctionNormalCdf : public IFunction {
public:
    static constexpr auto name = "normal_cdf";

    String get_name() const override { return name; }

    static FunctionPtr create() { return std::make_shared<FunctionNormalCdf>(); }

    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return make_nullable(std::make_shared<DataTypeFloat64>());
    }

    DataTypes get_variadic_argument_types_impl() const override {
        return {std::make_shared<DataTypeFloat64>(), std::make_shared<DataTypeFloat64>(),
                std::make_shared<DataTypeFloat64>()};
    }

    size_t get_number_of_arguments() const override { return 3; }

    /// Evaluates the function for `input_rows_count` rows and stores a nullable
    /// Float64 column at position `result` of `block`.
    ///
    /// @param arguments positions of the (mean, sd, value) columns in `block`; the
    ///                  fixed-size scratch arrays below rely on there being exactly
    ///                  three, matching get_number_of_arguments()
    /// @return always Status::OK()
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        // Both columns are created already sized to input_rows_count, so no further
        // reserve is needed before writing by index.
        auto result_column = ColumnFloat64::create(input_rows_count);
        auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
        auto& result_data = result_column->get_data();
        NullMap& result_null_map =
                assert_cast<ColumnUInt8*>(result_null_map_column.get())->get_data();

        ColumnPtr argument_columns[3];
        bool col_const[3] = {};
        const size_t argument_size = arguments.size();
        for (size_t i = 0; i < argument_size; ++i) {
            argument_columns[i] = block.get_by_position(arguments[i]).column;
            col_const[i] = is_column_const(*argument_columns[i]);
            if (col_const[i]) {
                // Unwrap constant columns so they can be read through ColumnFloat64.
                argument_columns[i] =
                        static_cast<const ColumnConst&>(*argument_columns[i]).get_data_column_ptr();
            }
        }

        auto* mean_col = assert_cast<const ColumnFloat64*>(argument_columns[0].get());
        auto* sd_col = assert_cast<const ColumnFloat64*>(argument_columns[1].get());
        auto* value_col = assert_cast<const ColumnFloat64*>(argument_columns[2].get());

        for (size_t i = 0; i < input_rows_count; ++i) {
            double mean = mean_col->get_element(index_check_const(i, col_const[0]));
            double sd = sd_col->get_element(index_check_const(i, col_const[1]));
            double v = value_col->get_element(index_check_const(i, col_const[2]));
            if (!check_argument(sd)) [[unlikely]] {
                result_null_map[i] = 1;
                continue;
            }
            result_data[i] = calculate_cell(mean, sd, v);
        }

        block.get_by_position(result).column =
                ColumnNullable::create(std::move(result_column), std::move(result_null_map_column));
        return Status::OK();
    }

    /// A standard deviation is accepted only when it is strictly positive.
    static bool check_argument(double sd) { return sd > 0; }

    /// Computes 0.5 * (erf((v - mean) / (sd * sqrt(2))) + 1).
    static double calculate_cell(double mean, double sd, double v) {
#ifdef __APPLE__
        const double sqrt2 = std::sqrt(2);
#else
        constexpr double sqrt2 = std::numbers::sqrt2;
#endif
        return 0.5 * (std::erf((v - mean) / (sd * sqrt2)) + 1);
    }
};
// TODO: This file can make a single compile thread take too long because it defines
// so many functions. Split it up to speed up compilation in the future.
// Registers every math function defined in this file, plus their alternate names,
// with the given factory.
// NOTE(review): register_alias presumably takes (existing name, alias) — e.g. "power",
// "dpow" and "fpow" all resolving to "pow" — confirm against SimpleFunctionFactory.
void register_function_math(SimpleFunctionFactory& factory) {
// Trigonometric / hyperbolic functions and the constant e.
factory.register_function<FunctionAcos>();
factory.register_function<FunctionAcosh>();
factory.register_function<FunctionAsin>();
factory.register_function<FunctionAsinh>();
factory.register_function<FunctionAtan>();
factory.register_function<FunctionAtanh>();
factory.register_function<FunctionAtan2>();
factory.register_function<FunctionCos>();
factory.register_function<FunctionCosh>();
factory.register_function<FunctionE>();
// Logarithms. The "ln" alias is registered before the FunctionMathLog<ImplLn>
// function itself; ImplLn/ImplLog2/ImplLog10 are not defined in this file.
factory.register_alias("ln", "dlog1");
factory.register_function<FunctionLog>();
factory.register_function<FunctionMathLog<ImplLn>>();
factory.register_function<FunctionMathLog<ImplLog2>>();
factory.register_function<FunctionMathLog<ImplLog10>>();
factory.register_alias("log10", "dlog10");
// Constants, sign handling and remaining elementary functions.
factory.register_function<FunctionPi>();
factory.register_function<FunctionSign>();
factory.register_function<FunctionAbs>();
factory.register_function<FunctionNegative>();
factory.register_function<FunctionPositive>();
factory.register_function<FunctionSin>();
factory.register_function<FunctionSinh>();
factory.register_function<FunctionSqrt>();
factory.register_alias("sqrt", "dsqrt");
factory.register_function<FunctionCbrt>();
factory.register_function<FunctionTan>();
factory.register_function<FunctionTanh>();
// Power and exponential, with their alternate spellings.
factory.register_function<FunctionPow>();
factory.register_alias("pow", "power");
factory.register_alias("pow", "dpow");
factory.register_alias("pow", "fpow");
factory.register_function<FunctionExp>();
factory.register_alias("exp", "dexp");
// Angle conversion, binary formatting and statistics.
factory.register_function<FunctionRadians>();
factory.register_function<FunctionDegrees>();
factory.register_function<FunctionBin>();
factory.register_function<FunctionNormalCdf>();
}
} // namespace doris::vectorized