blob: 22407109d0b840a5c10f7235c69bca6c57d7f066 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/aggregate-functions.h
// and modified by Doris
#pragma once
#include "udf/udf.h"
namespace doris {
class HllSetResolver;
class HybridSetBase;
// Collection of builtin aggregate functions. Aggregate functions implement
// the various phases of the aggregation: Init(), Update(), Serialize(), Merge(),
// and Finalize(). Not all functions need to implement all of the steps and
// some of the parts can be reused across different aggregate functions.
// This functions are implemented using the UDA interface.
class AggregateFunctions {
public:
// Initializes dst to nullptr.
static void init_null(doris_udf::FunctionContext*, doris_udf::AnyVal* dst);
// Initializes dst to nullptr and sets dst->ptr to nullptr.
static void init_null_string(doris_udf::FunctionContext* c, doris_udf::StringVal* dst);
// Initializes dst to 0 and is_null = true.
template <typename T>
static void init_zero(doris_udf::FunctionContext*, T* dst);
// Initializes dst to 0 and is_null = true.
template <typename T>
static void init_zero_null(doris_udf::FunctionContext*, T* dst);
// Initializes dst to 0.
template <typename T>
static void init_zero_not_null(doris_udf::FunctionContext*, T* dst);
template <typename SRC_VAL, typename DST_VAL>
static void sum_remove(doris_udf::FunctionContext* ctx, const SRC_VAL& src, DST_VAL* dst);
// doris_udf::StringVal GetValue() function that returns a copy of src
static doris_udf::StringVal string_val_get_value(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& src);
static doris_udf::StringVal string_val_serialize_or_finalize(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& src);
// Implementation of Count and Count(*)
static void count_update(doris_udf::FunctionContext*, const doris_udf::AnyVal& src,
doris_udf::BigIntVal* dst);
static void count_merge(doris_udf::FunctionContext*, const doris_udf::BigIntVal& src,
doris_udf::BigIntVal* dst);
static void count_remove(doris_udf::FunctionContext*, const doris_udf::AnyVal& src,
doris_udf::BigIntVal* dst);
static void count_star_update(doris_udf::FunctionContext*, doris_udf::BigIntVal* dst);
static void count_star_remove(FunctionContext*, BigIntVal* dst);
// Implementation of percentile
static void percentile_init(FunctionContext* ctx, StringVal* dst);
template <typename T>
static void percentile_update(FunctionContext* ctx, const T& src, const DoubleVal& quantile,
StringVal* dst);
static void percentile_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst);
static StringVal percentile_serialize(FunctionContext* ctx, const StringVal& state_sv);
static DoubleVal percentile_finalize(FunctionContext* ctx, const StringVal& src);
// Implementation of percentile_approx
static void percentile_approx_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst);
template <typename T>
static void percentile_approx_update(FunctionContext* ctx, const T& src,
const DoubleVal& quantile, StringVal* dst);
template <typename T>
static void percentile_approx_update(FunctionContext* ctx, const T& src,
const DoubleVal& quantile,
const DoubleVal& digest_compression, StringVal* dst);
static void percentile_approx_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst);
static DoubleVal percentile_approx_finalize(FunctionContext* ctx, const StringVal& src);
static StringVal percentile_approx_serialize(FunctionContext* ctx, const StringVal& state_sv);
// Implementation of Avg.
// TODO: Change this to use a fixed-sized BufferVal as intermediate type.
static void avg_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst);
template <typename T>
static void avg_update(doris_udf::FunctionContext* ctx, const T& src,
doris_udf::StringVal* dst);
template <typename T>
static void avg_remove(doris_udf::FunctionContext* ctx, const T& src,
doris_udf::StringVal* dst);
static void avg_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst);
static doris_udf::DoubleVal avg_get_value(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& val);
static doris_udf::DoubleVal avg_finalize(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& val);
// Avg for timestamp. Uses avg_init() and AvgMerge().
static void timestamp_avg_update(doris_udf::FunctionContext* ctx,
const doris_udf::DateTimeVal& src, doris_udf::StringVal* dst);
static void timestamp_avg_remove(doris_udf::FunctionContext* ctx,
const doris_udf::DateTimeVal& src, doris_udf::StringVal* dst);
static doris_udf::DateTimeVal timestamp_avg_get_value(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& val);
static doris_udf::DateTimeVal timestamp_avg_finalize(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& val);
// Avg for decimals.
static void decimalv2_avg_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst);
static void decimalv2_avg_update(doris_udf::FunctionContext* ctx,
const doris_udf::DecimalV2Val& src, doris_udf::StringVal* dst);
static void decimalv2_avg_merge(FunctionContext* ctx, const doris_udf::StringVal& src,
doris_udf::StringVal* dst);
static doris_udf::StringVal decimalv2_avg_serialize(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& src);
static void decimalv2_avg_remove(doris_udf::FunctionContext* ctx,
const doris_udf::DecimalV2Val& src, doris_udf::StringVal* dst);
static doris_udf::DecimalV2Val decimalv2_avg_get_value(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& val);
static doris_udf::DecimalV2Val decimalv2_avg_finalize(doris_udf::FunctionContext* ctx,
const doris_udf::StringVal& val);
// SumUpdate, SumMerge
template <typename SRC_VAL, typename DST_VAL>
static void sum(doris_udf::FunctionContext*, const SRC_VAL& src, DST_VAL* dst);
// MinInit
template <typename T>
static void min_init(doris_udf::FunctionContext*, T* dst);
// MinUpdate/MinMerge
template <typename T>
static void min(doris_udf::FunctionContext*, const T& src, T* dst);
// MaxInit
template <typename T>
static void max_init(doris_udf::FunctionContext*, T* dst);
// MaxUpdate/MaxMerge
template <typename T>
static void max(doris_udf::FunctionContext*, const T& src, T* dst);
// AnyInit
template <typename T>
static void any_init(doris_udf::FunctionContext*, T* dst);
// AnyUpdate/AnyMerge
template <typename T>
static void any(doris_udf::FunctionContext*, const T& src, T* dst);
// String concat
static void string_concat(doris_udf::FunctionContext*, const doris_udf::StringVal& src,
const doris_udf::StringVal& separator, doris_udf::StringVal* result);
/// String concat
static void string_concat_update(FunctionContext*, const StringVal& src, StringVal* result);
static void string_concat_update(FunctionContext*, const StringVal& src,
const StringVal& separator, StringVal* result);
static void string_concat_merge(FunctionContext*, const StringVal& src, StringVal* result);
static StringVal string_concat_finalize(FunctionContext*, const StringVal& src);
// Probabilistic Counting (PC), a distinct estimate algorithms.
// Probabilistic Counting with Stochastic Averaging (PCSA) is a variant
// of PC that runs faster and usually gets equally accurate results.
static void pc_init(doris_udf::FunctionContext*, doris_udf::StringVal* slot);
template <typename T>
static void pc_update(doris_udf::FunctionContext*, const T& src, doris_udf::StringVal* dst);
template <typename T>
static void pcsa_update(doris_udf::FunctionContext*, const T& src, doris_udf::StringVal* dst);
static void pc_merge(doris_udf::FunctionContext*, const doris_udf::StringVal& src,
doris_udf::StringVal* dst);
static doris_udf::StringVal pc_finalize(doris_udf::FunctionContext*,
const doris_udf::StringVal& src);
static doris_udf::StringVal pcsa_finalize(doris_udf::FunctionContext*,
const doris_udf::StringVal& src);
// count and sum distinct algorithm in multi distinct
template <typename T>
static void count_or_sum_distinct_numeric_init(doris_udf::FunctionContext* ctx,
doris_udf::StringVal* dst);
template <typename T>
static void count_or_sum_distinct_numeric_update(FunctionContext* ctx, T& src, StringVal* dst);
template <typename T>
static void count_or_sum_distinct_numeric_merge(FunctionContext* ctx, StringVal& src,
StringVal* dst);
template <typename T>
static StringVal count_or_sum_distinct_numeric_serialize(FunctionContext* ctx,
const StringVal& state_sv);
template <typename T>
static BigIntVal count_or_sum_distinct_numeric_finalize(FunctionContext* ctx,
const StringVal& state_sv);
// count distinct in multi distinct for string
static void count_distinct_string_init(doris_udf::FunctionContext* ctx,
doris_udf::StringVal* dst);
static void count_distinct_string_update(FunctionContext* ctx, StringVal& src, StringVal* dst);
static void count_distinct_string_merge(FunctionContext* ctx, StringVal& src, StringVal* dst);
static StringVal count_distinct_string_serialize(FunctionContext* ctx,
const StringVal& state_sv);
static BigIntVal count_distinct_string_finalize(FunctionContext* ctx,
const StringVal& state_sv);
// count distinct in multi distinct for decimal
static void count_or_sum_distinct_decimal_init(doris_udf::FunctionContext* ctx,
doris_udf::StringVal* dst);
static void count_or_sum_distinct_decimalv2_init(doris_udf::FunctionContext* ctx,
doris_udf::StringVal* dst);
static void count_or_sum_distinct_decimalv2_update(FunctionContext* ctx, DecimalV2Val& src,
StringVal* dst);
static void count_or_sum_distinct_decimal_merge(FunctionContext* ctx, StringVal& src,
StringVal* dst);
static void count_or_sum_distinct_decimalv2_merge(FunctionContext* ctx, StringVal& src,
StringVal* dst);
static StringVal count_or_sum_distinct_decimalv2_serialize(FunctionContext* ctx,
const StringVal& state_sv);
static BigIntVal count_distinct_decimalv2_finalize(FunctionContext* ctx,
const StringVal& state_sv);
static DecimalV2Val sum_distinct_decimalv2_finalize(FunctionContext* ctx,
const StringVal& state_sv);
// count distinct in multi distinct for Date
static void count_distinct_date_init(doris_udf::FunctionContext* ctx,
doris_udf::StringVal* dst);
static void count_distinct_date_update(FunctionContext* ctx, DateTimeVal& src, StringVal* dst);
static void count_distinct_date_merge(FunctionContext* ctx, StringVal& src, StringVal* dst);
static StringVal count_distinct_date_serialize(FunctionContext* ctx, const StringVal& state_sv);
static BigIntVal count_distinct_date_finalize(FunctionContext* ctx, const StringVal& state_sv);
template <typename T>
static BigIntVal sum_distinct_bigint_finalize(FunctionContext* ctx, const StringVal& state_sv);
template <typename T>
static LargeIntVal sum_distinct_largeint_finalize(FunctionContext* ctx,
const StringVal& state_sv);
template <typename T>
static DoubleVal sum_distinct_double_finalize(FunctionContext* ctx, const StringVal& state_sv);
/// Knuth's variance algorithm, more numerically stable than canonical stddev
/// algorithms; reference implementation:
/// http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
static void knuth_var_init(FunctionContext* context, StringVal* val);
template <typename T>
static void knuth_var_update(FunctionContext* context, const T& input, StringVal* val);
template <typename T>
static void knuth_var_remove(FunctionContext* context, const T& src, StringVal* dst);
static void knuth_var_merge(FunctionContext* context, const StringVal& src, StringVal* dst);
static DoubleVal knuth_var_finalize(FunctionContext* context, const StringVal& val);
/// Calculates the biased variance, uses KnuthVar Init-Update-Merge functions
static DoubleVal knuth_var_pop_finalize(FunctionContext* context, const StringVal& val);
/// Calculates STDDEV, uses KnuthVar Init-Update-Merge functions
static DoubleVal knuth_stddev_finalize(FunctionContext* context, const StringVal& val);
/// Calculates the biased STDDEV, uses KnuthVar Init-Update-Merge functions
static DoubleVal knuth_stddev_pop_finalize(FunctionContext* context, const StringVal& val);
static DoubleVal knuth_var_get_value(FunctionContext* ctx, const StringVal& state_sv);
static DoubleVal knuth_var_pop_get_value(FunctionContext* context, const StringVal& val);
static DoubleVal knuth_stddev_get_value(FunctionContext* ctx, const StringVal& state_sv);
static DoubleVal knuth_stddev_pop_get_value(FunctionContext* context, const StringVal& val);
// variance/stddev for decimals.
static void decimalv2_knuth_var_init(FunctionContext* context, StringVal* val);
static void knuth_var_remove(FunctionContext* ctx, const DecimalV2Val& src, StringVal* dst);
static void knuth_var_update(FunctionContext* context, const DecimalV2Val& src, StringVal* val);
static void decimalv2_knuth_var_merge(FunctionContext* context, const StringVal& src,
StringVal* val);
static DecimalV2Val decimalv2_knuth_var_finalize(FunctionContext* context,
const StringVal& val);
static DecimalV2Val decimalv2_knuth_var_pop_finalize(FunctionContext* context,
const StringVal& val);
static DecimalV2Val decimalv2_knuth_stddev_finalize(FunctionContext* context,
const StringVal& val);
static DecimalV2Val decimalv2_knuth_stddev_pop_finalize(FunctionContext* context,
const StringVal& val);
static DecimalV2Val decimalv2_knuth_var_get_value(FunctionContext* ctx,
const StringVal& state_sv);
static DecimalV2Val decimalv2_knuth_var_pop_get_value(FunctionContext* context,
const StringVal& val);
static DecimalV2Val decimalv2_knuth_stddev_get_value(FunctionContext* context,
const StringVal& val);
static DecimalV2Val decimalv2_knuth_stddev_pop_get_value(FunctionContext* context,
const StringVal& val);
/// ----------------------------- Analytic Functions ---------------------------------
/// Analytic functions implement the UDA interface (except Merge(), Serialize()) and are
/// used internally by the AnalyticEvalNode. Some analytic functions store intermediate
/// state as a StringVal which is needed for multiple calls to Finalize(), so some fns
/// also implement a (private) GetValue() method to just return the value. In that
/// case, Finalize() is only called at the end to clean up.
// Initializes the state for RANK and DENSE_RANK
static void rank_init(doris_udf::FunctionContext*, doris_udf::StringVal* slot);
// Update state for RANK
static void rank_update(doris_udf::FunctionContext*, doris_udf::StringVal* dst);
// Update state for DENSE_RANK
static void dense_rank_update(doris_udf::FunctionContext*, doris_udf::StringVal* dst);
// Returns the result for RANK and prepares the state for the next Update().
static doris_udf::BigIntVal rank_get_value(doris_udf::FunctionContext*,
doris_udf::StringVal& src);
// Returns the result for DENSE_RANK and prepares the state for the next Update().
// TODO: Implement DENSE_RANK with a single doris_udf::BigIntVal. Requires src can be modified,
// AggFnEvaluator would need to handle copying the src doris_udf::AnyVal back into the src slot.
static doris_udf::BigIntVal dense_rank_get_value(doris_udf::FunctionContext*,
doris_udf::StringVal& src);
// Returns the result for RANK and DENSE_RANK and cleans up intermediate state in src.
static doris_udf::BigIntVal rank_finalize(doris_udf::FunctionContext*,
doris_udf::StringVal& src);
// Implements LAST_VALUE.
template <typename T>
static void last_val_update(doris_udf::FunctionContext*, const T& src, T* dst);
template <typename T>
static void last_val_remove(doris_udf::FunctionContext*, const T& src, T* dst);
// Implements FIRST_VALUE.
template <typename T>
static void first_val_update(doris_udf::FunctionContext*, const T& src, T* dst);
// Implements FIRST_VALUE for some windows that require rewrites during planning.
// The doris_udf::BigIntVal is unused by first_val_rewrite_update() (it is used by the
// AnalyticEvalNode).
template <typename T>
static void first_val_rewrite_update(doris_udf::FunctionContext*, const T& src,
const doris_udf::BigIntVal&, T* dst);
// OffsetFn*() implement LAG and LEAD. Init() sets the default value (the last
// constant parameter) as dst.
template <typename T>
static void offset_fn_init(doris_udf::FunctionContext*, T* dst);
// Update() takes all the parameters to LEAD/LAG, including the integer offset and
// the default value, neither which are needed by Update(). (The offset is already
// used in the window for the analytic fn evaluation and the default value is set
// in Init().
template <typename T>
static void offset_fn_update(doris_udf::FunctionContext*, const T& src,
const doris_udf::BigIntVal&, const T&, T* dst);
// windowFunnel
static void window_funnel_init(FunctionContext* ctx, StringVal* dst);
static void window_funnel_update(FunctionContext* ctx, const BigIntVal& window,
const StringVal& mode, const DateTimeVal& timestamp,
int num_cond, const BooleanVal* conds, StringVal* dst);
static void window_funnel_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst);
static StringVal window_funnel_serialize(FunctionContext* ctx, const StringVal& src);
static IntVal window_funnel_finalize(FunctionContext* ctx, const StringVal& src);
// todo(kks): keep following HLL methods only for backward compatibility, we should remove these methods
// when doris 0.12 release
static void hll_init(doris_udf::FunctionContext*, doris_udf::StringVal* slot);
template <typename T>
static void hll_update(doris_udf::FunctionContext*, const T& src, doris_udf::StringVal* dst);
static void hll_merge(doris_udf::FunctionContext*, const doris_udf::StringVal& src,
doris_udf::StringVal* dst);
static doris_udf::StringVal hll_finalize(doris_udf::FunctionContext*,
const doris_udf::StringVal& src);
static void hll_union_agg_init(doris_udf::FunctionContext*, doris_udf::HllVal* slot);
// fill all register according to hll set type
static void hll_union_agg_update(doris_udf::FunctionContext*, const doris_udf::HllVal& src,
doris_udf::HllVal* dst);
// merge the register value
static void hll_union_agg_merge(doris_udf::FunctionContext*, const doris_udf::HllVal& src,
doris_udf::HllVal* dst);
// return result
static doris_udf::BigIntVal hll_union_agg_finalize(doris_udf::FunctionContext*,
const doris_udf::HllVal& src);
// calculate result
static int64_t hll_algorithm(uint8_t* pdata, int data_len);
static int64_t hll_algorithm(const StringVal& dst) { return hll_algorithm(dst.ptr, dst.len); }
static int64_t hll_algorithm(const HllVal& dst) {
return hll_algorithm(dst.ptr + 1, dst.len - 1);
}
// HLL value type aggregate to HLL value type
static void hll_raw_agg_init(doris_udf::FunctionContext*, doris_udf::HllVal* slot);
static void hll_raw_agg_update(doris_udf::FunctionContext*, const doris_udf::HllVal& src,
doris_udf::HllVal* dst);
static void hll_raw_agg_merge(doris_udf::FunctionContext*, const doris_udf::HllVal& src,
doris_udf::HllVal* dst);
// return result which is HLL type
static doris_udf::HllVal hll_raw_agg_finalize(doris_udf::FunctionContext*,
const doris_udf::HllVal& src);
};
} // namespace doris