blob: 89af2c2023fe4b0a98c418edc9d68affc462e71d [file] [log] [blame]
* percentile.c
* Support functions for inverse distribution functions.
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
#include "postgres.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
* Information for percentile functions.
* The prev_value is only for percentile_cont, where we need to remember the
* prior value to interpolate two values. The target position is the
* row position we want given the total row count and percentage.
* The ceiled and floored target positions are rounded target positions.
* The row number tracks the current logical row position.
/*
 * Per-group working state shared by the percentile transition functions.
 * An instance is allocated zeroed, stored in fcinfo->flinfo->fn_extra, and
 * fn_extra is set back to NULL once the result row has been reached.
 */
typedef struct
Datum prev_value; /* copy of the target value saved at the floor position; used by the interpolation branch */
float8 tp; /* target position, computed from total_count and percentage */
float8 ctp; /* ceiled target position: ceil(tp) */
float8 ftp; /* floored target position: floor(tp) */
int64 rn; /* current row number; starts at 1 and advances by peer_count on each call */
} PercentileInfo;
* transition function for percentile_cont().
* The actual arguments are:
* (state_value, percentage, target_value, peer_count, total_count)
* The result of percentile_cont() is the interpolated value from
* value expressions at consecutive rows that are indicated by the
* argument, in the order specified by the WITHIN GROUP.
* We compute these values at the first stage of this transition:
* tp = (total_count - 1) * percentage + 1
* ftp = floor(tp)
* ctp = ceil(tp)
* rn0 = (current logical row position)
* rn1 = rn0 + peer_count
* tv = (target value)
* And the result is calculated as:
* result = SUM(
* WHEN rn0 <= ftp AND rn1 > ctp THEN tv
* WHEN rn0 <= ftp AND rn1 > ftp THEN tv * (ctp - tp)
* WHEN rn0 >= ctp THEN tv * (tp - ftp)
* )
* Note that we use FmgrInfo's fn_extra to store the per-group information.
* fn_extra is not re-initialized by the executor at group boundaries,
* so we clear it once the value is found. That said, we assume that we
* always find the required value in each group; if we do not, something
* is wrong.
/*
 * Body of the percentile_cont() transition function.
 *
 * Arguments read here: 1 = percentage (float8), 2 = target value,
 * 3 = peer count (int64), 4 = total count (int64).
 *
 * NOTE(review): the function header, braces, the NULL-input handling and the
 * return statements of some branches are not visible in this excerpt; the
 * comments below describe only the statements that are shown.
 */
int64 pc = PG_GETARG_INT64(3);
int64 rn0, rn1;
PercentileInfo *info;
/* Ignore NULL inputs for percentage and target value */
/*
 * fn_extra is NULL on the first call for a group: validate the percentage
 * and build the per-group state.
 */
if (!fcinfo->flinfo->fn_extra)
float8 percentage = PG_GETARG_FLOAT8(1);
int64 tc = PG_GETARG_INT64(4);
/* The percentage must lie within [0.0, 1.0]; otherwise an error is reported. */
if (percentage < 0.0 || percentage > 1.0)
errmsg("input is out of range"),
errhint("Argument to percentile function must be between 0.0 and 1.0.")));
/* Allocate the zero-filled state in the function's fn_mcxt memory context. */
info = (PercentileInfo *) MemoryContextAllocZero(
fcinfo->flinfo->fn_mcxt, sizeof(PercentileInfo));
/* Target position is 1-based: percentage 0 maps to row 1, percentage 1 to row tc. */
info->tp = (tc - 1) * percentage + 1;
info->ftp = floor(info->tp);
info->ctp = ceil(info->tp);
info->rn = 1;
fcinfo->flinfo->fn_extra = info;
/* Fetch the state and advance the row counter past the current peers. */
info = (PercentileInfo *) fcinfo->flinfo->fn_extra;
/* The current peers occupy row positions rn0 up to, but not including, rn1. */
rn0 = info->rn;
rn1 = rn0 + pc;
info->rn += pc;
/*
 * Case 1: both the floored and the ceiled target positions fall among the
 * current peers, so no interpolation is needed (the header comment gives the
 * result as the target value itself).
 */
if (rn0 <= info->ftp && rn1 > info->ctp)
/* Clean up, so the next group can see NULL for fn_extra */
fcinfo->flinfo->fn_extra = NULL;
/*
 * Case 2: only the floored target position falls among the current peers.
 * Remember a copy of this target value for the interpolation in case 3.
 */
else if (rn0 <= info->ftp && rn1 > info->ftp)
Oid resulttype;
bool byval;
int16 len;
/* Look up length/by-value properties of the result type so the datum can be copied. */
resulttype = get_fn_expr_rettype(fcinfo->flinfo);
get_typlenbyval(resulttype, &len, &byval);
info->prev_value = datumCopy(PG_GETARG_DATUM(2), byval, len);
/*
 * Case 3: the current row is at or past the ceiled target position.
 * Interpolate between the saved previous value and the current value.
 */
else if (rn0 >= info->ctp)
Datum prev = info->prev_value;
Datum tv = PG_GETARG_DATUM(2);
float8 tp = info->tp;
float8 ctp = info->ctp;
float8 ftp = info->ftp;
Oid resulttype = get_fn_expr_rettype(fcinfo->flinfo);
/* Clean up, so the next group can see NULL for fn_extra */
fcinfo->flinfo->fn_extra = NULL;
/* float8: weighted sum prev * (ctp - tp) + tv * (tp - ftp). */
if (resulttype == FLOAT8OID)
PG_RETURN_FLOAT8(DatumGetFloat8(prev) * (ctp - tp) +
DatumGetFloat8(tv) * (tp - ftp));
/*
 * timestamp: scale the difference (tv - prev) by (tp - ftp).
 * NOTE(review): the call that combines prev with the scaled interval is
 * only partly visible here; presumably it adds the interval to prev -- confirm.
 */
else if (resulttype == TIMESTAMPOID)
Datum interval;
interval = DirectFunctionCall2(timestamp_mi, tv, prev);
interval = DirectFunctionCall2(interval_mul, interval,
Float8GetDatum(tp - ftp));
prev, interval));
/* timestamptz: the visible statements are the same as in the timestamp branch. */
else if (resulttype == TIMESTAMPTZOID)
Datum interval;
interval = DirectFunctionCall2(timestamp_mi, tv, prev);
interval = DirectFunctionCall2(interval_mul, interval,
Float8GetDatum(tp - ftp));
prev, interval));
/* interval: weighted sum of the two intervals, built from interval_mul and interval_pl. */
else if (resulttype == INTERVALOID)
Datum val1, val2;
val1 = DirectFunctionCall2(interval_mul, prev, Float8GetDatum(ctp - tp));
val2 = DirectFunctionCall2(interval_mul, tv, Float8GetDatum(tp - ftp));
PG_RETURN_DATUM(DirectFunctionCall2(interval_pl, val1, val2));
else /* Should not happen. */
elog(ERROR, "unexpected result type: %d", (int) resulttype);
* transition function for percentile_disc().
* The actual arguments are:
* (state_value, percentage, target_value, peer_count, total_count)
* The result of percentile_disc() is the first value whose position
* in the cumulative distribution of values, specified by the WITHIN GROUP
* clause, is equal to or greater than the percentage specified. The
* cumulative distribution, i.e. CUME_DIST(), is defined as
* follows:
* count(*) OVER (ORDER BY tv) / count(*) OVER ()
* Let accum_count be the numerator and total_count be the denominator.
* Let p be the percentage given as the argument. Now, the result of
* percentile_disc() is at the first row position that satisfies
* p <= accum_count / total_count
* Converting this to
* p * total_count <= accum_count
* and because the left hand side is a fractional row position, we take its ceiling
* ceil(p * total_count)
* This is the row position where the result is at.
/*
 * Body of the percentile_disc() transition function.
 *
 * Arguments read here: 1 = percentage (float8), 3 = peer count (int64),
 * 4 = total count (int64).
 *
 * NOTE(review): the function header, braces, the NULL-input handling and the
 * return statements are not visible in this excerpt; the comments below
 * describe only the statements that are shown.
 */
int64 pc = PG_GETARG_INT64(3);
int64 rn0, rn1;
PercentileInfo *info;
/* Ignore NULL inputs for percentage and target value */
/*
 * fn_extra is NULL on the first call for a group: validate the percentage
 * and build the per-group state.
 */
if (!fcinfo->flinfo->fn_extra)
float8 percentage = PG_GETARG_FLOAT8(1);
int64 tc = PG_GETARG_INT64(4);
/* The percentage must lie within [0.0, 1.0]; otherwise an error is reported. */
if (percentage < 0.0 || percentage > 1.0)
errmsg("input is out of range"),
errhint("Argument to percentile function must be between 0.0 and 1.0.")));
/* Allocate the zero-filled state in the function's fn_mcxt memory context. */
info = (PercentileInfo *) MemoryContextAllocZero(
fcinfo->flinfo->fn_mcxt, sizeof(PercentileInfo));
/*
 * A percentage of 0.0 is special-cased to row position 1, since
 * ceil(tc * 0.0) would be 0 and row numbering starts at 1.
 * NOTE(review): as shown, the ceil() assignment below also runs when
 * percentage == 0.0 and would overwrite the 1.0; presumably it belongs to
 * an else branch that is not visible here -- confirm.
 */
if (percentage == 0.0)
info->tp = 1.0;
info->tp = ceil(tc * percentage);
/* The target is a whole row position, so the floor and ceiling equal it. */
info->ctp = info->ftp = info->tp;
info->rn = 1;
fcinfo->flinfo->fn_extra = info;
/* Fetch the state and advance the row counter past the current peers. */
info = (PercentileInfo *) fcinfo->flinfo->fn_extra;
/* The current peers occupy row positions rn0 up to, but not including, rn1. */
rn0 = info->rn;
rn1 = rn0 + pc;
info->rn += pc;
/* The target position falls among the current peers: this is the result row. */
if (rn0 <= info->tp && rn1 > info->tp)
/* Clean up, so the next group can see NULL for fn_extra */
fcinfo->flinfo->fn_extra = NULL;