| /*------------------------------------------------------------------------- |
| * |
| * percentile.c |
| * Support functions for inverse distribution functions. |
| * |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| #include "utils/builtins.h" |
| #include "utils/datum.h" |
| #include "utils/lsyscache.h" |
| |
| /* |
| * Information for percentile functions. |
| * The prev_value is only for percentile_cont, where we need to remember the |
| * prior value to interpolate two values. The target position is the |
| * row position we want given the total row count and percentage. |
| * The ceiled and floored target positions are rounded target positions. |
| * The row number tracks the current logical row position. |
| */ |
| typedef struct |
| { |
| Datum prev_value; /* the target value at the prior row */ |
| float8 tp; /* target position */ |
| float8 ctp; /* ceiled target position */ |
| float8 ftp; /* floored target position */ |
| int64 rn; /* current row number */ |
| } PercentileInfo; |
| |
| /* |
| * transition function for percentile_cont(). |
| * |
| * The actual arguments are: |
| * (state_value, percentage, target_value, peer_count, total_count) |
| * |
| * The result of percentile_cont() is the interpolated value from |
| * value expressions at consecutive rows that are indicated by the |
| * argument, in the order specified by the WITHIN GROUP. |
| * |
| * We compute these values at the first stage of this transition: |
| * |
| * tp = (total_count - 1) * percentage + 1 |
| * ftp = floor(tp) |
| * ctp = ceil(tp) |
| * rn0 = (current logical row position) |
| * rn1 = rn0 + peer_count |
| * tv = (target value) |
| * |
| * And the result is calculated as: |
| * |
| * result = SUM( |
| * CASE |
| * WHEN rn0 <= ftp AND rn1 > ctp THEN tv |
| * WHEN rn0 <= ftp AND rn1 > ftp THEN tv * (ctp - tp) |
| * WHEN rn0 >= ctp THEN tv * (tp - ftp) |
| * END |
| * ) |
| * |
| * Note that we use FmgrInfo's fn_extra to store the per-group information. |
| * fn_extra is not initialized by the executor in the group boundaries, |
| * so we clean it when the value is found. That said, we assume we |
| * always found the required value in each group, and if not, something |
| * is wrong. |
| */ |
| Datum |
| percentile_cont_trans(PG_FUNCTION_ARGS) |
| { |
| int64 pc = PG_GETARG_INT64(3); |
| int64 rn0, rn1; |
| PercentileInfo *info; |
| |
| if (!PG_ARGISNULL(0)) |
| PG_RETURN_DATUM(PG_GETARG_DATUM(0)); |
| |
| /* Ignore NULL inputs for percentage and target value */ |
| if (PG_ARGISNULL(1) || PG_ARGISNULL(2)) |
| PG_RETURN_NULL(); |
| |
| if (!fcinfo->flinfo->fn_extra) |
| { |
| float8 percentage = PG_GETARG_FLOAT8(1); |
| int64 tc = PG_GETARG_INT64(4); |
| |
| if (percentage < 0.0 || percentage > 1.0) |
| ereport(ERROR, |
| (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), |
| errmsg("input is out of range"), |
| errhint("Argument to percentile function must be between 0.0 and 1.0."))); |
| info = (PercentileInfo *) MemoryContextAllocZero( |
| fcinfo->flinfo->fn_mcxt, sizeof(PercentileInfo)); |
| info->tp = (tc - 1) * percentage + 1; |
| info->ftp = floor(info->tp); |
| info->ctp = ceil(info->tp); |
| info->rn = 1; |
| fcinfo->flinfo->fn_extra = info; |
| } |
| else |
| info = (PercentileInfo *) fcinfo->flinfo->fn_extra; |
| |
| rn0 = info->rn; |
| rn1 = rn0 + pc; |
| info->rn += pc; |
| if (rn0 <= info->ftp && rn1 > info->ctp) |
| { |
| /* Clean up, so the next group can see NULL for fn_extra */ |
| pfree(info); |
| fcinfo->flinfo->fn_extra = NULL; |
| |
| PG_RETURN_DATUM(PG_GETARG_DATUM(2)); |
| } |
| else if (rn0 <= info->ftp && rn1 > info->ftp) |
| { |
| Oid resulttype; |
| bool byval; |
| int16 len; |
| |
| resulttype = get_fn_expr_rettype(fcinfo->flinfo); |
| get_typlenbyval(resulttype, &len, &byval); |
| info->prev_value = datumCopy(PG_GETARG_DATUM(2), byval, len); |
| } |
| else if (rn0 >= info->ctp) |
| { |
| Datum prev = info->prev_value; |
| Datum tv = PG_GETARG_DATUM(2); |
| float8 tp = info->tp; |
| float8 ctp = info->ctp; |
| float8 ftp = info->ftp; |
| Oid resulttype = get_fn_expr_rettype(fcinfo->flinfo); |
| |
| /* Clean up, so the next group can see NULL for fn_extra */ |
| pfree(info); |
| fcinfo->flinfo->fn_extra = NULL; |
| |
| if (resulttype == FLOAT8OID) |
| { |
| PG_RETURN_FLOAT8(DatumGetFloat8(prev) * (ctp - tp) + |
| DatumGetFloat8(tv) * (tp - ftp)); |
| } |
| else if (resulttype == TIMESTAMPOID) |
| { |
| Datum interval; |
| |
| interval = DirectFunctionCall2(timestamp_mi, tv, prev); |
| interval = DirectFunctionCall2(interval_mul, interval, |
| Float8GetDatum(tp - ftp)); |
| PG_RETURN_DATUM(DirectFunctionCall2(timestamp_pl_interval, |
| prev, interval)); |
| } |
| else if (resulttype == TIMESTAMPTZOID) |
| { |
| Datum interval; |
| |
| interval = DirectFunctionCall2(timestamp_mi, tv, prev); |
| interval = DirectFunctionCall2(interval_mul, interval, |
| Float8GetDatum(tp - ftp)); |
| PG_RETURN_DATUM(DirectFunctionCall2(timestamptz_pl_interval, |
| prev, interval)); |
| } |
| else if (resulttype == INTERVALOID) |
| { |
| Datum val1, val2; |
| |
| val1 = DirectFunctionCall2(interval_mul, prev, Float8GetDatum(ctp - tp)); |
| val2 = DirectFunctionCall2(interval_mul, tv, Float8GetDatum(tp - ftp)); |
| |
| PG_RETURN_DATUM(DirectFunctionCall2(interval_pl, val1, val2)); |
| } |
| else /* Should not happen. */ |
| elog(ERROR, "unexpected result type: %d", (int) resulttype); |
| } |
| |
| PG_RETURN_NULL(); |
| } |
| |
| /* |
| * transition function for percentile_disc(). |
| * |
| * The actual arguments are: |
| * (state_value, percentage, target_value, peer_count, total_count) |
| * |
| * The result of percentile_disc() is the the first value whose position |
| * in the cumulative distribution of values, specified by the WITHIN GROUP |
| * clause, is equal to or greater than the percentage specified. And |
| * the definition of the cumulative distribution, i.e. CUME_DIST(), is |
| * following. |
| * |
| * count(*) OVER (ORDER BY tv) / count(*) OVER () |
| * |
| * Let accum_count be the numerator and total_count be the denominator. |
| * Let p be the percentage given as the argument. Now, the result of |
| * percentile_disc() is at the first row position that satisfies |
| * |
| * p >= accum_count / total_count |
| * |
| * Converting this to |
| * |
| * p * total_count >= accum_count |
| * |
| * and because the left hand side is fraction row position, we take ceil |
| * |
| * ceil(p * total_count) |
| * |
| * This is the row position where the result is at. |
| */ |
| Datum |
| percentile_disc_trans(PG_FUNCTION_ARGS) |
| { |
| int64 pc = PG_GETARG_INT64(3); |
| int64 rn0, rn1; |
| PercentileInfo *info; |
| |
| if (!PG_ARGISNULL(0)) |
| PG_RETURN_DATUM(PG_GETARG_DATUM(0)); |
| |
| /* Ignore NULL inputs for percentage and target value */ |
| if (PG_ARGISNULL(1) || PG_ARGISNULL(2)) |
| PG_RETURN_NULL(); |
| |
| if (!fcinfo->flinfo->fn_extra) |
| { |
| float8 percentage = PG_GETARG_FLOAT8(1); |
| int64 tc = PG_GETARG_INT64(4); |
| |
| if (percentage < 0.0 || percentage > 1.0) |
| ereport(ERROR, |
| (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), |
| errmsg("input is out of range"), |
| errhint("Argument to percentile function must be between 0.0 and 1.0."))); |
| info = (PercentileInfo *) MemoryContextAllocZero( |
| fcinfo->flinfo->fn_mcxt, sizeof(PercentileInfo)); |
| if (percentage == 0.0) |
| info->tp = 1.0; |
| else |
| info->tp = ceil(tc * percentage); |
| info->ctp = info->ftp = info->tp; |
| info->rn = 1; |
| fcinfo->flinfo->fn_extra = info; |
| } |
| else |
| info = (PercentileInfo *) fcinfo->flinfo->fn_extra; |
| |
| rn0 = info->rn; |
| rn1 = rn0 + pc; |
| info->rn += pc; |
| if (rn0 <= info->tp && rn1 > info->tp) |
| { |
| /* Clean up, so the next group can see NULL for fn_extra */ |
| pfree(info); |
| fcinfo->flinfo->fn_extra = NULL; |
| |
| PG_RETURN_DATUM(PG_GETARG_DATUM(2)); |
| } |
| |
| PG_RETURN_NULL(); |
| } |