| /*------------------------------------------------------------------------- |
| * |
| * mcv.c |
| * POSTGRES multivariate MCV lists |
| * |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * IDENTIFICATION |
| * src/backend/statistics/mcv.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include <math.h> |
| |
| #include "access/htup_details.h" |
| #include "catalog/pg_collation.h" |
| #include "catalog/pg_statistic_ext.h" |
| #include "catalog/pg_statistic_ext_data.h" |
| #include "fmgr.h" |
| #include "funcapi.h" |
| #include "nodes/nodeFuncs.h" |
| #include "optimizer/clauses.h" |
| #include "statistics/extended_stats_internal.h" |
| #include "statistics/statistics.h" |
| #include "utils/array.h" |
| #include "utils/builtins.h" |
| #include "utils/bytea.h" |
| #include "utils/fmgroids.h" |
| #include "utils/fmgrprotos.h" |
| #include "utils/lsyscache.h" |
| #include "utils/selfuncs.h" |
| #include "utils/syscache.h" |
| #include "utils/typcache.h" |
| |
| /* |
| * Computes size of a serialized MCV item, depending on the number of |
| * dimensions (columns) the statistic is defined on. The datum values are |
| * stored in a separate array (deduplicated, to minimize the size), and |
| * so the serialized items only store uint16 indexes into that array. |
| * |
| * Each serialized item stores (in this order): |
| * |
| * - null flags (ndim * sizeof(bool)) |
| * - frequency (sizeof(double)) |
| * - base_frequency (sizeof(double)) |
| * - indexes to values (ndim * sizeof(uint16)) |
| * |
| * There is no alignment padding within an MCV item. |
| * So in total each MCV item requires this many bytes: |
| * |
| * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double) |
| */ |
| #define ITEM_SIZE(ndims) \ |
| ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)) |
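| |
| /* |
| * For illustration: on common platforms sizeof(uint16) = 2, sizeof(bool) = 1 |
| * and sizeof(double) = 8, so a statistic defined on three columns needs |
| * |
| * ITEM_SIZE(3) = 3 * (2 + 1) + 2 * 8 = 25 bytes |
| * |
| * per serialized MCV item. |
| */ |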
| |
| /* |
| * Minimum size of a serialized MCV list: the varlena header plus the MCV |
| * header fields (magic, type, nitems and ndimensions). |
| */ |
| #define MinSizeOfMCVList \ |
| (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber)) |
| |
| /* |
| * Size of the serialized MCV list, excluding the space needed for |
| * deduplicated per-dimension values. The macro is meant to be used |
| * when it's not yet safe to access the serialized info about amount |
| * of data for each column. |
| */ |
| #define SizeOfMCVList(ndims,nitems) \ |
| ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \ |
| ((ndims) * sizeof(DimensionInfo)) + \ |
| ((nitems) * ITEM_SIZE(ndims))) |
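| |
| /* |
| * For example (assuming a 4-byte varlena header, 4-byte Oid and 2-byte |
| * AttrNumber), a list of 100 items on two columns needs |
| * |
| * SizeOfMCVList(2, 100) = (4 + 12 + 2) + 2 * 4 + 2 * sizeof(DimensionInfo) |
| * + 100 * ITEM_SIZE(2) |
| * = 2226 + 2 * sizeof(DimensionInfo) bytes |
| * |
| * plus the deduplicated values themselves, which are accounted for later. |
| */ |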
| |
| static MultiSortSupport build_mss(StatsBuildData *data); |
| |
| static SortItem *build_distinct_groups(int numrows, SortItem *items, |
| MultiSortSupport mss, int *ndistinct); |
| |
| static SortItem **build_column_frequencies(SortItem *groups, int ngroups, |
| MultiSortSupport mss, int *ncounts); |
| |
| static int count_distinct_groups(int numrows, SortItem *items, |
| MultiSortSupport mss); |
| |
| /* |
| * Compute new value for bitmap item, considering whether it's used for |
| * clauses connected by AND/OR. |
| */ |
| #define RESULT_MERGE(value, is_or, match) \ |
| ((is_or) ? ((value) || (match)) : ((value) && (match))) |
| |
| /* |
| * When processing a list of clauses, the bitmap item may get set to a value |
| * that additional clauses can no longer change. For example, when processing |
| * a list of clauses connected by AND, as soon as the item gets set to 'false' |
| * it stays 'false'. Similarly, for clauses connected by OR, once the item |
| * gets set to 'true' it stays 'true'. |
| * |
| * Returns true when the value in the bitmap can't change no matter how the |
| * remaining clauses are evaluated. |
| */ |
| #define RESULT_IS_FINAL(value, is_or) ((is_or) ? (value) : (!(value))) |
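| |
| /* |
| * For example, when evaluating an AND-list (is_or = false), a single |
| * mismatch is enough to settle the item: |
| * |
| * RESULT_MERGE(true, false, false) == false |
| * RESULT_IS_FINAL(false, false) == true |
| * |
| * and symmetrically a single match settles an OR-list (is_or = true): |
| * |
| * RESULT_MERGE(false, true, true) == true |
| * RESULT_IS_FINAL(true, true) == true |
| */ |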
| |
| /* |
| * get_mincount_for_mcv_list |
| * Determine the minimum number of times a value needs to appear in |
| * the sample for it to be included in the MCV list. |
| * |
| * We want to keep only values that appear sufficiently often in the |
| * sample that it is reasonable to extrapolate their sample frequencies to |
| * the entire table. We do this by placing an upper bound on the relative |
| * standard error of the sample frequency, so that any estimates the |
| * planner generates from the MCV statistics can be expected to be |
| * reasonably accurate. |
| * |
| * Since we are sampling without replacement, the sample frequency of a |
| * particular value is described by a hypergeometric distribution. A |
| * common rule of thumb when estimating errors in this situation is to |
| * require at least 10 instances of the value in the sample, in which case |
| * the distribution can be approximated by a normal distribution, and |
| * standard error analysis techniques can be applied. Given a sample size |
| * of n, a population size of N, and a sample frequency of p=cnt/n, the |
| * standard error of the proportion p is given by |
| * SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1)) |
| * where the second term is the finite population correction. To get |
| * reasonably accurate planner estimates, we impose an upper bound on the |
| * relative standard error of 20% -- i.e., SE/p < 0.2. This 20% relative |
| * error bound is fairly arbitrary, but has been found empirically to work |
| * well. Rearranging this formula gives a lower bound on the number of |
| * instances of the value seen: |
| * cnt > n*(N-n) / (N-n+0.04*n*(N-1)) |
| * This bound is at most 25, and approaches 0 as n approaches 0 or N. The |
| * case where n approaches 0 cannot happen in practice, since the sample |
| * size is at least 300. The case where n approaches N corresponds to |
| * sampling the whole table, in which case it is reasonable to keep |
| * the whole MCV list (have no lower bound), so it makes sense to apply |
| * this formula for all inputs, even though the above derivation is |
| * technically only valid when the right hand side is at least around 10. |
| * |
| * An alternative way to look at this formula is as follows -- assume that |
| * the number of instances of the value seen scales up to the entire |
| * table, so that the population count is K=N*cnt/n. Then the distribution |
| * in the sample is a hypergeometric distribution parameterised by N, n |
| * and K, and the bound above is mathematically equivalent to demanding |
| * that the standard deviation of that distribution is less than 20% of |
| * its mean. Thus the relative errors in any planner estimates produced |
| * from the MCV statistics are likely to be not too large. |
| */ |
| static double |
| get_mincount_for_mcv_list(int samplerows, double totalrows) |
| { |
| double n = samplerows; |
| double N = totalrows; |
| double numer, |
| denom; |
| |
| numer = n * (N - n); |
| denom = N - n + 0.04 * n * (N - 1); |
| |
| /* Guard against division by zero (possible if n = N = 1) */ |
| if (denom == 0.0) |
| return 0.0; |
| |
| return numer / denom; |
| } |
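| |
| /* |
| * For illustration: with samplerows = 30000 (i.e. 300 times the default |
| * statistics target of 100) and totalrows = 1000000, the formula gives |
| * |
| * numer = 30000 * (1000000 - 30000) = 2.91e10 |
| * denom = 970000 + 0.04 * 30000 * 999999 ~= 1.20e9 |
| * mincount = numer / denom ~= 24.2 |
| * |
| * so only groups seen at least 25 times in the sample make it into the |
| * MCV list, consistent with the "bound is at most 25" claim above. |
| */ |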
| |
| /* |
| * Builds MCV list from the set of sampled rows. |
| * |
| * The algorithm is quite simple: |
| * |
| * (1) sort the data (default collation, '<' for the data type) |
| * |
| * (2) count distinct groups, decide how many to keep |
| * |
| * (3) build the MCV list using the threshold determined in (2) |
| * |
| * (4) remove rows represented by the MCV from the sample |
| * |
| */ |
| MCVList * |
| statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget) |
| { |
| int i, |
| numattrs, |
| numrows, |
| ngroups, |
| nitems; |
| double mincount; |
| SortItem *items; |
| SortItem *groups; |
| MCVList *mcvlist = NULL; |
| MultiSortSupport mss; |
| |
| /* comparator for all the columns */ |
| mss = build_mss(data); |
| |
| /* sort the rows */ |
| items = build_sorted_items(data, &nitems, mss, |
| data->nattnums, data->attnums); |
| |
| if (!items) |
| return NULL; |
| |
| /* for convenience */ |
| numattrs = data->nattnums; |
| numrows = data->numrows; |
| |
| /* transform the sorted rows into groups (sorted by frequency) */ |
| groups = build_distinct_groups(nitems, items, mss, &ngroups); |
| |
| /* |
| * The maximum number of MCV items to store, based on the statistics |
| * target we computed for the statistics object (from the target set for |
| * the object itself, attributes and the system default). In any case, we |
| * can't keep more groups than we have available. |
| */ |
| nitems = stattarget; |
| if (nitems > ngroups) |
| nitems = ngroups; |
| |
| /* |
| * Decide how many items to keep in the MCV list. We can't use the same |
| * algorithm as per-column MCV lists, because that only considers the |
| * actual group frequency - but we're primarily interested in how the |
| * actual frequency differs from the base frequency (product of simple |
| * per-column frequencies, as if the columns were independent). |
| * |
| * Using the same algorithm might exclude items whose frequency is close to |
| * the "average" frequency of the sample. But that says nothing about whether |
| * the observed frequency is close to the base frequency. We also need to |
| * consider unexpectedly uncommon items (again, compared to the base |
| * frequency), which the single-column algorithm does not need to do. |
| * |
| * We simply decide how many items to keep by computing the minimum count |
| * using get_mincount_for_mcv_list() and then keep all items that seem to |
| * be more common than that. |
| */ |
| mincount = get_mincount_for_mcv_list(numrows, totalrows); |
| |
| /* |
| * Walk the groups until we find the first group with a count below the |
| * mincount threshold (the index of that group is the number of groups we |
| * want to keep). |
| */ |
| for (i = 0; i < nitems; i++) |
| { |
| if (groups[i].count < mincount) |
| { |
| nitems = i; |
| break; |
| } |
| } |
| |
| /* |
| * At this point, we know the number of items for the MCV list. There |
| * might be none (for uniform distribution with many groups), and in that |
| * case, there will be no MCV list. Otherwise, construct the MCV list. |
| */ |
| if (nitems > 0) |
| { |
| int j; |
| SortItem key; |
| MultiSortSupport tmp; |
| |
| /* frequencies for values in each attribute */ |
| SortItem **freqs; |
| int *nfreqs; |
| |
| /* used to search values */ |
| tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup) |
| + sizeof(SortSupportData)); |
| |
| /* compute frequencies for values in each column */ |
| nfreqs = (int *) palloc0(sizeof(int) * numattrs); |
| freqs = build_column_frequencies(groups, ngroups, mss, nfreqs); |
| |
| /* |
| * Allocate the MCV list structure, set the global parameters. |
| */ |
| mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) + |
| sizeof(MCVItem) * nitems); |
| |
| mcvlist->magic = STATS_MCV_MAGIC; |
| mcvlist->type = STATS_MCV_TYPE_BASIC; |
| mcvlist->ndimensions = numattrs; |
| mcvlist->nitems = nitems; |
| |
| /* store info about data type OIDs */ |
| for (i = 0; i < numattrs; i++) |
| mcvlist->types[i] = data->stats[i]->attrtypid; |
| |
| /* Copy the first chunk of groups into the result. */ |
| for (i = 0; i < nitems; i++) |
| { |
| /* just point to the proper place in the list */ |
| MCVItem *item = &mcvlist->items[i]; |
| |
| item->values = (Datum *) palloc(sizeof(Datum) * numattrs); |
| item->isnull = (bool *) palloc(sizeof(bool) * numattrs); |
| |
| /* copy values for the group */ |
| memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs); |
| memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs); |
| |
| /* groups should be sorted by frequency in descending order */ |
| Assert((i == 0) || (groups[i - 1].count >= groups[i].count)); |
| |
| /* group frequency */ |
| item->frequency = (double) groups[i].count / numrows; |
| |
| /* base frequency, if the attributes were independent */ |
| item->base_frequency = 1.0; |
| for (j = 0; j < numattrs; j++) |
| { |
| SortItem *freq; |
| |
| /* single dimension */ |
| tmp->ndims = 1; |
| tmp->ssup[0] = mss->ssup[j]; |
| |
| /* fill search key */ |
| key.values = &groups[i].values[j]; |
| key.isnull = &groups[i].isnull[j]; |
| |
| freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j], |
| sizeof(SortItem), |
| multi_sort_compare, tmp); |
| |
| item->base_frequency *= ((double) freq->count) / numrows; |
| } |
| } |
| |
| pfree(nfreqs); |
| pfree(freqs); |
| } |
| |
| pfree(items); |
| pfree(groups); |
| |
| return mcvlist; |
| } |
| |
| /* |
| * build_mss |
| * Build a MultiSortSupport for the given StatsBuildData. |
| */ |
| static MultiSortSupport |
| build_mss(StatsBuildData *data) |
| { |
| int i; |
| int numattrs = data->nattnums; |
| |
| /* Sort by multiple columns (using array of SortSupport) */ |
| MultiSortSupport mss = multi_sort_init(numattrs); |
| |
| /* prepare the sort functions for all the attributes */ |
| for (i = 0; i < numattrs; i++) |
| { |
| VacAttrStats *colstat = data->stats[i]; |
| TypeCacheEntry *type; |
| |
| type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR); |
| if (type->lt_opr == InvalidOid) /* shouldn't happen */ |
| elog(ERROR, "cache lookup failed for ordering operator for type %u", |
| colstat->attrtypid); |
| |
| multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid); |
| } |
| |
| return mss; |
| } |
| |
| /* |
| * count_distinct_groups |
| * Count distinct combinations of SortItems in the array. |
| * |
| * The array is assumed to be sorted according to the MultiSortSupport. |
| */ |
| static int |
| count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss) |
| { |
| int i; |
| int ndistinct; |
| |
| ndistinct = 1; |
| for (i = 1; i < numrows; i++) |
| { |
| /* make sure the array really is sorted */ |
| Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0); |
| |
| if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0) |
| ndistinct += 1; |
| } |
| |
| return ndistinct; |
| } |
| |
| /* |
| * compare_sort_item_count |
| * Comparator for sorting items by count (frequencies) in descending |
| * order. |
| */ |
| static int |
| compare_sort_item_count(const void *a, const void *b, void *arg) |
| { |
| SortItem *ia = (SortItem *) a; |
| SortItem *ib = (SortItem *) b; |
| |
| if (ia->count == ib->count) |
| return 0; |
| else if (ia->count > ib->count) |
| return -1; |
| |
| return 1; |
| } |
| |
| /* |
| * build_distinct_groups |
| * Build an array of SortItems for distinct groups and counts matching |
| * items. |
| * |
| * The 'items' array is assumed to be sorted. |
| */ |
| static SortItem * |
| build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, |
| int *ndistinct) |
| { |
| int i, |
| j; |
| int ngroups = count_distinct_groups(numrows, items, mss); |
| |
| SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem)); |
| |
| j = 0; |
| groups[0] = items[0]; |
| groups[0].count = 1; |
| |
| for (i = 1; i < numrows; i++) |
| { |
| /* Assume sorted in ascending order. */ |
| Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0); |
| |
| /* New distinct group detected. */ |
| if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0) |
| { |
| groups[++j] = items[i]; |
| groups[j].count = 0; |
| } |
| |
| groups[j].count++; |
| } |
| |
| /* ensure we filled the expected number of distinct groups */ |
| Assert(j + 1 == ngroups); |
| |
| /* Sort the distinct groups by frequency (in descending order). */ |
| qsort_interruptible(groups, ngroups, sizeof(SortItem), |
| compare_sort_item_count, NULL); |
| |
| *ndistinct = ngroups; |
| return groups; |
| } |
| |
| /* compare sort items (single dimension) */ |
| static int |
| sort_item_compare(const void *a, const void *b, void *arg) |
| { |
| SortSupport ssup = (SortSupport) arg; |
| SortItem *ia = (SortItem *) a; |
| SortItem *ib = (SortItem *) b; |
| |
| return ApplySortComparator(ia->values[0], ia->isnull[0], |
| ib->values[0], ib->isnull[0], |
| ssup); |
| } |
| |
| /* |
| * build_column_frequencies |
| * Compute frequencies of values in each column. |
| * |
| * This returns an array of SortItems for each attribute the MCV is built |
| * on, with a frequency (number of occurrences) for each value. This is |
| * then used to compute "base" frequency of MCV items. |
| * |
| * All the memory is allocated in a single chunk, so that a single pfree |
| * is enough to release it. We do not allocate space for values/isnull |
| * arrays in the SortItems, because we can simply point into the input |
| * groups directly. |
| */ |
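| |
| /* |
| * A small hypothetical example: for groups {(a,x): 3, (a,y): 2, (b,x): 1} |
| * built from numrows = 6 sampled rows, the per-column frequencies are |
| * |
| * column 0: a -> 5, b -> 1 |
| * column 1: x -> 4, y -> 2 |
| * |
| * so the MCV item (a,x) ends up with frequency 3/6 = 0.5 and base |
| * frequency (5/6) * (4/6) ~= 0.56. |
| */ |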
| static SortItem ** |
| build_column_frequencies(SortItem *groups, int ngroups, |
| MultiSortSupport mss, int *ncounts) |
| { |
| int i, |
| dim; |
| SortItem **result; |
| char *ptr; |
| |
| Assert(groups); |
| Assert(ncounts); |
| |
| /* allocate arrays for all columns as a single chunk */ |
| ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) + |
| mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups)); |
| |
| /* initial array of pointers */ |
| result = (SortItem **) ptr; |
| ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims); |
| |
| for (dim = 0; dim < mss->ndims; dim++) |
| { |
| SortSupport ssup = &mss->ssup[dim]; |
| |
| /* array of values for a single column */ |
| result[dim] = (SortItem *) ptr; |
| ptr += MAXALIGN(sizeof(SortItem) * ngroups); |
| |
| /* extract data for the dimension */ |
| for (i = 0; i < ngroups; i++) |
| { |
| /* point into the input groups */ |
| result[dim][i].values = &groups[i].values[dim]; |
| result[dim][i].isnull = &groups[i].isnull[dim]; |
| result[dim][i].count = groups[i].count; |
| } |
| |
| /* sort the values, deduplicate */ |
| qsort_interruptible(result[dim], ngroups, sizeof(SortItem), |
| sort_item_compare, ssup); |
| |
| /* |
| * Identify distinct values and compute their frequencies (there might |
| * be multiple MCV items containing a value, so we need to sum counts |
| * from all of them). |
| */ |
| ncounts[dim] = 1; |
| for (i = 1; i < ngroups; i++) |
| { |
| if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0) |
| { |
| result[dim][ncounts[dim] - 1].count += result[dim][i].count; |
| continue; |
| } |
| |
| result[dim][ncounts[dim]] = result[dim][i]; |
| |
| ncounts[dim]++; |
| } |
| } |
| |
| return result; |
| } |
| |
| /* |
| * statext_mcv_load |
| * Load the MCV list for the indicated pg_statistic_ext_data tuple. |
| */ |
| MCVList * |
| statext_mcv_load(Oid mvoid, bool inh) |
| { |
| MCVList *result; |
| bool isnull; |
| Datum mcvlist; |
| HeapTuple htup = SearchSysCache2(STATEXTDATASTXOID, |
| ObjectIdGetDatum(mvoid), BoolGetDatum(inh)); |
| |
| if (!HeapTupleIsValid(htup)) |
| elog(ERROR, "cache lookup failed for statistics object %u", mvoid); |
| |
| mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup, |
| Anum_pg_statistic_ext_data_stxdmcv, &isnull); |
| |
| if (isnull) |
| elog(ERROR, |
| "requested statistics kind \"%c\" is not yet built for statistics object %u", |
| STATS_EXT_MCV, mvoid); |
| |
| result = statext_mcv_deserialize(DatumGetByteaP(mcvlist)); |
| |
| ReleaseSysCache(htup); |
| |
| return result; |
| } |
| |
| |
| /* |
| * statext_mcv_serialize |
| * Serialize MCV list into a pg_mcv_list value. |
| * |
| * The MCV items may include values of various data types, and it's reasonable |
| * to expect redundancy (values for a given attribute, repeated for multiple |
| * MCV list items). So we deduplicate the values into arrays, and then replace |
| * the values by indexes into those arrays. |
| * |
| * The overall structure of the serialized representation looks like this: |
| * |
| * +---------------+----------------+---------------------+-------+ |
| * | header fields | dimension info | deduplicated values | items | |
| * +---------------+----------------+---------------------+-------+ |
| * |
| * Where dimension info stores information about the type of the K-th |
| * attribute (e.g. typlen, typbyval and length of deduplicated values). |
| * Deduplicated values store deduplicated values for each attribute. And |
| * items store the actual MCV list items, with values replaced by indexes into |
| * the arrays. |
| * |
| * When serializing the items, we use uint16 indexes. The number of MCV items |
| * is limited by the statistics target (which is capped to 10k at the moment). |
| * We might increase this to 65k and still fit into uint16, so there's a bit of |
| * slack. Furthermore, the indexes only need to cover the number of distinct |
| * values per column, and we usually have few of those (the MCV items are |
| * various combinations of them). So uint16 seems fine for now. |
| * |
| * We don't really expect the serialization to save as much space as for |
| * histograms, as we are not doing any bucket splits (which is the source |
| * of high redundancy in histograms). |
| * |
| * TODO: Consider packing boolean flags (NULL) for each item into a single char |
| * (or a longer type) instead of using an array of bool items. |
| */ |
| bytea * |
| statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats) |
| { |
| int i; |
| int dim; |
| int ndims = mcvlist->ndimensions; |
| |
| SortSupport ssup; |
| DimensionInfo *info; |
| |
| Size total_length; |
| |
| /* serialized items (indexes into arrays, etc.) */ |
| bytea *raw; |
| char *ptr; |
| char *endptr PG_USED_FOR_ASSERTS_ONLY; |
| |
| /* values per dimension (and number of non-NULL values) */ |
| Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims); |
| int *counts = (int *) palloc0(sizeof(int) * ndims); |
| |
| /* |
| * We'll include some rudimentary information about the attribute types |
| * (length, by-val flag), so that we don't have to look them up while |
| * deserializing the MCV list (we already have the type OID in the |
| * header). This is safe because when changing the type of the attribute |
| * the statistics gets dropped automatically. We need to store the info |
| * about the arrays of deduplicated values anyway. |
| */ |
| info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims); |
| |
| /* sort support data for all attributes included in the MCV list */ |
| ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims); |
| |
| /* collect and deduplicate values for each dimension (attribute) */ |
| for (dim = 0; dim < ndims; dim++) |
| { |
| int ndistinct; |
| TypeCacheEntry *typentry; |
| |
| /* |
| * Lookup the LT operator (can't get it from stats extra_data, as we |
| * don't know how to interpret that - scalar vs. array etc.). |
| */ |
| typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR); |
| |
| /* copy important info about the data type (length, by-value) */ |
| info[dim].typlen = stats[dim]->attrtype->typlen; |
| info[dim].typbyval = stats[dim]->attrtype->typbyval; |
| |
| /* allocate space for values in the attribute and collect them */ |
| values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems); |
| |
| for (i = 0; i < mcvlist->nitems; i++) |
| { |
| /* skip NULL values - we don't need to deduplicate those */ |
| if (mcvlist->items[i].isnull[dim]) |
| continue; |
| |
| /* append the value at the end */ |
| values[dim][counts[dim]] = mcvlist->items[i].values[dim]; |
| counts[dim] += 1; |
| } |
| |
| /* if there are just NULL values in this dimension, we're done */ |
| if (counts[dim] == 0) |
| continue; |
| |
| /* sort and deduplicate the data */ |
| ssup[dim].ssup_cxt = CurrentMemoryContext; |
| ssup[dim].ssup_collation = stats[dim]->attrcollid; |
| ssup[dim].ssup_nulls_first = false; |
| |
| PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]); |
| |
| qsort_interruptible(values[dim], counts[dim], sizeof(Datum), |
| compare_scalars_simple, &ssup[dim]); |
| |
| /* |
| * Walk through the array and eliminate duplicate values, but keep the |
| * ordering (so that we can do a binary search later). We know there's |
| * at least one item as (counts[dim] != 0), so we can skip the first |
| * element. |
| */ |
| ndistinct = 1; /* number of distinct values */ |
| for (i = 1; i < counts[dim]; i++) |
| { |
| /* expect sorted array */ |
| Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0); |
| |
| /* if the value is the same as the previous one, we can skip it */ |
| if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim])) |
| continue; |
| |
| values[dim][ndistinct] = values[dim][i]; |
| ndistinct += 1; |
| } |
| |
| /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */ |
| Assert(ndistinct <= PG_UINT16_MAX); |
| |
| /* |
| * Store additional info about the attribute - number of deduplicated |
| * values, and also size of the serialized data. For fixed-length data |
| * types this is trivial to compute, for varwidth types we need to |
| * actually walk the array and sum the sizes. |
| */ |
| info[dim].nvalues = ndistinct; |
| |
| if (info[dim].typbyval) /* by-value data types */ |
| { |
| info[dim].nbytes = info[dim].nvalues * info[dim].typlen; |
| |
| /* |
| * We copy the data into the MCV item during deserialization, so |
| * we don't need to allocate any extra space. |
| */ |
| info[dim].nbytes_aligned = 0; |
| } |
| else if (info[dim].typlen > 0) /* fixed-length by-ref */ |
| { |
| /* |
| * We don't care about alignment in the serialized data, so we |
| * pack the data as much as possible. But we also track how much |
| * data will be needed after deserialization, and in that case we |
| * need to account for alignment of each item. |
| * |
| * Note: As the items are fixed-length, we could easily compute |
| * this during deserialization, but we do it here anyway. |
| */ |
| info[dim].nbytes = info[dim].nvalues * info[dim].typlen; |
| info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen); |
| } |
| else if (info[dim].typlen == -1) /* varlena */ |
| { |
| info[dim].nbytes = 0; |
| info[dim].nbytes_aligned = 0; |
| for (i = 0; i < info[dim].nvalues; i++) |
| { |
| Size len; |
| |
| /* |
| * For varlena values, we detoast the values and store the |
| * length and data separately. We don't bother with alignment |
| * here, which means that during deserialization we need to |
| * copy the fields and only access the copies. |
| */ |
| values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i])); |
| |
| /* serialized length (uint32 length + data) */ |
| len = VARSIZE_ANY_EXHDR(values[dim][i]); |
| info[dim].nbytes += sizeof(uint32); /* length */ |
| info[dim].nbytes += len; /* value (no header) */ |
| |
| /* |
| * During deserialization we'll build regular varlena values |
| * with full headers, and we need to align them properly. |
| */ |
| info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len); |
| } |
| } |
| else if (info[dim].typlen == -2) /* cstring */ |
| { |
| info[dim].nbytes = 0; |
| info[dim].nbytes_aligned = 0; |
| for (i = 0; i < info[dim].nvalues; i++) |
| { |
| Size len; |
| |
| /* |
| * cstring is handled similar to varlena - first we store the |
| * length as uint32 and then the data. We don't care about |
| * alignment, which means that during deserialization we need |
| * to copy the fields and only access the copies. |
| */ |
| |
| /* c-strings include terminator, so +1 byte */ |
| len = strlen(DatumGetCString(values[dim][i])) + 1; |
| info[dim].nbytes += sizeof(uint32); /* length */ |
| info[dim].nbytes += len; /* value */ |
| |
| /* space needed for properly aligned deserialized copies */ |
| info[dim].nbytes_aligned += MAXALIGN(len); |
| } |
| } |
| |
| /* we know (count>0) so there must be some data */ |
| Assert(info[dim].nbytes > 0); |
| } |
| |
| /* |
| * Now we can finally compute how much space we'll actually need for the |
| * whole serialized MCV list (varlena header, MCV header, dimension info |
| * for each attribute, deduplicated values and items). |
| */ |
| total_length = (3 * sizeof(uint32)) /* magic + type + nitems */ |
| + sizeof(AttrNumber) /* ndimensions */ |
| + (ndims * sizeof(Oid)); /* attribute types */ |
| |
| /* dimension info */ |
| total_length += ndims * sizeof(DimensionInfo); |
| |
| /* add space for the arrays of deduplicated values */ |
| for (i = 0; i < ndims; i++) |
| total_length += info[i].nbytes; |
| |
| /* |
| * And finally account for the items (those are fixed-length, thanks to |
| * replacing values with uint16 indexes into the deduplicated arrays). |
| */ |
| total_length += mcvlist->nitems * ITEM_SIZE(dim); |
| |
| /* |
| * Allocate space for the whole serialized MCV list (we'll skip bytes, so |
| * we set them to zero to make the result more compressible). |
| */ |
| raw = (bytea *) palloc0(VARHDRSZ + total_length); |
| SET_VARSIZE(raw, VARHDRSZ + total_length); |
| |
| ptr = VARDATA(raw); |
| endptr = ptr + total_length; |
| |
| /* copy the MCV list header fields, one by one */ |
| memcpy(ptr, &mcvlist->magic, sizeof(uint32)); |
| ptr += sizeof(uint32); |
| |
| memcpy(ptr, &mcvlist->type, sizeof(uint32)); |
| ptr += sizeof(uint32); |
| |
| memcpy(ptr, &mcvlist->nitems, sizeof(uint32)); |
| ptr += sizeof(uint32); |
| |
| memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber)); |
| ptr += sizeof(AttrNumber); |
| |
| memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims); |
| ptr += (sizeof(Oid) * ndims); |
| |
| /* store information about the attributes (data amounts, ...) */ |
| memcpy(ptr, info, sizeof(DimensionInfo) * ndims); |
| ptr += sizeof(DimensionInfo) * ndims; |
| |
| /* Copy the deduplicated values for all attributes to the output. */ |
| for (dim = 0; dim < ndims; dim++) |
| { |
| /* remember the starting point for Asserts later */ |
| char *start PG_USED_FOR_ASSERTS_ONLY = ptr; |
| |
| for (i = 0; i < info[dim].nvalues; i++) |
| { |
| Datum value = values[dim][i]; |
| |
| if (info[dim].typbyval) /* passed by value */ |
| { |
| Datum tmp; |
| |
| /* |
| * For byval types, we need to copy just the significant bytes |
| * - we can't use memcpy directly, as that assumes |
| * little-endian behavior. store_att_byval does almost what |
| * we need, but it requires a properly aligned buffer - the |
| * output buffer does not guarantee that. So we simply use a |
| * local Datum variable (which guarantees proper alignment), |
| * and then copy the value from it. |
| */ |
| store_att_byval(&tmp, value, info[dim].typlen); |
| |
| memcpy(ptr, &tmp, info[dim].typlen); |
| ptr += info[dim].typlen; |
| } |
| else if (info[dim].typlen > 0) /* passed by reference */ |
| { |
| /* no special alignment needed, treated as char array */ |
| memcpy(ptr, DatumGetPointer(value), info[dim].typlen); |
| ptr += info[dim].typlen; |
| } |
| else if (info[dim].typlen == -1) /* varlena */ |
| { |
| uint32 len = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); |
| |
| /* copy the length */ |
| memcpy(ptr, &len, sizeof(uint32)); |
| ptr += sizeof(uint32); |
| |
| /* data from the varlena value (without the header) */ |
| memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len); |
| ptr += len; |
| } |
| else if (info[dim].typlen == -2) /* cstring */ |
| { |
| uint32 len = (uint32) strlen(DatumGetCString(value)) + 1; |
| |
| /* copy the length */ |
| memcpy(ptr, &len, sizeof(uint32)); |
| ptr += sizeof(uint32); |
| |
| /* value */ |
| memcpy(ptr, DatumGetCString(value), len); |
| ptr += len; |
| } |
| |
| /* no underflows or overflows */ |
| Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes)); |
| } |
| |
| /* we should get exactly nbytes of data for this dimension */ |
| Assert((ptr - start) == info[dim].nbytes); |
| } |
| |
| /* Serialize the items, with uint16 indexes instead of the values. */ |
| for (i = 0; i < mcvlist->nitems; i++) |
| { |
| MCVItem *mcvitem = &mcvlist->items[i]; |
| |
| /* don't write beyond the allocated space */ |
| Assert(ptr <= (endptr - ITEM_SIZE(dim))); |
| |
| /* copy NULL and frequency flags into the serialized MCV */ |
| memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims); |
| ptr += sizeof(bool) * ndims; |
| |
| memcpy(ptr, &mcvitem->frequency, sizeof(double)); |
| ptr += sizeof(double); |
| |
| memcpy(ptr, &mcvitem->base_frequency, sizeof(double)); |
| ptr += sizeof(double); |
| |
| /* store the indexes last */ |
| for (dim = 0; dim < ndims; dim++) |
| { |
| uint16 index = 0; |
| Datum *value; |
| |
| /* do the lookup only for non-NULL values */ |
| if (!mcvitem->isnull[dim]) |
| { |
| value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim], |
| info[dim].nvalues, sizeof(Datum), |
| compare_scalars_simple, &ssup[dim]); |
| |
| Assert(value != NULL); /* serialization or deduplication |
| * error */ |
| |
| /* compute index within the deduplicated array */ |
| index = (uint16) (value - values[dim]); |
| |
| /* check the index is within expected bounds */ |
| Assert(index < info[dim].nvalues); |
| } |
| |
| /* copy the index into the serialized MCV */ |
| memcpy(ptr, &index, sizeof(uint16)); |
| ptr += sizeof(uint16); |
| } |
| |
| /* make sure we don't overflow the allocated value */ |
| Assert(ptr <= endptr); |
| } |
| |
| /* at this point we expect to match the total_length exactly */ |
| Assert(ptr == endptr); |
| |
| pfree(values); |
| pfree(counts); |
| |
| return raw; |
| } |
| |
| /* |
| * statext_mcv_deserialize |
| * Reads serialized MCV list into MCVList structure. |
| * |
| * All the memory needed by the MCV list is allocated as a single chunk, so |
| * it's possible to simply pfree() it at once. |
| */ |
| MCVList * |
| statext_mcv_deserialize(bytea *data) |
| { |
| int dim, |
| i; |
| Size expected_size; |
| MCVList *mcvlist; |
| char *raw; |
| char *ptr; |
| char *endptr PG_USED_FOR_ASSERTS_ONLY; |
| |
| int ndims, |
| nitems; |
| DimensionInfo *info = NULL; |
| |
| /* local allocation buffer (used only for deserialization) */ |
| Datum **map = NULL; |
| |
| /* MCV list */ |
| Size mcvlen; |
| |
| /* buffer used for the result */ |
| Size datalen; |
| char *dataptr; |
| char *valuesptr; |
| char *isnullptr; |
| |
| if (data == NULL) |
| return NULL; |
| |
| /* |
| * We can't possibly deserialize an MCV list if there's not even a complete |
| * header. We need an explicit formula here, because we serialize the |
| * header fields one by one, so we need to ignore struct alignment. |
| */ |
| if (VARSIZE_ANY(data) < MinSizeOfMCVList) |
| elog(ERROR, "invalid MCV size %zu (expected at least %zu)", |
| VARSIZE_ANY(data), MinSizeOfMCVList); |
| |
| /* read the MCV list header */ |
| mcvlist = (MCVList *) palloc0(offsetof(MCVList, items)); |
| |
| /* pointer to the data part (skip the varlena header) */ |
| raw = (char *) data; |
| ptr = VARDATA_ANY(raw); |
| endptr = (char *) raw + VARSIZE_ANY(data); |
| |
| /* get the header and perform further sanity checks */ |
| memcpy(&mcvlist->magic, ptr, sizeof(uint32)); |
| ptr += sizeof(uint32); |
| |
| memcpy(&mcvlist->type, ptr, sizeof(uint32)); |
| ptr += sizeof(uint32); |
| |
| memcpy(&mcvlist->nitems, ptr, sizeof(uint32)); |
| ptr += sizeof(uint32); |
| |
| memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber)); |
| ptr += sizeof(AttrNumber); |
| |
| if (mcvlist->magic != STATS_MCV_MAGIC) |
| elog(ERROR, "invalid MCV magic %u (expected %u)", |
| mcvlist->magic, STATS_MCV_MAGIC); |
| |
| if (mcvlist->type != STATS_MCV_TYPE_BASIC) |
| elog(ERROR, "invalid MCV type %u (expected %u)", |
| mcvlist->type, STATS_MCV_TYPE_BASIC); |
| |
| if (mcvlist->ndimensions == 0) |
| elog(ERROR, "invalid zero-length dimension array in MCVList"); |
| else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) || |
| (mcvlist->ndimensions < 0)) |
| elog(ERROR, "invalid length (%d) dimension array in MCVList", |
| mcvlist->ndimensions); |
| |
| if (mcvlist->nitems == 0) |
| elog(ERROR, "invalid zero-length item array in MCVList"); |
| else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS) |
| elog(ERROR, "invalid length (%u) item array in MCVList", |
| mcvlist->nitems); |
| |
| nitems = mcvlist->nitems; |
| ndims = mcvlist->ndimensions; |
| |
| /* |
| * Check amount of data including DimensionInfo for all dimensions and |
| * also the serialized items (including uint16 indexes). Also, walk |
| * through the dimension information and add it to the sum. |
| */ |
| expected_size = SizeOfMCVList(ndims, nitems); |
| |
| /* |
| * Check that we have at least the dimension and info records, along with |
| * the items. We don't know the size of the serialized values yet. We need |
| * to do this check first, before accessing the dimension info. |
| */ |
| if (VARSIZE_ANY(data) < expected_size) |
| elog(ERROR, "invalid MCV size %zu (expected %zu)", |
| VARSIZE_ANY(data), expected_size); |
| |
| /* Now copy the array of type Oids. */ |
| memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims); |
| ptr += (sizeof(Oid) * ndims); |
| |
| /* Now it's safe to access the dimension info. */ |
| info = palloc(ndims * sizeof(DimensionInfo)); |
| |
| memcpy(info, ptr, ndims * sizeof(DimensionInfo)); |
| ptr += (ndims * sizeof(DimensionInfo)); |
| |
| /* account for the value arrays */ |
| for (dim = 0; dim < ndims; dim++) |
| { |
| /* |
| * XXX I wonder if we can/should rely on asserts here. Maybe those |
| * checks should be done every time? |
| */ |
| Assert(info[dim].nvalues >= 0); |
| Assert(info[dim].nbytes >= 0); |
| |
| expected_size += info[dim].nbytes; |
| } |
| |
| /* |
| * Now we know the total expected MCV size, including all the pieces |
| * (header, dimension info, items and deduplicated data). So do the final |
| * check on size. |
| */ |
| if (VARSIZE_ANY(data) != expected_size) |
| elog(ERROR, "invalid MCV size %zu (expected %zu)", |
| VARSIZE_ANY(data), expected_size); |
| |
| /* |
| * We need an array of Datum values for each dimension, so that we can |
| * easily translate the uint16 indexes later. We also need a top-level |
| * array of pointers to those per-dimension arrays. |
| * |
| * While allocating the arrays for dimensions, compute how much space we |
| * need for a copy of the by-ref data, as we can't simply point to the |
| * original values (it might go away). |
| */ |
| datalen = 0; /* space for by-ref data */ |
| map = (Datum **) palloc(ndims * sizeof(Datum *)); |
| |
| for (dim = 0; dim < ndims; dim++) |
| { |
| map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues); |
| |
| /* space needed for a copy of data for by-ref types */ |
| datalen += info[dim].nbytes_aligned; |
| } |
| |
| /* |
| * Now resize the MCV list so that the allocation includes all the data. |
| * |
| * Allocate space for a copy of the data, as we can't simply reference the |
| * serialized data - it's not aligned properly, and it may disappear while |
| * we're still using the MCV list, e.g. due to catcache release. |
| * |
| * We do care about alignment here, because we will allocate all the |
| * pieces at once, but then use pointers to different parts. |
| */ |
| mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems)); |
| |
| /* arrays of values and isnull flags for all MCV items */ |
| mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims); |
| mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims); |
| |
| /* we don't quite need to align this, but it makes some asserts easier */ |
| mcvlen += MAXALIGN(datalen); |
| |
| /* now resize the deserialized MCV list, and compute pointers to parts */ |
| mcvlist = repalloc(mcvlist, mcvlen); |
| |
| /* pointer to the beginning of values/isnull arrays */ |
| valuesptr = (char *) mcvlist |
| + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems)); |
| |
| isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims)); |
| |
| dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims)); |
| |
| /* |
| * Build mapping (index => value) for translating the serialized data into |
| * the in-memory representation. |
| */ |
| for (dim = 0; dim < ndims; dim++) |
| { |
| /* remember start position in the input array */ |
| char *start PG_USED_FOR_ASSERTS_ONLY = ptr; |
| |
| if (info[dim].typbyval) |
| { |
| /* for by-val types we simply copy data into the mapping */ |
| for (i = 0; i < info[dim].nvalues; i++) |
| { |
| Datum v = 0; |
| |
| memcpy(&v, ptr, info[dim].typlen); |
| ptr += info[dim].typlen; |
| |
| map[dim][i] = fetch_att(&v, true, info[dim].typlen); |
| |
| /* no under/overflow of input array */ |
| Assert(ptr <= (start + info[dim].nbytes)); |
| } |
| } |
| else |
| { |
| /* for by-ref types we need to also make a copy of the data */ |
| |
| /* passed by reference, but fixed length (name, tid, ...) */ |
| if (info[dim].typlen > 0) |
| { |
| for (i = 0; i < info[dim].nvalues; i++) |
| { |
| memcpy(dataptr, ptr, info[dim].typlen); |
| ptr += info[dim].typlen; |
| |
| /* just point into the array */ |
| map[dim][i] = PointerGetDatum(dataptr); |
| dataptr += MAXALIGN(info[dim].typlen); |
| } |
| } |
| else if (info[dim].typlen == -1) |
| { |
| /* varlena */ |
| for (i = 0; i < info[dim].nvalues; i++) |
| { |
| uint32 len; |
| |
| /* read the uint32 length */ |
| memcpy(&len, ptr, sizeof(uint32)); |
| ptr += sizeof(uint32); |
| |
| /* the length is data-only */ |
| SET_VARSIZE(dataptr, len + VARHDRSZ); |
| memcpy(VARDATA(dataptr), ptr, len); |
| ptr += len; |
| |
| /* just point into the array */ |
| map[dim][i] = PointerGetDatum(dataptr); |
| |
| /* skip to place of the next deserialized value */ |
| dataptr += MAXALIGN(len + VARHDRSZ); |
| } |
| } |
| else if (info[dim].typlen == -2) |
| { |
| /* cstring */ |
| for (i = 0; i < info[dim].nvalues; i++) |
| { |
| uint32 len; |
| |
| memcpy(&len, ptr, sizeof(uint32)); |
| ptr += sizeof(uint32); |
| |
| memcpy(dataptr, ptr, len); |
| ptr += len; |
| |
| /* just point into the array */ |
| map[dim][i] = PointerGetDatum(dataptr); |
| dataptr += MAXALIGN(len); |
| } |
| } |
| |
| /* no under/overflow of input array */ |
| Assert(ptr <= (start + info[dim].nbytes)); |
| |
| /* no overflow of the output mcv value */ |
| Assert(dataptr <= ((char *) mcvlist + mcvlen)); |
| } |
| |
| /* check we consumed input data for this dimension exactly */ |
| Assert(ptr == (start + info[dim].nbytes)); |
| } |
| |
| /* we should have also filled the MCV list exactly */ |
| Assert(dataptr == ((char *) mcvlist + mcvlen)); |
| |
| /* deserialize the MCV items and translate the indexes to Datums */ |
| for (i = 0; i < nitems; i++) |
| { |
| MCVItem *item = &mcvlist->items[i]; |
| |
| item->values = (Datum *) valuesptr; |
| valuesptr += MAXALIGN(sizeof(Datum) * ndims); |
| |
| item->isnull = (bool *) isnullptr; |
| isnullptr += MAXALIGN(sizeof(bool) * ndims); |
| |
| memcpy(item->isnull, ptr, sizeof(bool) * ndims); |
| ptr += sizeof(bool) * ndims; |
| |
| memcpy(&item->frequency, ptr, sizeof(double)); |
| ptr += sizeof(double); |
| |
| memcpy(&item->base_frequency, ptr, sizeof(double)); |
| ptr += sizeof(double); |
| |
| /* finally translate the indexes (for non-NULL only) */ |
| for (dim = 0; dim < ndims; dim++) |
| { |
| uint16 index; |
| |
| memcpy(&index, ptr, sizeof(uint16)); |
| ptr += sizeof(uint16); |
| |
| if (item->isnull[dim]) |
| continue; |
| |
| item->values[dim] = map[dim][index]; |
| } |
| |
| /* check we're not overflowing the input */ |
| Assert(ptr <= endptr); |
| } |
| |
| /* check that we processed all the data */ |
| Assert(ptr == endptr); |
| |
| /* release the buffers used for mapping */ |
| for (dim = 0; dim < ndims; dim++) |
| pfree(map[dim]); |
| |
| pfree(map); |
| |
| return mcvlist; |
| } |
| |
| /* |
| * SRF with details about the items of an MCV list: |
| * |
| * - item ID (0 .. nitems - 1) |
| * - values (string array) |
| * - null flags (boolean array) |
| * - frequency (double precision) |
| * - base_frequency (double precision) |
| * |
| * The input is the serialized MCV list (a pg_mcv_list value), and no rows |
| * are returned if it does not contain a valid MCV list. |
| */ |
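| |
| /* |
| * Typical usage goes through the SQL wrapper function (roughly; the exact |
| * query is just an illustration): |
| * |
| * SELECT m.* |
| * FROM pg_statistic_ext s |
| * JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid), |
| * pg_mcv_list_items(d.stxdmcv) m |
| * WHERE s.stxname = 'stts'; |
| */ |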
| Datum |
| pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS) |
| { |
| FuncCallContext *funcctx; |
| |
| /* stuff done only on the first call of the function */ |
| if (SRF_IS_FIRSTCALL()) |
| { |
| MemoryContext oldcontext; |
| MCVList *mcvlist; |
| TupleDesc tupdesc; |
| |
| /* create a function context for cross-call persistence */ |
| funcctx = SRF_FIRSTCALL_INIT(); |
| |
| /* switch to memory context appropriate for multiple function calls */ |
| oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); |
| |
| mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0)); |
| |
| funcctx->user_fctx = mcvlist; |
| |
| /* total number of tuples to be returned */ |
| funcctx->max_calls = 0; |
| if (funcctx->user_fctx != NULL) |
| funcctx->max_calls = mcvlist->nitems; |
| |
| /* Build a tuple descriptor for our result type */ |
| if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("function returning record called in context " |
| "that cannot accept type record"))); |
| tupdesc = BlessTupleDesc(tupdesc); |
| |
| /* |
| * generate attribute metadata needed later to produce tuples from raw |
| * C strings |
| */ |
| funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| /* stuff done on every call of the function */ |
| funcctx = SRF_PERCALL_SETUP(); |
| |
| if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more |
| * left to send */ |
| { |
| Datum values[5]; |
| bool nulls[5]; |
| HeapTuple tuple; |
| Datum result; |
| ArrayBuildState *astate_values = NULL; |
| ArrayBuildState *astate_nulls = NULL; |
| |
| int i; |
| MCVList *mcvlist; |
| MCVItem *item; |
| |
| mcvlist = (MCVList *) funcctx->user_fctx; |
| |
| Assert(funcctx->call_cntr < mcvlist->nitems); |
| |
| item = &mcvlist->items[funcctx->call_cntr]; |
| |
| for (i = 0; i < mcvlist->ndimensions; i++) |
| { |
| |
| astate_nulls = accumArrayResult(astate_nulls, |
| BoolGetDatum(item->isnull[i]), |
| false, |
| BOOLOID, |
| CurrentMemoryContext); |
| |
| if (!item->isnull[i]) |
| { |
| bool isvarlena; |
| Oid outfunc; |
| FmgrInfo fmgrinfo; |
| Datum val; |
| text *txt; |
| |
| /* lookup output func for the type */ |
| getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena); |
| fmgr_info(outfunc, &fmgrinfo); |
| |
| val = FunctionCall1(&fmgrinfo, item->values[i]); |
| txt = cstring_to_text(DatumGetPointer(val)); |
| |
| astate_values = accumArrayResult(astate_values, |
| PointerGetDatum(txt), |
| false, |
| TEXTOID, |
| CurrentMemoryContext); |
| } |
| else |
| astate_values = accumArrayResult(astate_values, |
| (Datum) 0, |
| true, |
| TEXTOID, |
| CurrentMemoryContext); |
| } |
| |
| values[0] = Int32GetDatum(funcctx->call_cntr); |
| values[1] = makeArrayResult(astate_values, CurrentMemoryContext); |
| values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext); |
| values[3] = Float8GetDatum(item->frequency); |
| values[4] = Float8GetDatum(item->base_frequency); |
| |
| /* no NULLs in the tuple */ |
| memset(nulls, 0, sizeof(nulls)); |
| |
| /* build a tuple */ |
| tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls); |
| |
| /* make the tuple into a datum */ |
| result = HeapTupleGetDatum(tuple); |
| |
| SRF_RETURN_NEXT(funcctx, result); |
| } |
| else /* do when there is no more left */ |
| { |
| SRF_RETURN_DONE(funcctx); |
| } |
| } |
| |
| /* |
| * pg_mcv_list_in - input routine for type pg_mcv_list. |
| * |
| * pg_mcv_list is real enough to be a table column, but it has no operations |
| * of its own, and disallows input too |
| */ |
| Datum |
| pg_mcv_list_in(PG_FUNCTION_ARGS) |
| { |
| /* |
| * pg_mcv_list stores the data in binary form and parsing text input is |
| * not needed, so disallow this. |
| */ |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot accept a value of type %s", "pg_mcv_list"))); |
| |
| PG_RETURN_VOID(); /* keep compiler quiet */ |
| } |
| |
| |
| /* |
| * pg_mcv_list_out - output routine for type pg_mcv_list. |
| * |
| * MCV lists are serialized into a bytea value, so we simply call byteaout() |
| * to convert the value to text. But it'd be nice to produce a more |
| * meaningful representation (e.g. for inspection by people). |
| * |
| * XXX This should probably return something meaningful, similar to what |
| * pg_dependencies_out does. Not sure how to deal with the deduplicated |
| * values, though - do we want to expand that or not? |
| */ |
| Datum |
| pg_mcv_list_out(PG_FUNCTION_ARGS) |
| { |
| return byteaout(fcinfo); |
| } |
| |
| /* |
| * pg_mcv_list_recv - binary input routine for type pg_mcv_list. |
| */ |
| Datum |
| pg_mcv_list_recv(PG_FUNCTION_ARGS) |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot accept a value of type %s", "pg_mcv_list"))); |
| |
| PG_RETURN_VOID(); /* keep compiler quiet */ |
| } |
| |
| /* |
| * pg_mcv_list_send - binary output routine for type pg_mcv_list. |
| * |
| * MCV lists are serialized in a bytea value (although the type is named |
| * differently), so let's just send that. |
| */ |
| Datum |
| pg_mcv_list_send(PG_FUNCTION_ARGS) |
| { |
| return byteasend(fcinfo); |
| } |
| |
| /* |
| * match the attribute/expression to a dimension of the statistic |
| * |
| * Returns the zero-based index of the matching statistics dimension. |
| * Optionally determines the collation. |
| */ |
| static int |
| mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid) |
| { |
| int idx; |
| |
| if (IsA(expr, Var)) |
| { |
| /* simple Var, so just lookup using varattno */ |
| Var *var = (Var *) expr; |
| |
| if (collid) |
| *collid = var->varcollid; |
| |
| idx = bms_member_index(keys, var->varattno); |
| |
| if (idx < 0) |
| elog(ERROR, "variable not found in statistics object"); |
| } |
| else |
| { |
| /* expression - lookup in stats expressions */ |
| ListCell *lc; |
| |
| if (collid) |
| *collid = exprCollation(expr); |
| |
| /* expressions are stored after the simple columns */ |
| idx = bms_num_members(keys); |
| foreach(lc, exprs) |
| { |
| Node *stat_expr = (Node *) lfirst(lc); |
| |
| if (equal(expr, stat_expr)) |
| break; |
| |
| idx++; |
| } |
| |
| if (lc == NULL) |
| elog(ERROR, "expression not found in statistics object"); |
| } |
| |
| return idx; |
| } |
| |
| /* |
| * mcv_get_match_bitmap |
| * Evaluate clauses using the MCV list, and update the match bitmap. |
| * |
| * A match bitmap keeps match/mismatch status for each MCV item, and we |
| * update it based on additional clauses. We also use it to skip items |
| * that can't possibly match (e.g. item marked as "mismatch" can't change |
| * to "match" when evaluating AND clause list). |
| * |
| * The function also returns a flag indicating whether there was an |
| * equality condition for all attributes, the minimum frequency in the MCV |
| * list, and a total MCV frequency (sum of frequencies for all items). |
| * |
| * XXX Currently the match bitmap uses a bool for each MCV item, which is |
| * somewhat wasteful as we could do with just a single bit, thus reducing |
| * the size to ~1/8. It would also allow us to combine bitmaps simply using |
| * & and |, which should be faster than min/max. The bitmaps are fairly |
| * small, though (thanks to the cap on the MCV list size). |
| */ |
| static bool * |
| mcv_get_match_bitmap(PlannerInfo *root, List *clauses, |
| Bitmapset *keys, List *exprs, |
| MCVList *mcvlist, bool is_or) |
| { |
| ListCell *l; |
| bool *matches; |
| |
| /* The bitmap may be partially built. */ |
| Assert(clauses != NIL); |
| Assert(mcvlist != NULL); |
| Assert(mcvlist->nitems > 0); |
| Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS); |
| |
| matches = palloc(sizeof(bool) * mcvlist->nitems); |
| memset(matches, !is_or, sizeof(bool) * mcvlist->nitems); |
| |
| /* |
| * Loop through the list of clauses, and for each of them evaluate all the |
| * MCV items not yet eliminated by the preceding clauses. |
| */ |
| foreach(l, clauses) |
| { |
| Node *clause = (Node *) lfirst(l); |
| |
| /* if it's a RestrictInfo, then extract the clause */ |
| if (IsA(clause, RestrictInfo)) |
| clause = (Node *) ((RestrictInfo *) clause)->clause; |
| |
| /* |
| * Handle the various types of clauses - OpClause, NullTest and |
| * AND/OR/NOT |
| */ |
| if (is_opclause(clause)) |
| { |
| OpExpr *expr = (OpExpr *) clause; |
| FmgrInfo opproc; |
| |
| /* valid only after examine_opclause_args returns true */ |
| Node *clause_expr; |
| Const *cst; |
| bool expronleft; |
| int idx; |
| Oid collid; |
| |
| fmgr_info(get_opcode(expr->opno), &opproc); |
| |
| /* extract the var/expr and const from the expression */ |
| if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft)) |
| elog(ERROR, "incompatible clause"); |
| |
| /* match the attribute/expression to a dimension of the statistic */ |
| idx = mcv_match_expression(clause_expr, keys, exprs, &collid); |
| |
| /* |
| * Walk through the MCV items and evaluate the current clause. We |
| * can skip items that were already ruled out, and terminate if |
| * there are no remaining MCV items that might possibly match. |
| */ |
| for (int i = 0; i < mcvlist->nitems; i++) |
| { |
| bool match = true; |
| MCVItem *item = &mcvlist->items[i]; |
| |
| Assert(idx >= 0); |
| |
| /* |
| * When the MCV item or the Const value is NULL we can treat |
| * this as a mismatch. We must not call the operator because |
| * of strictness. |
| */ |
| if (item->isnull[idx] || cst->constisnull) |
| { |
| matches[i] = RESULT_MERGE(matches[i], is_or, false); |
| continue; |
| } |
| |
| /* |
| * Skip MCV items that can't change result in the bitmap. Once |
| * the value gets false for AND-lists, or true for OR-lists, |
| * we don't need to look at more clauses. |
| */ |
| if (RESULT_IS_FINAL(matches[i], is_or)) |
| continue; |
| |
| /* |
| * Evaluate the operator between the MCV item value and the |
| * constant, in the argument order determined by expronleft. |
| * |
| * We don't store collations used to build the statistics, but |
| * we can use the collation for the attribute itself, as |
| * stored in varcollid. We do reset the statistics after a |
| * type change (including collation change), so this is OK. |
| * For expressions, we use the collation extracted from the |
| * expression itself. |
| */ |
| if (expronleft) |
| match = DatumGetBool(FunctionCall2Coll(&opproc, |
| collid, |
| item->values[idx], |
| cst->constvalue)); |
| else |
| match = DatumGetBool(FunctionCall2Coll(&opproc, |
| collid, |
| cst->constvalue, |
| item->values[idx])); |
| |
| /* update the match bitmap with the result */ |
| matches[i] = RESULT_MERGE(matches[i], is_or, match); |
| } |
| } |
| else if (IsA(clause, ScalarArrayOpExpr)) |
| { |
| ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; |
| FmgrInfo opproc; |
| |
| /* valid only after examine_opclause_args returns true */ |
| Node *clause_expr; |
| Const *cst; |
| bool expronleft; |
| Oid collid; |
| int idx; |
| |
| /* array evaluation */ |
| ArrayType *arrayval; |
| int16 elmlen; |
| bool elmbyval; |
| char elmalign; |
| int num_elems; |
| Datum *elem_values; |
| bool *elem_nulls; |
| |
| fmgr_info(get_opcode(expr->opno), &opproc); |
| |
| /* extract the var/expr and const from the expression */ |
| if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft)) |
| elog(ERROR, "incompatible clause"); |
| |
| /* We expect Var on left */ |
| if (!expronleft) |
| elog(ERROR, "incompatible clause"); |
| |
| /* |
| * Deconstruct the array constant, unless it's NULL (we'll cover |
| * that case below) |
| */ |
| if (!cst->constisnull) |
| { |
| arrayval = DatumGetArrayTypeP(cst->constvalue); |
| get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), |
| &elmlen, &elmbyval, &elmalign); |
| deconstruct_array(arrayval, |
| ARR_ELEMTYPE(arrayval), |
| elmlen, elmbyval, elmalign, |
| &elem_values, &elem_nulls, &num_elems); |
| } |
| |
| /* match the attribute/expression to a dimension of the statistic */ |
| idx = mcv_match_expression(clause_expr, keys, exprs, &collid); |
| |
| /* |
| * Walk through the MCV items and evaluate the current clause. We |
| * can skip items that were already ruled out, and terminate if |
| * there are no remaining MCV items that might possibly match. |
| */ |
| for (int i = 0; i < mcvlist->nitems; i++) |
| { |
| int j; |
| bool match = !expr->useOr; |
| MCVItem *item = &mcvlist->items[i]; |
| |
| /* |
| * When the MCV item or the Const value is NULL we can treat |
| * this as a mismatch. We must not call the operator because |
| * of strictness. |
| */ |
| if (item->isnull[idx] || cst->constisnull) |
| { |
| matches[i] = RESULT_MERGE(matches[i], is_or, false); |
| continue; |
| } |
| |
| /* |
| * Skip MCV items that can't change result in the bitmap. Once |
| * the value gets false for AND-lists, or true for OR-lists, |
| * we don't need to look at more clauses. |
| */ |
| if (RESULT_IS_FINAL(matches[i], is_or)) |
| continue; |
| |
| for (j = 0; j < num_elems; j++) |
| { |
| Datum elem_value = elem_values[j]; |
| bool elem_isnull = elem_nulls[j]; |
| bool elem_match; |
| |
| /* NULL values always evaluate as not matching. */ |
| if (elem_isnull) |
| { |
| match = RESULT_MERGE(match, expr->useOr, false); |
| continue; |
| } |
| |
| /* |
| * Stop evaluating the array elements once we reach a |
| * matching value that can't change - ALL() is the same as |
| * AND-list, ANY() is the same as OR-list. |
| */ |
| if (RESULT_IS_FINAL(match, expr->useOr)) |
| break; |
| |
| elem_match = DatumGetBool(FunctionCall2Coll(&opproc, |
| collid, |
| item->values[idx], |
| elem_value)); |
| |
| match = RESULT_MERGE(match, expr->useOr, elem_match); |
| } |
| |
| /* update the match bitmap with the result */ |
| matches[i] = RESULT_MERGE(matches[i], is_or, match); |
| } |
| } |
| else if (IsA(clause, NullTest)) |
| { |
| NullTest *expr = (NullTest *) clause; |
| Node *clause_expr = (Node *) (expr->arg); |
| |
| /* match the attribute/expression to a dimension of the statistic */ |
| int idx = mcv_match_expression(clause_expr, keys, exprs, NULL); |
| |
| /* |
| * Walk through the MCV items and evaluate the current clause |
| * against each of them. |
| */ |
| for (int i = 0; i < mcvlist->nitems; i++) |
| { |
| bool match = false; /* assume mismatch */ |
| MCVItem *item = &mcvlist->items[i]; |
| |
| /* check whether the null test matches the MCV item */ |
| switch (expr->nulltesttype) |
| { |
| case IS_NULL: |
| match = (item->isnull[idx]) ? true : match; |
| break; |
| |
| case IS_NOT_NULL: |
| match = (!item->isnull[idx]) ? true : match; |
| break; |
| } |
| |
| /* now, update the match bitmap, depending on OR/AND type */ |
| matches[i] = RESULT_MERGE(matches[i], is_or, match); |
| } |
| } |
| else if (is_orclause(clause) || is_andclause(clause)) |
| { |
| /* AND/OR clause, with all subclauses being compatible */ |
| |
| int i; |
| BoolExpr *bool_clause = ((BoolExpr *) clause); |
| List *bool_clauses = bool_clause->args; |
| |
| /* match/mismatch bitmap for each MCV item */ |
| bool *bool_matches = NULL; |
| |
| Assert(bool_clauses != NIL); |
| Assert(list_length(bool_clauses) >= 2); |
| |
| /* build the match bitmap for the AND/OR subclauses */ |
| bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs, |
| mcvlist, is_orclause(clause)); |
| |
| /* |
| * Merge the bitmap produced by mcv_get_match_bitmap into the |
| * current one. We need to consider whether we're evaluating an AND |
| * or an OR condition when merging the results. |
| */ |
| for (i = 0; i < mcvlist->nitems; i++) |
| matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]); |
| |
| pfree(bool_matches); |
| } |
| else if (is_notclause(clause)) |
| { |
| /* NOT clause, with all subclauses compatible */ |
| |
| int i; |
| BoolExpr *not_clause = ((BoolExpr *) clause); |
| List *not_args = not_clause->args; |
| |
| /* match/mismatch bitmap for each MCV item */ |
| bool *not_matches = NULL; |
| |
| Assert(not_args != NIL); |
| Assert(list_length(not_args) == 1); |
| |
| /* build the match bitmap for the argument of the NOT-clause */ |
| not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs, |
| mcvlist, false); |
| |
| /* |
| * Merge the bitmap produced by mcv_get_match_bitmap into the |
| * current one. We're handling a NOT clause, so invert the result |
| * before merging it into the global bitmap. |
| */ |
| for (i = 0; i < mcvlist->nitems; i++) |
| matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]); |
| |
| pfree(not_matches); |
| } |
| else if (IsA(clause, Var)) |
| { |
| /* Var (has to be a boolean Var, possibly below a NOT clause) */ |
| |
| Var *var = (Var *) (clause); |
| |
| /* match the attribute to a dimension of the statistic */ |
| int idx = bms_member_index(keys, var->varattno); |
| |
| Assert(var->vartype == BOOLOID); |
| |
| /* |
| * Walk through the MCV items and evaluate the current clause |
| * against each of them. |
| */ |
| for (int i = 0; i < mcvlist->nitems; i++) |
| { |
| MCVItem *item = &mcvlist->items[i]; |
| bool match = false; |
| |
| /* a match requires a non-NULL value that is boolean TRUE */ |
| if (!item->isnull[idx] && DatumGetBool(item->values[idx])) |
| match = true; |
| |
| /* update the result bitmap */ |
| matches[i] = RESULT_MERGE(matches[i], is_or, match); |
| } |
| } |
| else |
| { |
| /* Otherwise, it must be a bare boolean-returning expression */ |
| int idx; |
| |
| /* match the expression to a dimension of the statistic */ |
| idx = mcv_match_expression(clause, keys, exprs, NULL); |
| |
| /* |
| * Walk through the MCV items and evaluate the current clause |
| * against each of them. |
| */ |
| for (int i = 0; i < mcvlist->nitems; i++) |
| { |
| bool match; |
| MCVItem *item = &mcvlist->items[i]; |
| |
| /* "match" just means it's bool TRUE */ |
| match = !item->isnull[idx] && DatumGetBool(item->values[idx]); |
| |
| /* now, update the match bitmap, depending on OR/AND type */ |
| matches[i] = RESULT_MERGE(matches[i], is_or, match); |
| } |
| } |
| } |
| |
| return matches; |
| } |
| |
| |
| /* |
| * mcv_combine_selectivities |
| * Combine per-column and multi-column MCV selectivity estimates. |
| * |
| * simple_sel is a "simple" selectivity estimate (produced without using any |
| * extended statistics, essentially assuming independence of columns/clauses). |
| * |
| * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of |
| * all matching MCV items. The difference (mcv_sel - mcv_basesel) is then |
| * essentially interpreted as a correction to be added to simple_sel, as |
| * described below. |
| * |
| * mcv_totalsel is the sum of the frequencies of all MCV items (not just the |
| * matching ones). This is used as an upper bound on the portion of the |
| * selectivity estimates not covered by the MCV statistics. |
| * |
| * Note: While the simple and base selectivities are defined in quite a |
| * similar way, the values are computed differently and are therefore not |
| * necessarily equal. The simple selectivity is computed as a product of |
| * per-clause estimates, while |
| * the base selectivity is computed by adding up base frequencies of matching |
| * items of the multi-column MCV list. So the values may differ for two main |
| * reasons - (a) the MCV list may not cover 100% of the data and (b) some of |
| * the MCV items did not match the estimated clauses. |
| * |
| * As both (a) and (b) reduce the base selectivity value, it generally holds |
| * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the |
| * values may be equal. |
| * |
| * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not |
| * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a |
| * correction for the part covered by the MCV list. The two views are |
| * equivalent, because (ignoring the clamping) mcv_sel + other_sel equals |
| * simple_sel + (mcv_sel - mcv_basesel). |
| */ |
| Selectivity |
| mcv_combine_selectivities(Selectivity simple_sel, |
| Selectivity mcv_sel, |
| Selectivity mcv_basesel, |
| Selectivity mcv_totalsel) |
| { |
| Selectivity other_sel; |
| Selectivity sel; |
| |
| /* estimated selectivity of values not covered by MCV matches */ |
| other_sel = simple_sel - mcv_basesel; |
| CLAMP_PROBABILITY(other_sel); |
| |
| /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */ |
| if (other_sel > 1.0 - mcv_totalsel) |
| other_sel = 1.0 - mcv_totalsel; |
| |
| /* overall selectivity is the sum of the MCV and non-MCV parts */ |
| sel = mcv_sel + other_sel; |
| CLAMP_PROBABILITY(sel); |
| |
| return sel; |
| } |
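| |
| /* |
| * Worked example (illustrative numbers only): suppose the MCV list covers |
| * 80% of the data (mcv_totalsel = 0.8), the matching items have frequency |
| * mcv_sel = 0.3 and base frequency mcv_basesel = 0.1, and the per-column |
| * estimate is simple_sel = 0.15. Then other_sel = 0.15 - 0.1 = 0.05, which |
| * is below the 1 - 0.8 = 0.2 cap, so the combined estimate is |
| * 0.3 + 0.05 = 0.35. |
| */ |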
| |
| |
| /* |
| * mcv_clauselist_selectivity |
| * Use MCV statistics to estimate the selectivity of an implicitly-ANDed |
| * list of clauses. |
| * |
| * This determines which MCV items match every clause in the list and returns |
| * the sum of the frequencies of those items. |
| * |
| * In addition, it returns the sum of the base frequencies of each of those |
| * items (that is the sum of the selectivities that each item would have if |
| * the columns were independent of one another), and the total selectivity of |
| * all the MCV items (not just the matching ones). These are expected to be |
| * used together with a "simple" selectivity estimate (one based only on |
| * per-column statistics) to produce an overall selectivity estimate that |
| * makes use of both per-column and multi-column statistics --- see |
| * mcv_combine_selectivities(). |
| */ |
| Selectivity |
| mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, |
| List *clauses, int varRelid, |
| JoinType jointype, SpecialJoinInfo *sjinfo, |
| RelOptInfo *rel, |
| Selectivity *basesel, Selectivity *totalsel) |
| { |
| int i; |
| MCVList *mcv; |
| Selectivity s = 0.0; |
| RangeTblEntry *rte = root->simple_rte_array[rel->relid]; |
| |
| /* match/mismatch bitmap for each MCV item */ |
| bool *matches = NULL; |
| |
| /* load the MCV list stored in the statistics object */ |
| mcv = statext_mcv_load(stat->statOid, rte->inh); |
| |
| /* build a match bitmap for the clauses */ |
| matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs, |
| mcv, false); |
| |
| /* sum frequencies for all the matching MCV items */ |
| *basesel = 0.0; |
| *totalsel = 0.0; |
| for (i = 0; i < mcv->nitems; i++) |
| { |
| *totalsel += mcv->items[i].frequency; |
| |
| if (matches[i]) |
| { |
| *basesel += mcv->items[i].base_frequency; |
| s += mcv->items[i].frequency; |
| } |
| } |
| |
| return s; |
| } |
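| |
| /* |
| * Illustrative sketch (not compiled - the function and variable names |
| * below are placeholders, not part of this file's API): how the outputs |
| * of mcv_clauselist_selectivity() are typically combined with a "simple" |
| * per-column estimate via mcv_combine_selectivities(). See the callers |
| * in extended_stats.c for the real usage. |
| */ |
| #ifdef NOT_USED |
| static Selectivity |
| example_mcv_estimate(PlannerInfo *root, StatisticExtInfo *stat, |
| 					 List *clauses, int varRelid, JoinType jointype, |
| 					 SpecialJoinInfo *sjinfo, RelOptInfo *rel, |
| 					 Selectivity simple_sel) |
| { |
| 	Selectivity mcv_sel; |
| 	Selectivity mcv_basesel; |
| 	Selectivity mcv_totalsel; |
| |
| 	/* frequencies of the MCV items matching all the clauses */ |
| 	mcv_sel = mcv_clauselist_selectivity(root, stat, clauses, varRelid, |
| 										 jointype, sjinfo, rel, |
| 										 &mcv_basesel, &mcv_totalsel); |
| |
| 	/* adjust the simple estimate by the MCV correction */ |
| 	return mcv_combine_selectivities(simple_sel, mcv_sel, |
| 									 mcv_basesel, mcv_totalsel); |
| } |
| #endif |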
| |
| |
| /* |
| * mcv_clause_selectivity_or |
| * Use MCV statistics to estimate the selectivity of a clause that |
| * appears in an ORed list of clauses. |
| * |
| * As with mcv_clauselist_selectivity() this determines which MCV items match |
| * the clause and returns both the sum of the frequencies and the sum of the |
| * base frequencies of those items, as well as the sum of the frequencies of |
| * all MCV items (not just the matching ones) so that this information can be |
| * used by mcv_combine_selectivities() to produce a selectivity estimate that |
| * makes use of both per-column and multi-column statistics. |
| * |
| * Additionally, we return information to help compute the overall selectivity |
| * of the ORed list of clauses assumed to contain this clause. This function |
| * is intended to be called for each clause in the ORed list of clauses, |
| * allowing the overall selectivity to be computed using the following |
| * algorithm: |
| * |
| * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity |
| * of the first n clauses in the list. Then the combined selectivity taking |
| * into account the next clause C[n+1] can be written as |
| * |
| * P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1]) |
| * |
| * The final term above represents the overlap between the clauses examined so |
| * far and the (n+1)'th clause. To estimate its selectivity, we track the |
| * match bitmap for the ORed list of clauses examined so far and examine its |
| * intersection with the match bitmap for the (n+1)'th clause. |
| * |
| * We then also return the sums of the MCV item frequencies and base |
| * frequencies for the match bitmap intersection corresponding to the overlap |
| * term above, so that they can be combined with a simple selectivity estimate |
| * for that term. |
| * |
| * The parameter "or_matches" is an in/out parameter tracking the match bitmap |
| * for the clauses examined so far. The caller is expected to set it to NULL |
| * the first time it calls this function. |
| */ |
| Selectivity |
| mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, |
| MCVList *mcv, Node *clause, bool **or_matches, |
| Selectivity *basesel, Selectivity *overlap_mcvsel, |
| Selectivity *overlap_basesel, Selectivity *totalsel) |
| { |
| Selectivity s = 0.0; |
| bool *new_matches; |
| int i; |
| |
| /* build the OR-matches bitmap, if not built already */ |
| if (*or_matches == NULL) |
| *or_matches = palloc0(sizeof(bool) * mcv->nitems); |
| |
| /* build the match bitmap for the new clause */ |
| new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys, |
| stat->exprs, mcv, false); |
| |
| /* |
| * Sum the frequencies for all the MCV items matching this clause and also |
| * those matching the overlap between this clause and any of the preceding |
| * clauses as described above. |
| */ |
| *basesel = 0.0; |
| *overlap_mcvsel = 0.0; |
| *overlap_basesel = 0.0; |
| *totalsel = 0.0; |
| for (i = 0; i < mcv->nitems; i++) |
| { |
| *totalsel += mcv->items[i].frequency; |
| |
| if (new_matches[i]) |
| { |
| s += mcv->items[i].frequency; |
| *basesel += mcv->items[i].base_frequency; |
| |
| if ((*or_matches)[i]) |
| { |
| *overlap_mcvsel += mcv->items[i].frequency; |
| *overlap_basesel += mcv->items[i].base_frequency; |
| } |
| } |
| |
| /* update the OR-matches bitmap for the next clause */ |
| (*or_matches)[i] = (*or_matches)[i] || new_matches[i]; |
| } |
| |
| pfree(new_matches); |
| |
| return s; |
| } |
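| |
| /* |
| * Illustrative sketch (not compiled - the function name and the "simple" |
| * estimates below are placeholders, not part of this file's API): how a |
| * caller might accumulate the selectivity of an ORed clause list using |
| * mcv_clause_selectivity_or(), following the algorithm described above. |
| * See the real caller in extended_stats.c. |
| */ |
| #ifdef NOT_USED |
| static Selectivity |
| example_or_estimate(PlannerInfo *root, StatisticExtInfo *stat, |
| 					MCVList *mcv, List *or_clauses) |
| { |
| 	Selectivity total_sel = 0.0; |
| 	bool	   *or_matches = NULL; |
| 	ListCell   *lc; |
| |
| 	foreach(lc, or_clauses) |
| 	{ |
| 		Node	   *clause = (Node *) lfirst(lc); |
| 		Selectivity simple_sel = 0.5;	/* placeholder per-clause estimate */ |
| 		Selectivity overlap_simplesel = 0.25;	/* placeholder overlap estimate */ |
| 		Selectivity clause_sel, |
| 					overlap_sel, |
| 					basesel, |
| 					overlap_mcvsel, |
| 					overlap_basesel, |
| 					totalsel; |
| |
| 		/* |
| 		 * MCV-based estimates for this clause and for its overlap with |
| 		 * the clauses processed so far. |
| 		 */ |
| 		clause_sel = mcv_clause_selectivity_or(root, stat, mcv, clause, |
| 											   &or_matches, &basesel, |
| 											   &overlap_mcvsel, |
| 											   &overlap_basesel, &totalsel); |
| |
| 		/* combine each part with its simple estimate */ |
| 		clause_sel = mcv_combine_selectivities(simple_sel, clause_sel, |
| 											   basesel, totalsel); |
| 		overlap_sel = mcv_combine_selectivities(overlap_simplesel, |
| 												overlap_mcvsel, |
| 												overlap_basesel, totalsel); |
| |
| 		/* P[n+1] = P[n] + P(C[n+1]) - P(overlap) */ |
| 		total_sel += clause_sel - overlap_sel; |
| 		CLAMP_PROBABILITY(total_sel); |
| 	} |
| |
| 	return total_sel; |
| } |
| #endif |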