| /*------------------------------------------------------------------------- |
| * |
| * clausesel.c |
| * Routines to compute clause selectivities |
| * |
| * Portions Copyright (c) 2006-2008, Greenplum inc |
| * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates. |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/optimizer/path/clausesel.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include <math.h> |
| |
| #include "nodes/makefuncs.h" |
| #include "nodes/nodeFuncs.h" |
| #include "optimizer/clauses.h" |
| #include "optimizer/cost.h" |
| #include "optimizer/optimizer.h" |
| #include "optimizer/pathnode.h" |
| #include "optimizer/plancat.h" |
| #include "statistics/statistics.h" |
| #include "utils/fmgroids.h" |
| #include "utils/lsyscache.h" |
| #include "utils/selfuncs.h" |
| |
| #include "cdb/cdbvars.h" /* cdb GUCs */ |
| |
| /* |
| * Data structure for accumulating info about possible range-query |
| * clause pairs in clauselist_selectivity. |
| */ |
| typedef struct RangeQueryClause |
| { |
| struct RangeQueryClause *next; /* next in linked list */ |
| Node *var; /* The common variable of the clauses */ |
| bool have_lobound; /* found a low-bound clause yet? */ |
| bool have_hibound; /* found a high-bound clause yet? */ |
| Selectivity lobound; /* Selectivity of a var > something clause */ |
| Selectivity hibound; /* Selectivity of a var < something clause */ |
| } RangeQueryClause; |
| |
| static void addRangeClause(RangeQueryClause **rqlist, Node *clause, |
| bool varonleft, bool isLTsel, Selectivity s2); |
| static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root, |
| List *clauses); |
| static Selectivity clauselist_selectivity_or(PlannerInfo *root, |
| List *clauses, |
| int varRelid, |
| JoinType jointype, |
| SpecialJoinInfo *sjinfo, |
| bool use_extended_stats, |
| bool use_damping); |
| |
| /* cmpSelectivity |
| * comparison function for using qsort on an array of Selectivity entries |
| */ |
| static int |
| cmpSelectivity |
| ( |
| const void *psela, |
| const void *pselb |
| ) |
| { |
| Selectivity sela = * (Selectivity *) psela; |
| Selectivity selb = * (Selectivity *) pselb; |
| |
| if (sela < selb) |
| return -1; |
| if (selb < sela) |
| return 1; |
| |
| return 0; |
| } |
| |
| /**************************************************************************** |
| * ROUTINES TO COMPUTE SELECTIVITIES |
| ****************************************************************************/ |
| |
| /* |
| * clauselist_selectivity - |
| * Compute the selectivity of an implicitly-ANDed list of boolean |
| * expression clauses. The list can be empty, in which case 1.0 |
| * must be returned. List elements may be either RestrictInfos |
| * or bare expression clauses --- the former is preferred since |
| * it allows caching of results. |
| * |
| * See clause_selectivity() for the meaning of the additional parameters. |
| * |
| * The basic approach is to apply extended statistics first, on as many |
| * clauses as possible, in order to capture cross-column dependencies etc. |
| * The remaining clauses are then estimated by taking the product of their |
| * selectivities, but that's only right if they have independent |
| * probabilities, and in reality they are often NOT independent even if they |
| * only refer to a single column. So, we want to be smarter where we can. |
| * |
| * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses |
| * are recognized as possible range query components if they are restriction |
| * opclauses whose operators have scalarltsel or a related function as their |
| * restriction selectivity estimator. We pair up clauses of this form that |
| * refer to the same variable. An unpairable clause of this kind is simply |
| * multiplied into the selectivity product in the normal way. But when we |
| * find a pair, we know that the selectivities represent the relative |
| * positions of the low and high bounds within the column's range, so instead |
| * of figuring the selectivity as hisel * losel, we can figure it as hisel + |
| * losel - 1. (To visualize this, see that hisel is the fraction of the range |
| * below the high bound, while losel is the fraction above the low bound; so |
| * hisel can be interpreted directly as a 0..1 value but we need to convert |
| * losel to 1-losel before interpreting it as a value. Then the available |
| * range is 1-losel to hisel. However, this calculation double-excludes |
| * nulls, so really we need hisel + losel + null_frac - 1.) |
| * |
| * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation |
| * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation |
| * yields an impossible (negative) result. |
| * |
| * A free side-effect is that we can recognize redundant inequalities such |
| * as "x < 4 AND x < 5"; only the tighter constraint will be counted. |
| * |
| * Of course this is all very dependent on the behavior of the inequality |
| * selectivity functions; perhaps some day we can generalize the approach. |
| */ |
| Selectivity |
| clauselist_selectivity(PlannerInfo *root, |
| List *clauses, |
| int varRelid, |
| JoinType jointype, |
| SpecialJoinInfo *sjinfo, |
| bool use_damping) |
| { |
| return clauselist_selectivity_ext(root, clauses, varRelid, |
| jointype, sjinfo, true, use_damping); |
| } |
| |
| /* |
| * clauselist_selectivity_ext - |
| * Extended version of clauselist_selectivity(). If "use_extended_stats" |
| * is false, all extended statistics will be ignored, and only per-column |
| * statistics will be used. |
| */ |
| Selectivity |
| clauselist_selectivity_ext(PlannerInfo *root, |
| List *clauses, |
| int varRelid, |
| JoinType jointype, |
| SpecialJoinInfo *sjinfo, |
| bool use_extended_stats, |
| bool use_damping) |
| { |
| Selectivity s1 = 1.0; |
| Selectivity *rgsel = NULL; |
| RelOptInfo *rel; |
| Bitmapset *estimatedclauses = NULL; |
| RangeQueryClause *rqlist = NULL; |
| ListCell *l; |
| int listidx; |
| |
| int pos = 0; |
| int i = 0; |
| |
| // if the PlannerInfo was created from Orca, we don't care about the selectivity/costing |
| // here and some of the necessary fields may not be populated (eg: glob). Instead return |
| // the default selectivity |
| if (root->is_from_orca) |
| { |
| return s1; |
| } |
| |
| /* allocate array to hold all selectivity factors */ |
| rgsel = (Selectivity *) palloc(sizeof(Selectivity) * list_length(clauses)); |
| |
| /* |
| * If there's exactly one clause, just go directly to |
| * clause_selectivity_ext(). None of what we might do below is relevant. |
| */ |
| if (list_length(clauses) == 1) |
| return clause_selectivity_ext(root, (Node *) linitial(clauses), |
| varRelid, jointype, sjinfo, |
| use_extended_stats, use_damping); |
| |
| /* |
| * Determine if these clauses reference a single relation. If so, and if |
| * it has extended statistics, try to apply those. |
| */ |
| rel = find_single_rel_for_clauses(root, clauses); |
| if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL) |
| { |
| /* |
| * Estimate as many clauses as possible using extended statistics. |
| * |
| * 'estimatedclauses' is populated with the 0-based list position |
| * index of clauses estimated here, and that should be ignored below. |
| */ |
| s1 = statext_clauselist_selectivity(root, clauses, varRelid, |
| jointype, sjinfo, rel, |
| &estimatedclauses, false); |
| } |
| |
| /* |
| * Apply normal selectivity estimates for remaining clauses. We'll be |
| * careful to skip any clauses which were already estimated above. |
| * |
| * Anything that doesn't look like a potential rangequery clause gets |
| * multiplied into s1 and forgotten. Anything that does gets inserted into |
| * an rqlist entry. |
| */ |
| listidx = -1; |
| foreach(l, clauses) |
| { |
| Node *clause = (Node *) lfirst(l); |
| RestrictInfo *rinfo; |
| Selectivity s2; |
| |
| listidx++; |
| |
| /* |
| * Skip this clause if it's already been estimated by some other |
| * statistics above. |
| */ |
| if (bms_is_member(listidx, estimatedclauses)) |
| continue; |
| |
| /* Compute the selectivity of this clause in isolation */ |
| s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo, |
| use_extended_stats, use_damping); |
| |
| /* |
| * Check for being passed a RestrictInfo. |
| * |
| * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or |
| * 0.0; just use that rather than looking for range pairs. |
| */ |
| if (IsA(clause, RestrictInfo)) |
| { |
| rinfo = (RestrictInfo *) clause; |
| if (rinfo->pseudoconstant) |
| { |
| rgsel[pos++] = s2; |
| continue; |
| } |
| clause = (Node *) rinfo->clause; |
| } |
| else |
| rinfo = NULL; |
| |
| /* |
| * See if it looks like a restriction clause with a pseudoconstant on |
| * one side. (Anything more complicated than that might not behave in |
| * the simple way we are expecting.) Most of the tests here can be |
| * done more efficiently with rinfo than without. |
| */ |
| if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2) |
| { |
| OpExpr *expr = (OpExpr *) clause; |
| bool varonleft = true; |
| bool ok; |
| |
| if (rinfo) |
| { |
| ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) && |
| (is_pseudo_constant_clause_relids(lsecond(expr->args), |
| rinfo->right_relids) || |
| (varonleft = false, |
| is_pseudo_constant_clause_relids(linitial(expr->args), |
| rinfo->left_relids))); |
| } |
| else |
| { |
| ok = (NumRelids(root, clause) == 1) && |
| (is_pseudo_constant_clause(lsecond(expr->args)) || |
| (varonleft = false, |
| is_pseudo_constant_clause(linitial(expr->args)))); |
| } |
| |
| if (ok) |
| { |
| /* |
| * If it's not a "<"/"<="/">"/">=" operator, just merge the |
| * selectivity in generically. But if it's the right oprrest, |
| * add the clause to rqlist for later processing. |
| */ |
| switch (get_oprrest(expr->opno)) |
| { |
| case F_SCALARLTSEL: |
| case F_SCALARLESEL: |
| addRangeClause(&rqlist, clause, |
| varonleft, true, s2); |
| break; |
| case F_SCALARGTSEL: |
| case F_SCALARGESEL: |
| addRangeClause(&rqlist, clause, |
| varonleft, false, s2); |
| break; |
| default: |
| /* Just merge the selectivity in generically */ |
| rgsel[pos++] = s2; |
| break; |
| } |
| continue; /* drop to loop bottom */ |
| } |
| } |
| |
| /* Not the right form, so treat it generically. */ |
| rgsel[pos++] = s2; |
| } |
| |
| /* |
| * Now scan the rangequery pair list. |
| */ |
| while (rqlist != NULL) |
| { |
| RangeQueryClause *rqnext; |
| |
| if (rqlist->have_lobound && rqlist->have_hibound) |
| { |
| /* Successfully matched a pair of range clauses */ |
| Selectivity s2; |
| |
| /* |
| * Exact equality to the default value probably means the |
| * selectivity function punted. This is not airtight but should |
| * be good enough. |
| */ |
| if (rqlist->hibound == DEFAULT_INEQ_SEL || |
| rqlist->lobound == DEFAULT_INEQ_SEL) |
| { |
| s2 = DEFAULT_RANGE_INEQ_SEL; |
| } |
| else |
| { |
| s2 = rqlist->hibound + rqlist->lobound - 1.0; |
| |
| /* Adjust for double-exclusion of NULLs */ |
| s2 += nulltestsel(root, IS_NULL, rqlist->var, |
| varRelid, jointype, sjinfo); |
| |
| /* |
| * A zero or slightly negative s2 should be converted into a |
| * small positive value; we probably are dealing with a very |
| * tight range and got a bogus result due to roundoff errors. |
| * However, if s2 is very negative, then we probably have |
| * default selectivity estimates on one or both sides of the |
| * range that we failed to recognize above for some reason. |
| */ |
| if (s2 <= 0.0) |
| { |
| if (s2 < -0.01) |
| { |
| /* |
| * No data available --- use a default estimate that |
| * is small, but not real small. |
| */ |
| s2 = DEFAULT_RANGE_INEQ_SEL; |
| } |
| else |
| { |
| /* |
| * It's just roundoff error; use a small positive |
| * value |
| */ |
| s2 = 1.0e-10; |
| } |
| } |
| } |
| /* Merge in the selectivity of the pair of clauses */ |
| rgsel[pos++] = s2; |
| } |
| else |
| { |
| /* Only found one of a pair, merge it in generically */ |
| if (rqlist->have_lobound) |
| rgsel[pos++] = rqlist->lobound; |
| else |
| rgsel[pos++] = rqlist->hibound; |
| } |
| /* release storage and advance */ |
| rqnext = rqlist->next; |
| pfree(rqlist); |
| rqlist = rqnext; |
| } |
| |
| Assert(pos <= list_length(clauses)); |
| |
| if (use_damping && pos >= 2) |
| { |
| /* sort selectivities first; most significant (i.e. lowest) first */ |
| if (gp_selectivity_damping_sigsort) |
| qsort(rgsel, pos, sizeof(Selectivity), cmpSelectivity); |
| |
| for (i = 1; i < pos; i++) |
| { |
| /* dampen selectivity as n-th root of the original value */ |
| rgsel[i] = pow(rgsel[i], 1.0/Max(0.1, ((i + 1) * gp_selectivity_damping_factor))); |
| } |
| } |
| |
| for (i = 0; i < pos; i++) |
| { |
| s1 *= rgsel[i]; |
| } |
| |
| pfree(rgsel); |
| |
| return s1; |
| } |
| |
| /* |
| * clauselist_selectivity_or - |
| * Compute the selectivity of an implicitly-ORed list of boolean |
| * expression clauses. The list can be empty, in which case 0.0 |
| * must be returned. List elements may be either RestrictInfos |
| * or bare expression clauses --- the former is preferred since |
| * it allows caching of results. |
| * |
| * See clause_selectivity() for the meaning of the additional parameters. |
| * |
| * The basic approach is to apply extended statistics first, on as many |
| * clauses as possible, in order to capture cross-column dependencies etc. |
| * The remaining clauses are then estimated as if they were independent. |
| */ |
| static Selectivity |
| clauselist_selectivity_or(PlannerInfo *root, |
| List *clauses, |
| int varRelid, |
| JoinType jointype, |
| SpecialJoinInfo *sjinfo, |
| bool use_extended_stats, |
| bool use_damping) |
| { |
| Selectivity s1 = 0.0; |
| RelOptInfo *rel; |
| Bitmapset *estimatedclauses = NULL; |
| ListCell *lc; |
| int listidx; |
| |
| /* |
| * Determine if these clauses reference a single relation. If so, and if |
| * it has extended statistics, try to apply those. |
| */ |
| rel = find_single_rel_for_clauses(root, clauses); |
| if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL) |
| { |
| /* |
| * Estimate as many clauses as possible using extended statistics. |
| * |
| * 'estimatedclauses' is populated with the 0-based list position |
| * index of clauses estimated here, and that should be ignored below. |
| */ |
| s1 = statext_clauselist_selectivity(root, clauses, varRelid, |
| jointype, sjinfo, rel, |
| &estimatedclauses, true); |
| } |
| |
| /* |
| * Estimate the remaining clauses as if they were independent. |
| * |
| * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account |
| * for the probable overlap of selected tuple sets. |
| * |
| * XXX is this too conservative? |
| */ |
| listidx = -1; |
| foreach(lc, clauses) |
| { |
| Selectivity s2; |
| |
| listidx++; |
| |
| /* |
| * Skip this clause if it's already been estimated by some other |
| * statistics above. |
| */ |
| if (bms_is_member(listidx, estimatedclauses)) |
| continue; |
| |
| s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid, |
| jointype, sjinfo, use_extended_stats, use_damping); |
| |
| s1 = s1 + s2 - s1 * s2; |
| } |
| |
| return s1; |
| } |
| |
| /* |
| * addRangeClause --- add a new range clause for clauselist_selectivity |
| * |
| * Here is where we try to match up pairs of range-query clauses |
| */ |
| static void |
| addRangeClause(RangeQueryClause **rqlist, Node *clause, |
| bool varonleft, bool isLTsel, Selectivity s2) |
| { |
| RangeQueryClause *rqelem; |
| Node *var; |
| bool is_lobound; |
| |
| if (varonleft) |
| { |
| var = get_leftop((Expr *) clause); |
| is_lobound = !isLTsel; /* x < something is high bound */ |
| } |
| else |
| { |
| var = get_rightop((Expr *) clause); |
| is_lobound = isLTsel; /* something < x is low bound */ |
| } |
| |
| for (rqelem = *rqlist; rqelem; rqelem = rqelem->next) |
| { |
| /* |
| * We use full equal() here because the "var" might be a function of |
| * one or more attributes of the same relation... |
| */ |
| if (!equal(var, rqelem->var)) |
| continue; |
| /* Found the right group to put this clause in */ |
| if (is_lobound) |
| { |
| if (!rqelem->have_lobound) |
| { |
| rqelem->have_lobound = true; |
| rqelem->lobound = s2; |
| } |
| else |
| { |
| |
| /*------ |
| * We have found two similar clauses, such as |
| * x < y AND x <= z. |
| * Keep only the more restrictive one. |
| *------ |
| */ |
| if (rqelem->lobound > s2) |
| rqelem->lobound = s2; |
| } |
| } |
| else |
| { |
| if (!rqelem->have_hibound) |
| { |
| rqelem->have_hibound = true; |
| rqelem->hibound = s2; |
| } |
| else |
| { |
| |
| /*------ |
| * We have found two similar clauses, such as |
| * x > y AND x >= z. |
| * Keep only the more restrictive one. |
| *------ |
| */ |
| if (rqelem->hibound > s2) |
| rqelem->hibound = s2; |
| } |
| } |
| return; |
| } |
| |
| /* No matching var found, so make a new clause-pair data structure */ |
| rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause)); |
| rqelem->var = var; |
| if (is_lobound) |
| { |
| rqelem->have_lobound = true; |
| rqelem->have_hibound = false; |
| rqelem->lobound = s2; |
| } |
| else |
| { |
| rqelem->have_lobound = false; |
| rqelem->have_hibound = true; |
| rqelem->hibound = s2; |
| } |
| rqelem->next = *rqlist; |
| *rqlist = rqelem; |
| } |
| |
| /* |
| * find_single_rel_for_clauses |
| * Examine each clause in 'clauses' and determine if all clauses |
| * reference only a single relation. If so return that relation, |
| * otherwise return NULL. |
| */ |
| static RelOptInfo * |
| find_single_rel_for_clauses(PlannerInfo *root, List *clauses) |
| { |
| int lastrelid = 0; |
| ListCell *l; |
| |
| foreach(l, clauses) |
| { |
| RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); |
| int relid; |
| |
| /* |
| * If we have a list of bare clauses rather than RestrictInfos, we |
| * could pull out their relids the hard way with pull_varnos(). |
| * However, currently the extended-stats machinery won't do anything |
| * with non-RestrictInfo clauses anyway, so there's no point in |
| * spending extra cycles; just fail if that's what we have. |
| * |
| * An exception to that rule is if we have a bare BoolExpr AND clause. |
| * We treat this as a special case because the restrictinfo machinery |
| * doesn't build RestrictInfos on top of AND clauses. |
| */ |
| if (is_andclause(rinfo)) |
| { |
| RelOptInfo *rel; |
| |
| rel = find_single_rel_for_clauses(root, |
| ((BoolExpr *) rinfo)->args); |
| |
| if (rel == NULL) |
| return NULL; |
| if (lastrelid == 0) |
| lastrelid = rel->relid; |
| else if (rel->relid != lastrelid) |
| return NULL; |
| |
| continue; |
| } |
| |
| if (!IsA(rinfo, RestrictInfo)) |
| return NULL; |
| |
| if (bms_is_empty(rinfo->clause_relids)) |
| continue; /* we can ignore variable-free clauses */ |
| if (!bms_get_singleton_member(rinfo->clause_relids, &relid)) |
| return NULL; /* multiple relations in this clause */ |
| if (lastrelid == 0) |
| lastrelid = relid; /* first clause referencing a relation */ |
| else if (relid != lastrelid) |
| return NULL; /* relation not same as last one */ |
| } |
| |
| if (lastrelid != 0) |
| return find_base_rel(root, lastrelid); |
| |
| return NULL; /* no clauses */ |
| } |
| |
| /* |
| * bms_is_subset_singleton |
| * |
| * Same result as bms_is_subset(s, bms_make_singleton(x)), |
| * but a little faster and doesn't leak memory. |
| * |
| * Is this of use anywhere else? If so move to bitmapset.c ... |
| */ |
| static bool |
| bms_is_subset_singleton(const Bitmapset *s, int x) |
| { |
| switch (bms_membership(s)) |
| { |
| case BMS_EMPTY_SET: |
| return true; |
| case BMS_SINGLETON: |
| return bms_is_member(x, s); |
| case BMS_MULTIPLE: |
| return false; |
| } |
| /* can't get here... */ |
| return false; |
| } |
| |
| /* |
| * treat_as_join_clause - |
| * Decide whether an operator clause is to be handled by the |
| * restriction or join estimator. Subroutine for clause_selectivity(). |
| */ |
| static inline bool |
| treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo, |
| int varRelid, SpecialJoinInfo *sjinfo) |
| { |
| if (varRelid != 0) |
| { |
| /* |
| * Caller is forcing restriction mode (eg, because we are examining an |
| * inner indexscan qual). |
| */ |
| return false; |
| } |
| else if (sjinfo == NULL) |
| { |
| /* |
| * It must be a restriction clause, since it's being evaluated at a |
| * scan node. |
| */ |
| return false; |
| } |
| else |
| { |
| /* |
| * Otherwise, it's a join if there's more than one relation used. We |
| * can optimize this calculation if an rinfo was passed. |
| * |
| * XXX Since we know the clause is being evaluated at a join, the |
| * only way it could be single-relation is if it was delayed by outer |
| * joins. Although we can make use of the restriction qual estimators |
| * anyway, it seems likely that we ought to account for the |
| * probability of injected nulls somehow. |
| */ |
| if (rinfo) |
| return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); |
| else |
| return (NumRelids(root, clause) > 1); |
| } |
| } |
| |
| |
| /* |
| * clause_selectivity - |
| * Compute the selectivity of a general boolean expression clause. |
| * |
| * The clause can be either a RestrictInfo or a plain expression. If it's |
| * a RestrictInfo, we try to cache the selectivity for possible re-use, |
| * so passing RestrictInfos is preferred. |
| * |
| * varRelid is either 0 or a rangetable index. |
| * |
| * When varRelid is not 0, only variables belonging to that relation are |
| * considered in computing selectivity; other vars are treated as constants |
| * of unknown values. This is appropriate for estimating the selectivity of |
| * a join clause that is being used as a restriction clause in a scan of a |
| * nestloop join's inner relation --- varRelid should then be the ID of the |
| * inner relation. |
| * |
| * When varRelid is 0, all variables are treated as variables. This |
| * is appropriate for ordinary join clauses and restriction clauses. |
| * |
| * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER |
| * if the clause isn't a join clause. |
| * |
| * sjinfo is NULL for a non-join clause, otherwise it provides additional |
| * context information about the join being performed. There are some |
| * special cases: |
| * 1. For a special (not INNER) join, sjinfo is always a member of |
| * root->join_info_list. |
| * 2. For an INNER join, sjinfo is just a transient struct, and only the |
| * relids and jointype fields in it can be trusted. |
| * It is possible for jointype to be different from sjinfo->jointype. |
| * This indicates we are considering a variant join: either with |
| * the LHS and RHS switched, or with one input unique-ified. |
| * |
| * Note: when passing nonzero varRelid, it's normally appropriate to set |
| * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a |
| * join clause; because we aren't treating it as a join clause. |
| */ |
| Selectivity |
| clause_selectivity(PlannerInfo *root, |
| Node *clause, |
| int varRelid, |
| JoinType jointype, |
| SpecialJoinInfo *sjinfo, |
| bool use_damping) |
| { |
| return clause_selectivity_ext(root, clause, varRelid, |
| jointype, sjinfo, true, use_damping); |
| } |
| |
| /* |
| * clause_selectivity_ext - |
| * Extended version of clause_selectivity(). If "use_extended_stats" is |
| * false, all extended statistics will be ignored, and only per-column |
| * statistics will be used. |
| */ |
| Selectivity |
| clause_selectivity_ext(PlannerInfo *root, |
| Node *clause, |
| int varRelid, |
| JoinType jointype, |
| SpecialJoinInfo *sjinfo, |
| bool use_extended_stats, |
| bool use_damping) |
| { |
| Selectivity s1 = 0.5; /* default for any unhandled clause type */ |
| RestrictInfo *rinfo = NULL; |
| bool cacheable = false; |
| |
| if (clause == NULL) /* can this still happen? */ |
| return s1; |
| |
| if (IsA(clause, RestrictInfo)) |
| { |
| rinfo = (RestrictInfo *) clause; |
| |
| /* |
| * If the clause is marked pseudoconstant, then it will be used as a |
| * gating qual and should not affect selectivity estimates; hence |
| * return 1.0. The only exception is that a constant FALSE may be |
| * taken as having selectivity 0.0, since it will surely mean no rows |
| * out of the plan. This case is simple enough that we need not |
| * bother caching the result. |
| */ |
| if (rinfo->pseudoconstant) |
| { |
| if (!IsA(rinfo->clause, Const)) |
| return (Selectivity) 1.0; |
| } |
| |
| /* |
| * If the clause is marked redundant, always return 1.0. |
| */ |
| if (rinfo->norm_selec > 1) |
| return (Selectivity) 1.0; |
| |
| /* |
| * If possible, cache the result of the selectivity calculation for |
| * the clause. We can cache if varRelid is zero or the clause |
| * contains only vars of that relid --- otherwise varRelid will affect |
| * the result, so mustn't cache. Outer join quals might be examined |
| * with either their join's actual jointype or JOIN_INNER, so we need |
| * two cache variables to remember both cases. Note: we assume the |
| * result won't change if we are switching the input relations or |
| * considering a unique-ified case, so we only need one cache variable |
| * for all non-JOIN_INNER cases. |
| */ |
| if (varRelid == 0 || |
| bms_is_subset_singleton(rinfo->clause_relids, varRelid)) |
| { |
| /* Cacheable --- do we already have the result? */ |
| if (jointype == JOIN_INNER) |
| { |
| if (rinfo->norm_selec >= 0) |
| return rinfo->norm_selec; |
| } |
| else |
| { |
| if (rinfo->outer_selec >= 0) |
| return rinfo->outer_selec; |
| } |
| cacheable = true; |
| } |
| |
| /* |
| * Proceed with examination of contained clause. If the clause is an |
| * OR-clause, we want to look at the variant with sub-RestrictInfos, |
| * so that per-subclause selectivities can be cached. |
| */ |
| if (rinfo->orclause) |
| clause = (Node *) rinfo->orclause; |
| else |
| clause = (Node *) rinfo->clause; |
| } |
| |
| if (IsA(clause, Var)) |
| { |
| Var *var = (Var *) clause; |
| |
| /* |
| * We probably shouldn't ever see an uplevel Var here, but if we do, |
| * return the default selectivity... |
| */ |
| if (var->varlevelsup == 0 && |
| (varRelid == 0 || varRelid == (int) var->varno)) |
| { |
| /* Use the restriction selectivity function for a bool Var */ |
| s1 = boolvarsel(root, (Node *) var, varRelid); |
| } |
| } |
| else if (IsA(clause, Const)) |
| { |
| /* bool constant is pretty easy... */ |
| Const *con = (Const *) clause; |
| |
| s1 = con->constisnull ? 0.0 : |
| DatumGetBool(con->constvalue) ? 1.0 : 0.0; |
| } |
| else if (IsA(clause, Param)) |
| { |
| /* see if we can replace the Param */ |
| Node *subst = estimate_expression_value(root, clause); |
| |
| if (IsA(subst, Const)) |
| { |
| /* bool constant is pretty easy... */ |
| Const *con = (Const *) subst; |
| |
| s1 = con->constisnull ? 0.0 : |
| DatumGetBool(con->constvalue) ? 1.0 : 0.0; |
| } |
| else |
| { |
| /* XXX any way to do better than default? */ |
| } |
| } |
| else if (is_notclause(clause)) |
| { |
| /* inverse of the selectivity of the underlying clause */ |
| s1 = 1.0 - clause_selectivity_ext(root, |
| (Node *) get_notclausearg((Expr *) clause), |
| varRelid, |
| jointype, |
| sjinfo, |
| use_extended_stats, |
| use_damping); |
| } |
| else if (is_andclause(clause)) |
| { |
| /* share code with clauselist_selectivity() */ |
| s1 = clauselist_selectivity_ext(root, |
| ((BoolExpr *) clause)->args, |
| varRelid, |
| jointype, |
| sjinfo, |
| use_extended_stats, |
| use_damping); |
| } |
| else if (is_orclause(clause)) |
| { |
| /* |
| * Almost the same thing as clauselist_selectivity, but with the |
| * clauses connected by OR. |
| */ |
| s1 = clauselist_selectivity_or(root, |
| ((BoolExpr *) clause)->args, |
| varRelid, |
| jointype, |
| sjinfo, |
| use_extended_stats, |
| use_damping); |
| } |
| else if (is_opclause(clause) || IsA(clause, DistinctExpr)) |
| { |
| OpExpr *opclause = (OpExpr *) clause; |
| Oid opno = opclause->opno; |
| |
| if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo)) |
| { |
| /* Estimate selectivity for a join clause. */ |
| s1 = join_selectivity(root, opno, |
| opclause->args, |
| opclause->inputcollid, |
| jointype, |
| sjinfo); |
| } |
| else |
| { |
| /* Estimate selectivity for a restriction clause. */ |
| s1 = restriction_selectivity(root, opno, |
| opclause->args, |
| opclause->inputcollid, |
| varRelid); |
| } |
| |
| /* |
| * DistinctExpr has the same representation as OpExpr, but the |
| * contained operator is "=" not "<>", so we must negate the result. |
| * This estimation method doesn't give the right behavior for nulls, |
| * but it's better than doing nothing. |
| */ |
| if (IsA(clause, DistinctExpr)) |
| s1 = 1.0 - s1; |
| } |
| else if (is_funcclause(clause)) |
| { |
| FuncExpr *funcclause = (FuncExpr *) clause; |
| |
| /* Try to get an estimate from the support function, if any */ |
| s1 = function_selectivity(root, |
| funcclause->funcid, |
| funcclause->args, |
| funcclause->inputcollid, |
| treat_as_join_clause(root, clause, rinfo, |
| varRelid, sjinfo), |
| varRelid, |
| jointype, |
| sjinfo); |
| } |
| else if (IsA(clause, ScalarArrayOpExpr)) |
| { |
| /* Use node specific selectivity calculation function */ |
| s1 = scalararraysel(root, |
| (ScalarArrayOpExpr *) clause, |
| treat_as_join_clause(root, clause, rinfo, |
| varRelid, sjinfo), |
| varRelid, |
| jointype, |
| sjinfo); |
| } |
| else if (IsA(clause, RowCompareExpr)) |
| { |
| /* Use node specific selectivity calculation function */ |
| s1 = rowcomparesel(root, |
| (RowCompareExpr *) clause, |
| varRelid, |
| jointype, |
| sjinfo); |
| } |
| else if (IsA(clause, NullTest)) |
| { |
| /* Use node specific selectivity calculation function */ |
| s1 = nulltestsel(root, |
| ((NullTest *) clause)->nulltesttype, |
| (Node *) ((NullTest *) clause)->arg, |
| varRelid, |
| jointype, |
| sjinfo); |
| } |
| else if (IsA(clause, BooleanTest)) |
| { |
| /* Use node specific selectivity calculation function */ |
| s1 = booltestsel(root, |
| ((BooleanTest *) clause)->booltesttype, |
| (Node *) ((BooleanTest *) clause)->arg, |
| varRelid, |
| jointype, |
| sjinfo); |
| } |
| else if (IsA(clause, CurrentOfExpr)) |
| { |
| /* CURRENT OF selects at most one row of its table */ |
| CurrentOfExpr *cexpr = (CurrentOfExpr *) clause; |
| RelOptInfo *crel = find_base_rel(root, cexpr->cvarno); |
| |
| if (crel->tuples > 0) |
| s1 = 1.0 / crel->tuples; |
| } |
| else if (IsA(clause, RelabelType)) |
| { |
| /* Not sure this case is needed, but it can't hurt */ |
| s1 = clause_selectivity_ext(root, |
| (Node *) ((RelabelType *) clause)->arg, |
| varRelid, |
| jointype, |
| sjinfo, |
| use_extended_stats, |
| use_damping); |
| } |
| else if (IsA(clause, CoerceToDomain)) |
| { |
| /* Not sure this case is needed, but it can't hurt */ |
| s1 = clause_selectivity_ext(root, |
| (Node *) ((CoerceToDomain *) clause)->arg, |
| varRelid, |
| jointype, |
| sjinfo, |
| use_extended_stats, |
| use_damping); |
| } |
| else |
| { |
| /* |
| * For anything else, see if we can consider it as a boolean variable. |
| * This only works if it's an immutable expression in Vars of a single |
| * relation; but there's no point in us checking that here because |
| * boolvarsel() will do it internally, and return a suitable default |
| * selectivity if not. |
| */ |
| s1 = boolvarsel(root, clause, varRelid); |
| } |
| |
| /* Cache the result if possible */ |
| if (cacheable) |
| { |
| if (jointype == JOIN_INNER) |
| rinfo->norm_selec = s1; |
| else |
| rinfo->outer_selec = s1; |
| } |
| |
| #ifdef SELECTIVITY_DEBUG |
| elog(DEBUG4, "clause_selectivity: s1 %f", s1); |
| #endif /* SELECTIVITY_DEBUG */ |
| |
| return s1; |
| } |