src/backend/optimizer/path/clausesel.c - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * clausesel.c
  *	  Routines to compute clause selectivities
  *
  * Portions Copyright (c) 2006-2008, Greenplum inc
  * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates.
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
  *	  src/backend/optimizer/path/clausesel.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include <math.h>

 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/plancat.h"
 #include "statistics/statistics.h"
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"

 #include "cdb/cdbvars.h"        /* cdb GUCs */

 /*
  * Data structure for accumulating info about possible range-query
  * clause pairs in clauselist_selectivity.
  *
  * One entry exists per distinct "var" expression; addRangeClause() matches
  * incoming clauses against existing entries with equal().  The list is
  * built by addRangeClause() and walked (and each entry pfree'd) by
  * clauselist_selectivity_ext().
  *
  * lobound/hibound are only meaningful when the corresponding have_lobound/
  * have_hibound flag is true; addRangeClause() leaves the other one unset
  * when it creates a new entry.
  */
 typedef struct RangeQueryClause
 {
 	struct RangeQueryClause *next;	/* next in linked list (NULL at the end) */
 	Node	   *var;			/* The common variable of the clauses */
 	bool		have_lobound;	/* found a low-bound clause yet? */
 	bool		have_hibound;	/* found a high-bound clause yet? */
 	Selectivity lobound;		/* Selectivity of a var > something clause */
 	Selectivity hibound;		/* Selectivity of a var < something clause */
 } RangeQueryClause;

 /*
  * Prototypes for file-local helpers that are used before their definitions
  * appear further down in this file.
  */
 static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 						   bool varonleft, bool isLTsel, Selectivity s2);
 static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
 											   List *clauses);
 static Selectivity clauselist_selectivity_or(PlannerInfo *root,
 											 List *clauses,
 											 int varRelid,
 											 JoinType jointype,
 											 SpecialJoinInfo *sjinfo,
 											 bool use_extended_stats,
 											 bool use_damping);

 /*
  * cmpSelectivity
  *	  qsort() comparator for an array of Selectivity values.
  *
  * Orders the values ascending: returns -1 when the first value is smaller,
  * 1 when it is larger, and 0 otherwise.
  */
 static int
 cmpSelectivity(const void *psela, const void *pselb)
 {
 	const Selectivity lhs = *(const Selectivity *) psela;
 	const Selectivity rhs = *(const Selectivity *) pselb;

 	/*
 	 * Each comparison evaluates to 0 or 1, so the difference is the usual
 	 * three-way result; if neither "less than" test holds the answer is 0.
 	 */
 	return (rhs < lhs) - (lhs < rhs);
 }

 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
  ****************************************************************************/

 /*
  * clauselist_selectivity -
  *	  Compute the selectivity of an implicitly-ANDed list of boolean
  *	  expression clauses.  The list can be empty, in which case 1.0
  *	  must be returned.  List elements may be either RestrictInfos
  *	  or bare expression clauses --- the former is preferred since
  *	  it allows caching of results.
  *
  * See clause_selectivity() for the meaning of the additional parameters.
  *
  * The basic approach is to apply extended statistics first, on as many
  * clauses as possible, in order to capture cross-column dependencies etc.
  * The remaining clauses are then estimated by taking the product of their
  * selectivities, but that's only right if they have independent
  * probabilities, and in reality they are often NOT independent even if they
  * only refer to a single column.  So, we want to be smarter where we can.
  *
  * We also recognize "range queries", such as "x > 34 AND x < 42".  Clauses
  * are recognized as possible range query components if they are restriction
  * opclauses whose operators have scalarltsel or a related function as their
  * restriction selectivity estimator.  We pair up clauses of this form that
  * refer to the same variable.  An unpairable clause of this kind is simply
  * multiplied into the selectivity product in the normal way.  But when we
  * find a pair, we know that the selectivities represent the relative
  * positions of the low and high bounds within the column's range, so instead
  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
  * losel - 1.  (To visualize this, see that hisel is the fraction of the range
  * below the high bound, while losel is the fraction above the low bound; so
  * hisel can be interpreted directly as a 0..1 value but we need to convert
  * losel to 1-losel before interpreting it as a value.  Then the available
  * range is 1-losel to hisel.  However, this calculation double-excludes
  * nulls, so really we need hisel + losel + null_frac - 1.)
  *
  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
  * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
  * yields an impossible (negative) result.
  *
  * A free side-effect is that we can recognize redundant inequalities such
  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
  *
  * Of course this is all very dependent on the behavior of the inequality
  * selectivity functions; perhaps some day we can generalize the approach.
  */
 Selectivity
 clauselist_selectivity(PlannerInfo *root,
 					   List *clauses,
 					   int varRelid,
 					   JoinType jointype,
 					   SpecialJoinInfo *sjinfo,
 					   bool use_damping)
 {
 	Selectivity result;

 	/*
 	 * Convenience entry point: same as clauselist_selectivity_ext() with
 	 * use_extended_stats fixed to true.
 	 */
 	result = clauselist_selectivity_ext(root,
 										clauses,
 										varRelid,
 										jointype,
 										sjinfo,
 										true,
 										use_damping);

 	return result;
 }

 /*
  * clauselist_selectivity_ext -
  *	  Extended version of clauselist_selectivity().  If "use_extended_stats"
  *	  is false, all extended statistics will be ignored, and only per-column
  *	  statistics will be used.
  *
  * Returns 1.0 immediately, without looking at the clauses, when
  * root->is_from_orca is set.
  *
  * The per-clause (and per-range-pair) selectivities are collected in a
  * scratch array and multiplied into the result at the end.  If "use_damping"
  * is true and at least two such factors were collected, every factor after
  * the first is dampened before the multiplication: factor i (0-based) is
  * raised to the power 1 / Max(0.1, (i + 1) * gp_selectivity_damping_factor).
  * When gp_selectivity_damping_sigsort is set, the factors are first sorted
  * ascending so that the smallest one is the factor left undamped.  The
  * selectivity obtained from extended statistics is not part of that array
  * and is therefore never dampened.  "use_damping" is also passed down to
  * clause_selectivity_ext() for every individual clause.
  */
 Selectivity
 clauselist_selectivity_ext(PlannerInfo *root,
 						   List *clauses,
 						   int varRelid,
 						   JoinType jointype,
 						   SpecialJoinInfo *sjinfo,
 						   bool use_extended_stats,
 						   bool use_damping)
 {
 	Selectivity s1 = 1.0;
 	Selectivity *rgsel = NULL;
 	RelOptInfo *rel;
 	Bitmapset  *estimatedclauses = NULL;
 	RangeQueryClause *rqlist = NULL;
 	ListCell   *l;
 	int			listidx;

 	int pos = 0;
 	int i = 0;

 	/*
 	 * If the PlannerInfo was created from Orca, we don't care about the
 	 * selectivity/costing here and some of the necessary fields may not be
 	 * populated (eg: glob). Instead return the default selectivity.
 	 */
 	if (root->is_from_orca)
 	{
 		return s1;
 	}

 	/*
 	 * If there's exactly one clause, just go directly to
 	 * clause_selectivity_ext(). None of what we might do below is relevant.
 	 *
 	 * This test must come before the scratch array is allocated; otherwise
 	 * the array would be allocated and then abandoned on this return path.
 	 */
 	if (list_length(clauses) == 1)
 		return clause_selectivity_ext(root, (Node *) linitial(clauses),
 									  varRelid, jointype, sjinfo,
 									  use_extended_stats, use_damping);

 	/*
 	 * Allocate array to hold all selectivity factors.  At most one factor is
 	 * recorded per input clause, so list_length(clauses) entries suffice.
 	 */
 	rgsel = (Selectivity *) palloc(sizeof(Selectivity) * list_length(clauses));

 	/*
 	 * Determine if these clauses reference a single relation.  If so, and if
 	 * it has extended statistics, try to apply those.
 	 */
 	rel = find_single_rel_for_clauses(root, clauses);
 	if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
 	{
 		/*
 		 * Estimate as many clauses as possible using extended statistics.
 		 *
 		 * 'estimatedclauses' is populated with the 0-based list position
 		 * index of clauses estimated here, and that should be ignored below.
 		 */
 		s1 = statext_clauselist_selectivity(root, clauses, varRelid,
 											jointype, sjinfo, rel,
 											&estimatedclauses, false);
 	}

 	/*
 	 * Apply normal selectivity estimates for remaining clauses. We'll be
 	 * careful to skip any clauses which were already estimated above.
 	 *
 	 * Anything that doesn't look like a potential rangequery clause gets
 	 * recorded in rgsel[] and forgotten. Anything that does gets inserted
 	 * into an rqlist entry.
 	 */
 	listidx = -1;
 	foreach(l, clauses)
 	{
 		Node	   *clause = (Node *) lfirst(l);
 		RestrictInfo *rinfo;
 		Selectivity s2;

 		listidx++;

 		/*
 		 * Skip this clause if it's already been estimated by some other
 		 * statistics above.
 		 */
 		if (bms_is_member(listidx, estimatedclauses))
 			continue;

 		/* Compute the selectivity of this clause in isolation */
 		s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
 									use_extended_stats, use_damping);

 		/*
 		 * Check for being passed a RestrictInfo.
 		 *
 		 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
 		 * 0.0; just use that rather than looking for range pairs.
 		 */
 		if (IsA(clause, RestrictInfo))
 		{
 			rinfo = (RestrictInfo *) clause;
 			if (rinfo->pseudoconstant)
 			{
 				rgsel[pos++] = s2;
 				continue;
 			}
 			clause = (Node *) rinfo->clause;
 		}
 		else
 			rinfo = NULL;

 		/*
 		 * See if it looks like a restriction clause with a pseudoconstant on
 		 * one side.  (Anything more complicated than that might not behave in
 		 * the simple way we are expecting.)  Most of the tests here can be
 		 * done more efficiently with rinfo than without.
 		 */
 		if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
 		{
 			OpExpr	   *expr = (OpExpr *) clause;
 			bool		varonleft = true;
 			bool		ok;

 			/*
 			 * The comma expressions below flip varonleft to false only when
 			 * the right-hand argument was not a pseudoconstant and the
 			 * left-hand one is tested instead.
 			 */
 			if (rinfo)
 			{
 				ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
 					(is_pseudo_constant_clause_relids(lsecond(expr->args),
 													  rinfo->right_relids) ||
 					 (varonleft = false,
 					  is_pseudo_constant_clause_relids(linitial(expr->args),
 													   rinfo->left_relids)));
 			}
 			else
 			{
 				ok = (NumRelids(root, clause) == 1) &&
 					(is_pseudo_constant_clause(lsecond(expr->args)) ||
 					 (varonleft = false,
 					  is_pseudo_constant_clause(linitial(expr->args))));
 			}

 			if (ok)
 			{
 				/*
 				 * If it's not a "<"/"<="/">"/">=" operator, just merge the
 				 * selectivity in generically.  But if it's the right oprrest,
 				 * add the clause to rqlist for later processing.
 				 */
 				switch (get_oprrest(expr->opno))
 				{
 					case F_SCALARLTSEL:
 					case F_SCALARLESEL:
 						addRangeClause(&rqlist, clause,
 									   varonleft, true, s2);
 						break;
 					case F_SCALARGTSEL:
 					case F_SCALARGESEL:
 						addRangeClause(&rqlist, clause,
 									   varonleft, false, s2);
 						break;
 					default:
 						/* Just merge the selectivity in generically */
 						rgsel[pos++] = s2;
 						break;
 				}
 				continue;		/* drop to loop bottom */
 			}
 		}

 		/* Not the right form, so treat it generically. */
 		rgsel[pos++] = s2;
 	}

 	/*
 	 * Now scan the rangequery pair list.
 	 */
 	while (rqlist != NULL)
 	{
 		RangeQueryClause *rqnext;

 		if (rqlist->have_lobound && rqlist->have_hibound)
 		{
 			/* Successfully matched a pair of range clauses */
 			Selectivity s2;

 			/*
 			 * Exact equality to the default value probably means the
 			 * selectivity function punted.  This is not airtight but should
 			 * be good enough.
 			 */
 			if (rqlist->hibound == DEFAULT_INEQ_SEL ||
 				rqlist->lobound == DEFAULT_INEQ_SEL)
 			{
 				s2 = DEFAULT_RANGE_INEQ_SEL;
 			}
 			else
 			{
 				s2 = rqlist->hibound + rqlist->lobound - 1.0;

 				/* Adjust for double-exclusion of NULLs */
 				s2 += nulltestsel(root, IS_NULL, rqlist->var,
 								  varRelid, jointype, sjinfo);

 				/*
 				 * A zero or slightly negative s2 should be converted into a
 				 * small positive value; we probably are dealing with a very
 				 * tight range and got a bogus result due to roundoff errors.
 				 * However, if s2 is very negative, then we probably have
 				 * default selectivity estimates on one or both sides of the
 				 * range that we failed to recognize above for some reason.
 				 */
 				if (s2 <= 0.0)
 				{
 					if (s2 < -0.01)
 					{
 						/*
 						 * No data available --- use a default estimate that
 						 * is small, but not real small.
 						 */
 						s2 = DEFAULT_RANGE_INEQ_SEL;
 					}
 					else
 					{
 						/*
 						 * It's just roundoff error; use a small positive
 						 * value
 						 */
 						s2 = 1.0e-10;
 					}
 				}
 			}
 			/* Merge in the selectivity of the pair of clauses */
 			rgsel[pos++] = s2;
 		}
 		else
 		{
 			/* Only found one of a pair, merge it in generically */
 			if (rqlist->have_lobound)
 				rgsel[pos++] = rqlist->lobound;
 			else
 				rgsel[pos++] = rqlist->hibound;
 		}
 		/* release storage and advance */
 		rqnext = rqlist->next;
 		pfree(rqlist);
 		rqlist = rqnext;
 	}

 	Assert(pos <= list_length(clauses));

 	if (use_damping && pos >= 2)
 	{
 		/* sort selectivities first; most significant (i.e. lowest) first */
 		if (gp_selectivity_damping_sigsort)
 			qsort(rgsel, pos, sizeof(Selectivity), cmpSelectivity);

 		/* rgsel[0] is deliberately left as is; damping starts at index 1 */
 		for (i = 1; i < pos; i++)
 		{
 			/* dampen selectivity as n-th root of the original value */
 			rgsel[i] = pow(rgsel[i], 1.0/Max(0.1, ((i + 1) * gp_selectivity_damping_factor)));
 		}
 	}

 	/* Fold all collected factors into the running product */
 	for (i = 0; i < pos; i++)
 	{
 		s1 *= rgsel[i];
 	}

 	pfree(rgsel);

 	return s1;
 }

 /*
  * clauselist_selectivity_or -
  *	  Estimate the selectivity of a list of boolean clauses that are
  *	  implicitly ORed together.  An empty list yields 0.0.  The elements
  *	  may be RestrictInfos or bare expressions; RestrictInfos are preferred
  *	  because their results can be cached.
  *
  * See clause_selectivity() for the meaning of the additional parameters.
  *
  * Extended statistics are tried first (when permitted and when all clauses
  * refer to one plain relation that has some); whatever they did not cover
  * is then combined under an independence assumption.
  */
 static Selectivity
 clauselist_selectivity_or(PlannerInfo *root,
 						  List *clauses,
 						  int varRelid,
 						  JoinType jointype,
 						  SpecialJoinInfo *sjinfo,
 						  bool use_extended_stats,
 						  bool use_damping)
 {
 	Bitmapset  *already_estimated = NULL;
 	Selectivity result = 0.0;
 	RelOptInfo *single_rel;
 	ListCell   *cell;
 	int			next_position = 0;

 	/* Do all the clauses talk about one and the same relation? */
 	single_rel = find_single_rel_for_clauses(root, clauses);

 	if (use_extended_stats &&
 		single_rel &&
 		single_rel->rtekind == RTE_RELATION &&
 		single_rel->statlist != NIL)
 	{
 		/*
 		 * Let extended statistics handle whatever they can.  The 0-based
 		 * list positions of the clauses they covered are reported back in
 		 * 'already_estimated' so the loop below can skip them.
 		 */
 		result = statext_clauselist_selectivity(root, clauses, varRelid,
 												jointype, sjinfo, single_rel,
 												&already_estimated, true);
 	}

 	/*
 	 * Fold in every clause not handled above, treating the clauses as
 	 * independent: P(a OR b) = P(a) + P(b) - P(a) * P(b), which accounts for
 	 * the probable overlap of the selected tuple sets.
 	 *
 	 * XXX is this too conservative?
 	 */
 	foreach(cell, clauses)
 	{
 		int			position = next_position++;
 		Selectivity clause_sel;

 		/* Already accounted for by extended statistics? */
 		if (bms_is_member(position, already_estimated))
 			continue;

 		clause_sel = clause_selectivity_ext(root,
 											(Node *) lfirst(cell),
 											varRelid,
 											jointype,
 											sjinfo,
 											use_extended_stats,
 											use_damping);

 		result = result + clause_sel - result * clause_sel;
 	}

 	return result;
 }

 /*
  * addRangeClause --- record one range clause for clauselist_selectivity
  *
  * Range-query clauses on the same expression are grouped into a single
  * RangeQueryClause entry, which remembers the tightest low-bound and
  * high-bound selectivity seen so far.  A new entry is pushed onto the front
  * of *rqlist when no existing entry matches.
  */
 static void
 addRangeClause(RangeQueryClause **rqlist, Node *clause,
 			   bool varonleft, bool isLTsel, Selectivity s2)
 {
 	RangeQueryClause *entry;
 	Node	   *var;
 	bool		is_lobound;

 	/*
 	 * Pick the non-constant side.  "x < something" is a high bound, whereas
 	 * "something < x" is a low bound.
 	 */
 	var = varonleft ? get_leftop((Expr *) clause) : get_rightop((Expr *) clause);
 	is_lobound = varonleft ? !isLTsel : isLTsel;

 	for (entry = *rqlist; entry != NULL; entry = entry->next)
 	{
 		bool	   *have_bound;
 		Selectivity *bound;

 		/*
 		 * Full equal() is needed because "var" may be a function of one or
 		 * more attributes of the same relation rather than a plain Var.
 		 */
 		if (!equal(var, entry->var))
 			continue;

 		/* This is the group for our clause; select the side to update */
 		if (is_lobound)
 		{
 			have_bound = &entry->have_lobound;
 			bound = &entry->lobound;
 		}
 		else
 		{
 			have_bound = &entry->have_hibound;
 			bound = &entry->hibound;
 		}

 		if (!*have_bound)
 		{
 			/* first clause seen for this side */
 			*have_bound = true;
 			*bound = s2;
 		}
 		else if (*bound > s2)
 		{
 			/*------
 			 * Two clauses constrain the same side, such as
 			 * x > y AND x >= z (low bounds) or
 			 * x < y AND x <= z (high bounds).
 			 * Keep only the more restrictive one.
 			 *------
 			 */
 			*bound = s2;
 		}
 		return;
 	}

 	/* No group for this var yet: create one holding just this bound */
 	entry = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
 	entry->var = var;
 	entry->have_lobound = is_lobound;
 	entry->have_hibound = !is_lobound;
 	if (is_lobound)
 		entry->lobound = s2;
 	else
 		entry->hibound = s2;
 	entry->next = *rqlist;
 	*rqlist = entry;
 }

 /*
  * find_single_rel_for_clauses
  *		Check whether every clause in 'clauses' refers to one and the same
  *		relation.  Returns that relation if so; returns NULL if the clauses
  *		span several relations, if an unsupported clause form is found, or
  *		if no clause refers to any relation at all.
  */
 static RelOptInfo *
 find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
 {
 	int			common_relid = 0;
 	ListCell   *cell;

 	foreach(cell, clauses)
 	{
 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
 		int			relid;

 		/*
 		 * Bare clauses (not wrapped in a RestrictInfo) could have their
 		 * relids extracted the hard way with pull_varnos(), but the
 		 * extended-stats machinery currently ignores such clauses anyway, so
 		 * we simply give up on them.
 		 *
 		 * The one exception is a bare BoolExpr AND clause, since the
 		 * restrictinfo machinery doesn't build RestrictInfos on top of AND
 		 * clauses; its arguments are examined recursively.
 		 */
 		if (is_andclause(rinfo))
 		{
 			RelOptInfo *subrel;

 			subrel = find_single_rel_for_clauses(root,
 												 ((BoolExpr *) rinfo)->args);
 			if (subrel == NULL)
 				return NULL;
 			relid = subrel->relid;
 		}
 		else
 		{
 			if (!IsA(rinfo, RestrictInfo))
 				return NULL;

 			/* variable-free clauses don't constrain the answer */
 			if (bms_is_empty(rinfo->clause_relids))
 				continue;

 			/* a clause touching several relations ends the search */
 			if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
 				return NULL;
 		}

 		/* Remember the first relation seen; later ones must agree with it */
 		if (common_relid == 0)
 			common_relid = relid;
 		else if (relid != common_relid)
 			return NULL;
 	}

 	/* common_relid is still zero if no clause referenced a relation */
 	return (common_relid != 0) ? find_base_rel(root, common_relid) : NULL;
 }

 /*
  * bms_is_subset_singleton
  *
  * Reports whether set "s" is a subset of the one-element set {x}.  Gives the
  * same answer as bms_is_subset(s, bms_make_singleton(x)), but is a little
  * faster and doesn't leak memory.
  *
  * Is this of use anywhere else?  If so move to bitmapset.c ...
  */
 static bool
 bms_is_subset_singleton(const Bitmapset *s, int x)
 {
 	bool		is_subset = false;

 	switch (bms_membership(s))
 	{
 		case BMS_EMPTY_SET:
 			/* the empty set is a subset of anything */
 			is_subset = true;
 			break;
 		case BMS_SINGLETON:
 			/* one member: it has to be x itself */
 			is_subset = bms_is_member(x, s);
 			break;
 		case BMS_MULTIPLE:
 			/* two or more members cannot fit; keep the default of false */
 			break;
 	}

 	return is_subset;
 }

 /*
  * treat_as_join_clause -
  *	  Decide whether a clause should be estimated by the join estimator
  *	  (returns true) or by the restriction estimator (returns false).
  *	  Subroutine for clause_selectivity().
  */
 static inline bool
 treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
 					 int varRelid, SpecialJoinInfo *sjinfo)
 {
 	/*
 	 * A nonzero varRelid means the caller is forcing restriction mode (eg,
 	 * because we are examining an inner indexscan qual).
 	 */
 	if (varRelid != 0)
 		return false;

 	/*
 	 * Without join context the clause is being evaluated at a scan node, so
 	 * it must be a restriction clause.
 	 */
 	if (sjinfo == NULL)
 		return false;

 	/*
 	 * Otherwise it's a join clause exactly when it uses more than one
 	 * relation.  With an rinfo at hand that test is cheap.
 	 *
 	 * XXX	Since we know the clause is being evaluated at a join, the only
 	 * way it could be single-relation is if it was delayed by outer joins.
 	 * Although we can make use of the restriction qual estimators anyway, it
 	 * seems likely that we ought to account for the probability of injected
 	 * nulls somehow.
 	 */
 	if (rinfo != NULL)
 		return bms_membership(rinfo->clause_relids) == BMS_MULTIPLE;

 	return NumRelids(root, clause) > 1;
 }


 /*
  * clause_selectivity -
  *	  Compute the selectivity of a general boolean expression clause.
  *
  * The clause can be either a RestrictInfo or a plain expression.  If it's
  * a RestrictInfo, we try to cache the selectivity for possible re-use,
  * so passing RestrictInfos is preferred.
  *
  * varRelid is either 0 or a rangetable index.
  *
  * When varRelid is not 0, only variables belonging to that relation are
  * considered in computing selectivity; other vars are treated as constants
  * of unknown values.  This is appropriate for estimating the selectivity of
  * a join clause that is being used as a restriction clause in a scan of a
  * nestloop join's inner relation --- varRelid should then be the ID of the
  * inner relation.
  *
  * When varRelid is 0, all variables are treated as variables.  This
  * is appropriate for ordinary join clauses and restriction clauses.
  *
  * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
  * if the clause isn't a join clause.
  *
  * sjinfo is NULL for a non-join clause, otherwise it provides additional
  * context information about the join being performed.  There are some
  * special cases:
  *	1. For a special (not INNER) join, sjinfo is always a member of
  *	   root->join_info_list.
  *	2. For an INNER join, sjinfo is just a transient struct, and only the
  *	   relids and jointype fields in it can be trusted.
  * It is possible for jointype to be different from sjinfo->jointype.
  * This indicates we are considering a variant join: either with
  * the LHS and RHS switched, or with one input unique-ified.
  *
  * Note: when passing nonzero varRelid, it's normally appropriate to set
  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
  * join clause; because we aren't treating it as a join clause.
  */
 Selectivity
 clause_selectivity(PlannerInfo *root,
 				   Node *clause,
 				   int varRelid,
 				   JoinType jointype,
 				   SpecialJoinInfo *sjinfo,
 				   bool use_damping)
 {
 	Selectivity result;

 	/*
 	 * Convenience entry point: same as clause_selectivity_ext() with
 	 * use_extended_stats fixed to true.
 	 */
 	result = clause_selectivity_ext(root,
 									clause,
 									varRelid,
 									jointype,
 									sjinfo,
 									true,
 									use_damping);

 	return result;
 }

 /*
  * clause_selectivity_ext -
  *	  Extended version of clause_selectivity().  If "use_extended_stats" is
  *	  false, all extended statistics will be ignored, and only per-column
  *	  statistics will be used.
  *
  * "use_extended_stats" and "use_damping" are not interpreted here; they are
  * only handed on to the recursive calls and to clauselist_selectivity_ext()
  * / clauselist_selectivity_or() for AND and OR clauses.
  *
  * Returns 0.5 for a NULL clause and for clause types for which none of the
  * branches below produces an estimate.
  */
 Selectivity
 clause_selectivity_ext(PlannerInfo *root,
 					   Node *clause,
 					   int varRelid,
 					   JoinType jointype,
 					   SpecialJoinInfo *sjinfo,
 					   bool use_extended_stats,
 					   bool use_damping)
 {
 	Selectivity s1 = 0.5;		/* default for any unhandled clause type */
 	RestrictInfo *rinfo = NULL;
 	bool		cacheable = false;

 	if (clause == NULL)			/* can this still happen? */
 		return s1;

 	if (IsA(clause, RestrictInfo))
 	{
 		rinfo = (RestrictInfo *) clause;

 		/*
 		 * If the clause is marked pseudoconstant, then it will be used as a
 		 * gating qual and should not affect selectivity estimates; hence
 		 * return 1.0.  The only exception is that a constant FALSE may be
 		 * taken as having selectivity 0.0, since it will surely mean no rows
 		 * out of the plan.  This case is simple enough that we need not
 		 * bother caching the result.
 		 *
 		 * Note that a pseudoconstant whose clause is a Const does not return
 		 * here; it continues below and, unless one of the later early
 		 * returns applies, is evaluated by the Const branch.
 		 */
 		if (rinfo->pseudoconstant)
 		{
 			if (!IsA(rinfo->clause, Const))
 				return (Selectivity) 1.0;
 		}

 		/*
 		 * If the clause is marked redundant, always return 1.0.
 		 */
 		if (rinfo->norm_selec > 1)
 			return (Selectivity) 1.0;

 		/*
 		 * If possible, cache the result of the selectivity calculation for
 		 * the clause.  We can cache if varRelid is zero or the clause
 		 * contains only vars of that relid --- otherwise varRelid will affect
 		 * the result, so mustn't cache.  Outer join quals might be examined
 		 * with either their join's actual jointype or JOIN_INNER, so we need
 		 * two cache variables to remember both cases.  Note: we assume the
 		 * result won't change if we are switching the input relations or
 		 * considering a unique-ified case, so we only need one cache variable
 		 * for all non-JOIN_INNER cases.
 		 *
 		 * A cached value >= 0 is treated as an already-computed result.
 		 *
 		 * NOTE(review): the cache lookup depends only on varRelid and
 		 * jointype; it does not record which use_extended_stats/use_damping
 		 * settings produced the stored value.
 		 */
 		if (varRelid == 0 ||
 			bms_is_subset_singleton(rinfo->clause_relids, varRelid))
 		{
 			/* Cacheable --- do we already have the result? */
 			if (jointype == JOIN_INNER)
 			{
 				if (rinfo->norm_selec >= 0)
 					return rinfo->norm_selec;
 			}
 			else
 			{
 				if (rinfo->outer_selec >= 0)
 					return rinfo->outer_selec;
 			}
 			cacheable = true;
 		}

 		/*
 		 * Proceed with examination of contained clause.  If the clause is an
 		 * OR-clause, we want to look at the variant with sub-RestrictInfos,
 		 * so that per-subclause selectivities can be cached.
 		 */
 		if (rinfo->orclause)
 			clause = (Node *) rinfo->orclause;
 		else
 			clause = (Node *) rinfo->clause;
 	}

 	/*
 	 * From here on "clause" is the expression to estimate (unwrapped from its
 	 * RestrictInfo, if there was one); dispatch on its node type.
 	 */
 	if (IsA(clause, Var))
 	{
 		Var		   *var = (Var *) clause;

 		/*
 		 * We probably shouldn't ever see an uplevel Var here, but if we do,
 		 * return the default selectivity...
 		 */
 		if (var->varlevelsup == 0 &&
 			(varRelid == 0 || varRelid == (int) var->varno))
 		{
 			/* Use the restriction selectivity function for a bool Var */
 			s1 = boolvarsel(root, (Node *) var, varRelid);
 		}
 	}
 	else if (IsA(clause, Const))
 	{
 		/* bool constant is pretty easy... (a NULL constant counts as 0.0) */
 		Const	   *con = (Const *) clause;

 		s1 = con->constisnull ? 0.0 :
 			DatumGetBool(con->constvalue) ? 1.0 : 0.0;
 	}
 	else if (IsA(clause, Param))
 	{
 		/* see if we can replace the Param */
 		Node	   *subst = estimate_expression_value(root, clause);

 		if (IsA(subst, Const))
 		{
 			/* bool constant is pretty easy... */
 			Const	   *con = (Const *) subst;

 			s1 = con->constisnull ? 0.0 :
 				DatumGetBool(con->constvalue) ? 1.0 : 0.0;
 		}
 		else
 		{
 			/* XXX any way to do better than default? */
 		}
 	}
 	else if (is_notclause(clause))
 	{
 		/* inverse of the selectivity of the underlying clause */
 		s1 = 1.0 - clause_selectivity_ext(root,
 										  (Node *) get_notclausearg((Expr *) clause),
 										  varRelid,
 										  jointype,
 										  sjinfo,
 										  use_extended_stats,
 										  use_damping);
 	}
 	else if (is_andclause(clause))
 	{
 		/* share code with clauselist_selectivity() */
 		s1 = clauselist_selectivity_ext(root,
 										((BoolExpr *) clause)->args,
 										varRelid,
 										jointype,
 										sjinfo,
 										use_extended_stats,
 										use_damping);
 	}
 	else if (is_orclause(clause))
 	{
 		/*
 		 * Almost the same thing as clauselist_selectivity, but with the
 		 * clauses connected by OR.
 		 */
 		s1 = clauselist_selectivity_or(root,
 									   ((BoolExpr *) clause)->args,
 									   varRelid,
 									   jointype,
 									   sjinfo,
 									   use_extended_stats,
 									   use_damping);
 	}
 	else if (is_opclause(clause) || IsA(clause, DistinctExpr))
 	{
 		OpExpr	   *opclause = (OpExpr *) clause;
 		Oid			opno = opclause->opno;

 		if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
 		{
 			/* Estimate selectivity for a join clause. */
 			s1 = join_selectivity(root, opno,
 								  opclause->args,
 								  opclause->inputcollid,
 								  jointype,
 								  sjinfo);
 		}
 		else
 		{
 			/* Estimate selectivity for a restriction clause. */
 			s1 = restriction_selectivity(root, opno,
 										 opclause->args,
 										 opclause->inputcollid,
 										 varRelid);
 		}

 		/*
 		 * DistinctExpr has the same representation as OpExpr, but the
 		 * contained operator is "=" not "<>", so we must negate the result.
 		 * This estimation method doesn't give the right behavior for nulls,
 		 * but it's better than doing nothing.
 		 */
 		if (IsA(clause, DistinctExpr))
 			s1 = 1.0 - s1;
 	}
 	else if (is_funcclause(clause))
 	{
 		FuncExpr   *funcclause = (FuncExpr *) clause;

 		/* Try to get an estimate from the support function, if any */
 		s1 = function_selectivity(root,
 								  funcclause->funcid,
 								  funcclause->args,
 								  funcclause->inputcollid,
 								  treat_as_join_clause(root, clause, rinfo,
 													   varRelid, sjinfo),
 								  varRelid,
 								  jointype,
 								  sjinfo);
 	}
 	else if (IsA(clause, ScalarArrayOpExpr))
 	{
 		/* Use node specific selectivity calculation function */
 		s1 = scalararraysel(root,
 							(ScalarArrayOpExpr *) clause,
 							treat_as_join_clause(root, clause, rinfo,
 												 varRelid, sjinfo),
 							varRelid,
 							jointype,
 							sjinfo);
 	}
 	else if (IsA(clause, RowCompareExpr))
 	{
 		/* Use node specific selectivity calculation function */
 		s1 = rowcomparesel(root,
 						   (RowCompareExpr *) clause,
 						   varRelid,
 						   jointype,
 						   sjinfo);
 	}
 	else if (IsA(clause, NullTest))
 	{
 		/* Use node specific selectivity calculation function */
 		s1 = nulltestsel(root,
 						 ((NullTest *) clause)->nulltesttype,
 						 (Node *) ((NullTest *) clause)->arg,
 						 varRelid,
 						 jointype,
 						 sjinfo);
 	}
 	else if (IsA(clause, BooleanTest))
 	{
 		/* Use node specific selectivity calculation function */
 		s1 = booltestsel(root,
 						 ((BooleanTest *) clause)->booltesttype,
 						 (Node *) ((BooleanTest *) clause)->arg,
 						 varRelid,
 						 jointype,
 						 sjinfo);
 	}
 	else if (IsA(clause, CurrentOfExpr))
 	{
 		/* CURRENT OF selects at most one row of its table */
 		CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
 		RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);

 		/* with no positive tuple count, s1 keeps its 0.5 default */
 		if (crel->tuples > 0)
 			s1 = 1.0 / crel->tuples;
 	}
 	else if (IsA(clause, RelabelType))
 	{
 		/* Not sure this case is needed, but it can't hurt */
 		s1 = clause_selectivity_ext(root,
 									(Node *) ((RelabelType *) clause)->arg,
 									varRelid,
 									jointype,
 									sjinfo,
 									use_extended_stats,
 									use_damping);
 	}
 	else if (IsA(clause, CoerceToDomain))
 	{
 		/* Not sure this case is needed, but it can't hurt */
 		s1 = clause_selectivity_ext(root,
 									(Node *) ((CoerceToDomain *) clause)->arg,
 									varRelid,
 									jointype,
 									sjinfo,
 									use_extended_stats,
 									use_damping);
 	}
 	else
 	{
 		/*
 		 * For anything else, see if we can consider it as a boolean variable.
 		 * This only works if it's an immutable expression in Vars of a single
 		 * relation; but there's no point in us checking that here because
 		 * boolvarsel() will do it internally, and return a suitable default
 		 * selectivity if not.
 		 */
 		s1 = boolvarsel(root, clause, varRelid);
 	}

 	/*
 	 * Cache the result if possible.  "cacheable" is only ever set when a
 	 * RestrictInfo was passed in, so rinfo is non-NULL here; the field
 	 * written is the same one consulted for this jointype above.
 	 */
 	if (cacheable)
 	{
 		if (jointype == JOIN_INNER)
 			rinfo->norm_selec = s1;
 		else
 			rinfo->outer_selec = s1;
 	}

 #ifdef SELECTIVITY_DEBUG
 	elog(DEBUG4, "clause_selectivity: s1 %f", s1);
 #endif							/* SELECTIVITY_DEBUG */

 	return s1;
 }