| /*------------------------------------------------------------------------- |
| * |
| * dependencies.c |
| * POSTGRES functional dependencies |
| * |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * IDENTIFICATION |
| * src/backend/statistics/dependencies.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "access/htup_details.h" |
| #include "access/sysattr.h" |
| #include "catalog/pg_operator.h" |
| #include "catalog/pg_statistic_ext.h" |
| #include "catalog/pg_statistic_ext_data.h" |
| #include "lib/stringinfo.h" |
| #include "nodes/nodeFuncs.h" |
| #include "nodes/nodes.h" |
| #include "nodes/pathnodes.h" |
| #include "optimizer/clauses.h" |
| #include "optimizer/optimizer.h" |
| #include "parser/parsetree.h" |
| #include "statistics/extended_stats_internal.h" |
| #include "statistics/statistics.h" |
| #include "utils/bytea.h" |
| #include "utils/fmgroids.h" |
| #include "utils/fmgrprotos.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| #include "utils/selfuncs.h" |
| #include "utils/syscache.h" |
| #include "utils/typcache.h" |
| |
/* size of the struct header fields (magic, type, ndeps), each a uint32 */
#define SizeOfHeader (3 * sizeof(uint32))

/*
 * size of a serialized dependency (degree, natts, atts)
 *
 * One double for the degree, one AttrNumber for the attribute count, and
 * natts AttrNumbers for the attributes themselves.
 */
#define SizeOfItem(natts) \
	(sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))

/* minimal size of a dependency (with two attributes) */
#define MinSizeOfItem SizeOfItem(2)

/*
 * minimal size of dependencies, when all deps are minimal
 * (i.e. the header plus ndeps two-attribute items)
 */
#define MinSizeOfItems(ndeps) \
	(SizeOfHeader + (ndeps) * MinSizeOfItem)
| |
/*
 * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
 * k-permutations of n elements, except that the order does not matter for the
 * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
 *
 * The generator pre-builds all dependencies up front; "dependencies" holds
 * ndependencies consecutive groups of k attribute indexes (indexes into the
 * statistics object's attribute list, not regular attnums).
 */
typedef struct DependencyGeneratorData
{
	int			k;				/* size of the dependency */
	int			n;				/* number of possible attributes */
	int			current;		/* next dependency to return (index) */
	AttrNumber	ndependencies;	/* number of dependencies generated */
	AttrNumber *dependencies;	/* array of pre-generated dependencies */
} DependencyGeneratorData;

typedef DependencyGeneratorData *DependencyGenerator;
| |
| static void generate_dependencies_recurse(DependencyGenerator state, |
| int index, AttrNumber start, AttrNumber *current); |
| static void generate_dependencies(DependencyGenerator state); |
| static DependencyGenerator DependencyGenerator_init(int n, int k); |
| static void DependencyGenerator_free(DependencyGenerator state); |
| static AttrNumber *DependencyGenerator_next(DependencyGenerator state); |
| static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency); |
| static bool dependency_is_fully_matched(MVDependency *dependency, |
| Bitmapset *attnums); |
| static bool dependency_is_compatible_clause(Node *clause, Index relid, |
| AttrNumber *attnum); |
| static bool dependency_is_compatible_expression(Node *clause, Index relid, |
| List *statlist, Node **expr); |
| static MVDependency *find_strongest_dependency(MVDependencies **dependencies, |
| int ndependencies, Bitmapset *attnums); |
| static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses, |
| int varRelid, JoinType jointype, |
| SpecialJoinInfo *sjinfo, |
| MVDependency **dependencies, |
| int ndependencies, |
| AttrNumber *list_attnums, |
| Bitmapset **estimatedclauses); |
| |
| static void |
| generate_dependencies_recurse(DependencyGenerator state, int index, |
| AttrNumber start, AttrNumber *current) |
| { |
| /* |
| * The generator handles the first (k-1) elements differently from the |
| * last element. |
| */ |
| if (index < (state->k - 1)) |
| { |
| AttrNumber i; |
| |
| /* |
| * The first (k-1) values have to be in ascending order, which we |
| * generate recursively. |
| */ |
| |
| for (i = start; i < state->n; i++) |
| { |
| current[index] = i; |
| generate_dependencies_recurse(state, (index + 1), (i + 1), current); |
| } |
| } |
| else |
| { |
| int i; |
| |
| /* |
| * the last element is the implied value, which does not respect the |
| * ascending order. We just need to check that the value is not in the |
| * first (k-1) elements. |
| */ |
| |
| for (i = 0; i < state->n; i++) |
| { |
| int j; |
| bool match = false; |
| |
| current[index] = i; |
| |
| for (j = 0; j < index; j++) |
| { |
| if (current[j] == i) |
| { |
| match = true; |
| break; |
| } |
| } |
| |
| /* |
| * If the value is not found in the first part of the dependency, |
| * we're done. |
| */ |
| if (!match) |
| { |
| state->dependencies = (AttrNumber *) repalloc(state->dependencies, |
| state->k * (state->ndependencies + 1) * sizeof(AttrNumber)); |
| memcpy(&state->dependencies[(state->k * state->ndependencies)], |
| current, state->k * sizeof(AttrNumber)); |
| state->ndependencies++; |
| } |
| } |
| } |
| } |
| |
| /* generate all dependencies (k-permutations of n elements) */ |
| static void |
| generate_dependencies(DependencyGenerator state) |
| { |
| AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k); |
| |
| generate_dependencies_recurse(state, 0, 0, current); |
| |
| pfree(current); |
| } |
| |
| /* |
| * initialize the DependencyGenerator of variations, and prebuild the variations |
| * |
| * This pre-builds all the variations. We could also generate them in |
| * DependencyGenerator_next(), but this seems simpler. |
| */ |
| static DependencyGenerator |
| DependencyGenerator_init(int n, int k) |
| { |
| DependencyGenerator state; |
| |
| Assert((n >= k) && (k > 0)); |
| |
| /* allocate the DependencyGenerator state */ |
| state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData)); |
| state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber)); |
| |
| state->ndependencies = 0; |
| state->current = 0; |
| state->k = k; |
| state->n = n; |
| |
| /* now actually pre-generate all the variations */ |
| generate_dependencies(state); |
| |
| return state; |
| } |
| |
| /* free the DependencyGenerator state */ |
| static void |
| DependencyGenerator_free(DependencyGenerator state) |
| { |
| pfree(state->dependencies); |
| pfree(state); |
| |
| } |
| |
| /* generate next combination */ |
| static AttrNumber * |
| DependencyGenerator_next(DependencyGenerator state) |
| { |
| if (state->current == state->ndependencies) |
| return NULL; |
| |
| return &state->dependencies[state->k * state->current++]; |
| } |
| |
| |
/*
 * validates functional dependency on the data
 *
 * An actual work horse of detecting functional dependencies. Given a variation
 * of k attributes, it checks that the first (k-1) are sufficient to determine
 * the last one.
 *
 * "dependency" holds k indexes into data->attnums / data->stats (not regular
 * attnums).  Returns the fraction of rows consistent with the dependency,
 * in the range [0.0, 1.0].
 *
 * NOTE(review): the final division assumes data->numrows > 0 — presumably
 * guaranteed by the caller having sampled at least one row; confirm.
 */
static double
dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
{
	int			i,
				nitems;
	MultiSortSupport mss;
	SortItem   *items;
	AttrNumber *attnums_dep;

	/* counters valid within a group */
	int			group_size = 0;
	int			n_violations = 0;

	/* total number of rows supporting (consistent with) the dependency */
	int			n_supporting_rows = 0;

	/* Make sure we have at least two input attributes. */
	Assert(k >= 2);

	/* sort info for all attributes columns */
	mss = multi_sort_init(k);

	/*
	 * Translate the array of indexes to regular attnums for the dependency
	 * (we will need this to identify the columns in StatsBuildData).
	 */
	attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
	for (i = 0; i < k; i++)
		attnums_dep[i] = data->attnums[dependency[i]];

	/*
	 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
	 *
	 * (a) sort the data lexicographically
	 *
	 * (b) split the data into groups by first (k-1) columns
	 *
	 * (c) for each group count different values in the last column
	 *
	 * We use the column data types' default sort operators and collations;
	 * perhaps at some point it'd be worth using column-specific collations?
	 */

	/* prepare the sort function for the dimensions */
	for (i = 0; i < k; i++)
	{
		VacAttrStats *colstat = data->stats[dependency[i]];
		TypeCacheEntry *type;

		/* look up the default "<" operator for the column's type */
		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
		if (type->lt_opr == InvalidOid) /* shouldn't happen */
			elog(ERROR, "cache lookup failed for ordering operator for type %u",
				 colstat->attrtypid);

		/* prepare the sort function for this dimension */
		multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
	}

	/*
	 * build an array of SortItem(s) sorted using the multi-sort support
	 *
	 * XXX This relies on all stats entries pointing to the same tuple
	 * descriptor. For now that assumption holds, but it might change in the
	 * future for example if we support statistics on multiple tables.
	 */
	items = build_sorted_items(data, &nitems, mss, k, attnums_dep);

	/*
	 * Walk through the sorted array, split it into rows according to the
	 * first (k-1) columns. If there's a single value in the last column, we
	 * count the group as 'supporting' the functional dependency. Otherwise we
	 * count it as contradicting.
	 */

	/* start with the first row forming a group */
	group_size = 1;

	/* loop 1 beyond the end of the array so that we count the final group */
	for (i = 1; i <= nitems; i++)
	{
		/*
		 * Check if the group ended, which may be either because we processed
		 * all the items (i==nitems), or because the i-th item is not equal to
		 * the preceding one.  (Dims 0 .. k-2 are the implying columns; the
		 * last dimension is deliberately excluded from this comparison.)
		 */
		if (i == nitems ||
			multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
		{
			/*
			 * If no violations were found in the group then track the rows of
			 * the group as supporting the functional dependency.
			 */
			if (n_violations == 0)
				n_supporting_rows += group_size;

			/* Reset counters for the new group (row i starts it, size 1) */
			n_violations = 0;
			group_size = 1;
			continue;
		}
		/* first columns match, but the last one does not (so contradicting) */
		else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
			n_violations++;

		group_size++;
	}

	/* Compute the 'degree of validity' as (supporting/total). */
	return (n_supporting_rows * 1.0 / data->numrows);
}
| |
/*
 * detects functional dependencies between groups of columns
 *
 * Generates all possible subsets of columns (variations) and computes
 * the degree of validity for each one. For example when creating statistics
 * on three columns (a,b,c) there are 9 possible dependencies
 *
 *	   two columns			  three columns
 *	   -----------			  -------------
 *	   (a) -> b				  (a,b) -> c
 *	   (a) -> c				  (a,c) -> b
 *	   (b) -> a				  (b,c) -> a
 *	   (b) -> c
 *	   (c) -> a
 *	   (c) -> b
 *
 * Returns NULL when no dependency has a non-zero degree; otherwise a
 * palloc'd MVDependencies holding all dependencies with degree > 0.
 */
MVDependencies *
statext_dependencies_build(StatsBuildData *data)
{
	int			i,
				k;

	/* result */
	MVDependencies *dependencies = NULL;
	MemoryContext cxt;

	Assert(data->nattnums >= 2);

	/*
	 * tracks memory allocated by dependency_degree calls; reset after each
	 * call so the (potentially large) sort arrays don't accumulate
	 */
	cxt = AllocSetContextCreate(CurrentMemoryContext,
								"dependency_degree cxt",
								ALLOCSET_DEFAULT_SIZES);

	/*
	 * We'll try build functional dependencies starting from the smallest ones
	 * covering just 2 columns, to the largest ones, covering all columns
	 * included in the statistics object.  We start from the smallest ones
	 * because we want to be able to skip already implied ones.
	 */
	for (k = 2; k <= data->nattnums; k++)
	{
		AttrNumber *dependency; /* array with k elements */

		/* prepare a DependencyGenerator of variation */
		DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);

		/* generate all possible variations of k values (out of n) */
		while ((dependency = DependencyGenerator_next(DependencyGenerator)))
		{
			double		degree;
			MVDependency *d;
			MemoryContext oldcxt;

			/* release memory used by dependency degree calculation */
			oldcxt = MemoryContextSwitchTo(cxt);

			/* compute how valid the dependency seems */
			degree = dependency_degree(data, k, dependency);

			MemoryContextSwitchTo(oldcxt);
			MemoryContextReset(cxt);

			/*
			 * if the dependency seems entirely invalid, don't store it
			 */
			if (degree == 0.0)
				continue;

			/* result must live outside cxt, which gets reset above */
			d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
										 + k * sizeof(AttrNumber));

			/* copy the dependency (and keep the indexes into stxkeys) */
			d->degree = degree;
			d->nattributes = k;
			for (i = 0; i < k; i++)
				d->attributes[i] = data->attnums[dependency[i]];

			/* initialize the list of dependencies */
			if (dependencies == NULL)
			{
				dependencies
					= (MVDependencies *) palloc0(sizeof(MVDependencies));

				dependencies->magic = STATS_DEPS_MAGIC;
				dependencies->type = STATS_DEPS_TYPE_BASIC;
				dependencies->ndeps = 0;
			}

			/* grow the flexible deps[] array by one and append */
			dependencies->ndeps++;
			dependencies = (MVDependencies *) repalloc(dependencies,
													   offsetof(MVDependencies, deps)
													   + dependencies->ndeps * sizeof(MVDependency *));

			dependencies->deps[dependencies->ndeps - 1] = d;
		}

		/*
		 * we're done with variations of k elements, so free the
		 * DependencyGenerator
		 */
		DependencyGenerator_free(DependencyGenerator);
	}

	MemoryContextDelete(cxt);

	return dependencies;
}
| |
| |
| /* |
| * Serialize list of dependencies into a bytea value. |
| */ |
| bytea * |
| statext_dependencies_serialize(MVDependencies *dependencies) |
| { |
| int i; |
| bytea *output; |
| char *tmp; |
| Size len; |
| |
| /* we need to store ndeps, with a number of attributes for each one */ |
| len = VARHDRSZ + SizeOfHeader; |
| |
| /* and also include space for the actual attribute numbers and degrees */ |
| for (i = 0; i < dependencies->ndeps; i++) |
| len += SizeOfItem(dependencies->deps[i]->nattributes); |
| |
| output = (bytea *) palloc0(len); |
| SET_VARSIZE(output, len); |
| |
| tmp = VARDATA(output); |
| |
| /* Store the base struct values (magic, type, ndeps) */ |
| memcpy(tmp, &dependencies->magic, sizeof(uint32)); |
| tmp += sizeof(uint32); |
| memcpy(tmp, &dependencies->type, sizeof(uint32)); |
| tmp += sizeof(uint32); |
| memcpy(tmp, &dependencies->ndeps, sizeof(uint32)); |
| tmp += sizeof(uint32); |
| |
| /* store number of attributes and attribute numbers for each dependency */ |
| for (i = 0; i < dependencies->ndeps; i++) |
| { |
| MVDependency *d = dependencies->deps[i]; |
| |
| memcpy(tmp, &d->degree, sizeof(double)); |
| tmp += sizeof(double); |
| |
| memcpy(tmp, &d->nattributes, sizeof(AttrNumber)); |
| tmp += sizeof(AttrNumber); |
| |
| memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes); |
| tmp += sizeof(AttrNumber) * d->nattributes; |
| |
| /* protect against overflow */ |
| Assert(tmp <= ((char *) output + len)); |
| } |
| |
| /* make sure we've produced exactly the right amount of data */ |
| Assert(tmp == ((char *) output + len)); |
| |
| return output; |
| } |
| |
| /* |
| * Reads serialized dependencies into MVDependencies structure. |
| */ |
| MVDependencies * |
| statext_dependencies_deserialize(bytea *data) |
| { |
| int i; |
| Size min_expected_size; |
| MVDependencies *dependencies; |
| char *tmp; |
| |
| if (data == NULL) |
| return NULL; |
| |
| if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader) |
| elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)", |
| VARSIZE_ANY_EXHDR(data), SizeOfHeader); |
| |
| /* read the MVDependencies header */ |
| dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies)); |
| |
| /* initialize pointer to the data part (skip the varlena header) */ |
| tmp = VARDATA_ANY(data); |
| |
| /* read the header fields and perform basic sanity checks */ |
| memcpy(&dependencies->magic, tmp, sizeof(uint32)); |
| tmp += sizeof(uint32); |
| memcpy(&dependencies->type, tmp, sizeof(uint32)); |
| tmp += sizeof(uint32); |
| memcpy(&dependencies->ndeps, tmp, sizeof(uint32)); |
| tmp += sizeof(uint32); |
| |
| if (dependencies->magic != STATS_DEPS_MAGIC) |
| elog(ERROR, "invalid dependency magic %d (expected %d)", |
| dependencies->magic, STATS_DEPS_MAGIC); |
| |
| if (dependencies->type != STATS_DEPS_TYPE_BASIC) |
| elog(ERROR, "invalid dependency type %d (expected %d)", |
| dependencies->type, STATS_DEPS_TYPE_BASIC); |
| |
| if (dependencies->ndeps == 0) |
| elog(ERROR, "invalid zero-length item array in MVDependencies"); |
| |
| /* what minimum bytea size do we expect for those parameters */ |
| min_expected_size = SizeOfItem(dependencies->ndeps); |
| |
| if (VARSIZE_ANY_EXHDR(data) < min_expected_size) |
| elog(ERROR, "invalid dependencies size %zd (expected at least %zd)", |
| VARSIZE_ANY_EXHDR(data), min_expected_size); |
| |
| /* allocate space for the MCV items */ |
| dependencies = repalloc(dependencies, offsetof(MVDependencies, deps) |
| + (dependencies->ndeps * sizeof(MVDependency *))); |
| |
| for (i = 0; i < dependencies->ndeps; i++) |
| { |
| double degree; |
| AttrNumber k; |
| MVDependency *d; |
| |
| /* degree of validity */ |
| memcpy(°ree, tmp, sizeof(double)); |
| tmp += sizeof(double); |
| |
| /* number of attributes */ |
| memcpy(&k, tmp, sizeof(AttrNumber)); |
| tmp += sizeof(AttrNumber); |
| |
| /* is the number of attributes valid? */ |
| Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS)); |
| |
| /* now that we know the number of attributes, allocate the dependency */ |
| d = (MVDependency *) palloc0(offsetof(MVDependency, attributes) |
| + (k * sizeof(AttrNumber))); |
| |
| d->degree = degree; |
| d->nattributes = k; |
| |
| /* copy attribute numbers */ |
| memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes); |
| tmp += sizeof(AttrNumber) * d->nattributes; |
| |
| dependencies->deps[i] = d; |
| |
| /* still within the bytea */ |
| Assert(tmp <= ((char *) data + VARSIZE_ANY(data))); |
| } |
| |
| /* we should have consumed the whole bytea exactly */ |
| Assert(tmp == ((char *) data + VARSIZE_ANY(data))); |
| |
| return dependencies; |
| } |
| |
| /* |
| * dependency_is_fully_matched |
| * checks that a functional dependency is fully matched given clauses on |
| * attributes (assuming the clauses are suitable equality clauses) |
| */ |
| static bool |
| dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums) |
| { |
| int j; |
| |
| /* |
| * Check that the dependency actually is fully covered by clauses. We have |
| * to translate all attribute numbers, as those are referenced |
| */ |
| for (j = 0; j < dependency->nattributes; j++) |
| { |
| int attnum = dependency->attributes[j]; |
| |
| if (!bms_is_member(attnum, attnums)) |
| return false; |
| } |
| |
| return true; |
| } |
| |
/*
 * statext_dependencies_load
 *		Load the functional dependencies for the indicated pg_statistic_ext tuple
 *
 * mvoid is the OID used to look up the pg_statistic_ext_data row via the
 * STATEXTDATASTXOID syscache.  If the stxddependencies column is NULL,
 * either return NULL (allow_null == true) or raise an error.  The returned
 * structure is palloc'd from the deserialized bytea; caller owns it.
 */
MVDependencies *
statext_dependencies_load(Oid mvoid, bool allow_null)
{
	MVDependencies *result;
	bool		isnull;
	Datum		deps;
	HeapTuple	htup;

	htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
	if (!HeapTupleIsValid(htup))
		elog(ERROR, "cache lookup failed for statistics object %u", mvoid);

	deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
						   Anum_pg_statistic_ext_data_stxddependencies, &isnull);
	if (isnull)
	{
		if (!allow_null)
		{
			elog(ERROR,
				 "requested statistics kind \"%c\" is not yet built for statistics object %u",
				 STATS_EXT_DEPENDENCIES, mvoid);
		}

		/* release the cache reference before bailing out */
		ReleaseSysCache(htup);
		return NULL;
	}

	result = statext_dependencies_deserialize(DatumGetByteaPP(deps));

	ReleaseSysCache(htup);

	return result;
}
| |
/*
 * pg_dependencies_in		- input routine for type pg_dependencies.
 *
 * pg_dependencies is real enough to be a table column, but it has no operations
 * of its own, and disallows input too
 */
Datum
pg_dependencies_in(PG_FUNCTION_ARGS)
{
	/*
	 * pg_dependencies stores the data in binary form and parsing text input
	 * is not needed, so disallow this.
	 */
	ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			 errmsg("cannot accept a value of type %s", "pg_dependencies")));

	PG_RETURN_VOID();			/* keep compiler quiet */
}
| |
| /* |
| * pg_dependencies - output routine for type pg_dependencies. |
| */ |
| Datum |
| pg_dependencies_out(PG_FUNCTION_ARGS) |
| { |
| bytea *data = PG_GETARG_BYTEA_PP(0); |
| MVDependencies *dependencies = statext_dependencies_deserialize(data); |
| int i, |
| j; |
| StringInfoData str; |
| |
| initStringInfo(&str); |
| appendStringInfoChar(&str, '{'); |
| |
| for (i = 0; i < dependencies->ndeps; i++) |
| { |
| MVDependency *dependency = dependencies->deps[i]; |
| |
| if (i > 0) |
| appendStringInfoString(&str, ", "); |
| |
| appendStringInfoChar(&str, '"'); |
| for (j = 0; j < dependency->nattributes; j++) |
| { |
| if (j == dependency->nattributes - 1) |
| appendStringInfoString(&str, " => "); |
| else if (j > 0) |
| appendStringInfoString(&str, ", "); |
| |
| appendStringInfo(&str, "%d", dependency->attributes[j]); |
| } |
| appendStringInfo(&str, "\": %f", dependency->degree); |
| } |
| |
| appendStringInfoChar(&str, '}'); |
| |
| PG_RETURN_CSTRING(str.data); |
| } |
| |
/*
 * pg_dependencies_recv		- binary input routine for type pg_dependencies.
 *
 * Like the text input routine, binary input is disallowed; values of this
 * type are only ever produced internally by ANALYZE.
 */
Datum
pg_dependencies_recv(PG_FUNCTION_ARGS)
{
	ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			 errmsg("cannot accept a value of type %s", "pg_dependencies")));

	PG_RETURN_VOID();			/* keep compiler quiet */
}
| |
/*
 * pg_dependencies_send		- binary output routine for type pg_dependencies.
 *
 * Functional dependencies are serialized in a bytea value (although the type
 * is named differently), so let's just send that.
 */
Datum
pg_dependencies_send(PG_FUNCTION_ARGS)
{
	/* delegate directly to bytea's send function */
	return byteasend(fcinfo);
}
| |
/*
 * dependency_is_compatible_clause
 *		Determines if the clause is compatible with functional dependencies
 *
 * Only clauses that have the form of equality to a pseudoconstant, or can be
 * interpreted that way, are currently accepted.  Furthermore the variable
 * part of the clause must be a simple Var belonging to the specified
 * relation, whose attribute number we return in *attnum on success.
 *
 * Accepted shapes: (Var = Const), (Const = Var), (Var IN (...)) via
 * ScalarArrayOpExpr with useOr, OR-clauses whose arms all reference the
 * same attribute, NOT-clauses (treated as "x = false"), and bare boolean
 * Vars (treated as "x = true").  On a false return *attnum may have been
 * clobbered (the OR branch writes it before failing).
 */
static bool
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
{
	Var		   *var;
	Node	   *clause_expr;

	if (IsA(clause, RestrictInfo))
	{
		RestrictInfo *rinfo = (RestrictInfo *) clause;

		/* Pseudoconstants are not interesting (they couldn't contain a Var) */
		if (rinfo->pseudoconstant)
			return false;

		/* Clauses referencing multiple, or no, varnos are incompatible */
		if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
			return false;

		/* unwrap and analyze the contained clause */
		clause = (Node *) rinfo->clause;
	}

	if (is_opclause(clause))
	{
		/* If it's an opclause, check for Var = Const or Const = Var. */
		OpExpr	   *expr = (OpExpr *) clause;

		/* Only expressions with two arguments are candidates. */
		if (list_length(expr->args) != 2)
			return false;

		/* Make sure non-selected argument is a pseudoconstant. */
		if (is_pseudo_constant_clause(lsecond(expr->args)))
			clause_expr = linitial(expr->args);
		else if (is_pseudo_constant_clause(linitial(expr->args)))
			clause_expr = lsecond(expr->args);
		else
			return false;

		/*
		 * If it's not an "=" operator, just ignore the clause, as it's not
		 * compatible with functional dependencies.
		 *
		 * This uses the function for estimating selectivity, not the operator
		 * directly (a bit awkward, but well ...).
		 *
		 * XXX this is pretty dubious; probably it'd be better to check btree
		 * or hash opclass membership, so as not to be fooled by custom
		 * selectivity functions, and to be more consistent with decisions
		 * elsewhere in the planner.
		 */
		if (get_oprrest(expr->opno) != F_EQSEL)
			return false;

		/* OK to proceed with checking "var" */
	}
	else if (IsA(clause, ScalarArrayOpExpr))
	{
		/* If it's an scalar array operator, check for Var IN Const. */
		ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;

		/*
		 * Reject ALL() variant, we only care about ANY/IN.
		 *
		 * XXX Maybe we should check if all the values are the same, and allow
		 * ALL in that case? Doesn't seem very practical, though.
		 */
		if (!expr->useOr)
			return false;

		/* Only expressions with two arguments are candidates. */
		if (list_length(expr->args) != 2)
			return false;

		/*
		 * We know it's always (Var IN Const), so we assume the var is the
		 * first argument, and pseudoconstant is the second one.
		 */
		if (!is_pseudo_constant_clause(lsecond(expr->args)))
			return false;

		clause_expr = linitial(expr->args);

		/*
		 * If it's not an "=" operator, just ignore the clause, as it's not
		 * compatible with functional dependencies. The operator is identified
		 * simply by looking at which function it uses to estimate
		 * selectivity. That's a bit strange, but it's what other similar
		 * places do.
		 */
		if (get_oprrest(expr->opno) != F_EQSEL)
			return false;

		/* OK to proceed with checking "var" */
	}
	else if (is_orclause(clause))
	{
		BoolExpr   *bool_expr = (BoolExpr *) clause;
		ListCell   *lc;

		/* start with no attribute number */
		*attnum = InvalidAttrNumber;

		foreach(lc, bool_expr->args)
		{
			AttrNumber	clause_attnum;

			/*
			 * Had we found incompatible clause in the arguments, treat the
			 * whole clause as incompatible.
			 */
			if (!dependency_is_compatible_clause((Node *) lfirst(lc),
												 relid, &clause_attnum))
				return false;

			/* remember the attnum seen in the first arm */
			if (*attnum == InvalidAttrNumber)
				*attnum = clause_attnum;

			/* ensure all the variables are the same (same attnum) */
			if (*attnum != clause_attnum)
				return false;
		}

		/* the Var is already checked by the recursive call */
		return true;
	}
	else if (is_notclause(clause))
	{
		/*
		 * "NOT x" can be interpreted as "x = false", so get the argument and
		 * proceed with seeing if it's a suitable Var.
		 */
		clause_expr = (Node *) get_notclausearg(clause);
	}
	else
	{
		/*
		 * A boolean expression "x" can be interpreted as "x = true", so
		 * proceed with seeing if it's a suitable Var.
		 */
		clause_expr = (Node *) clause;
	}

	/*
	 * We may ignore any RelabelType node above the operand.  (There won't be
	 * more than one, since eval_const_expressions has been applied already.)
	 */
	if (IsA(clause_expr, RelabelType))
		clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;

	/* We only support plain Vars for now */
	if (!IsA(clause_expr, Var))
		return false;

	/* OK, we know we have a Var */
	var = (Var *) clause_expr;

	/* Ensure Var is from the correct relation */
	if (var->varno != relid)
		return false;

	/* We also better ensure the Var is from the current level */
	if (var->varlevelsup != 0)
		return false;

	/* Also ignore system attributes (we don't allow stats on those) */
	if (!AttrNumberIsForUserDefinedAttr(var->varattno))
		return false;

	*attnum = var->varattno;
	return true;
}
| |
| /* |
| * find_strongest_dependency |
| * find the strongest dependency on the attributes |
| * |
| * When applying functional dependencies, we start with the strongest |
| * dependencies. That is, we select the dependency that: |
| * |
| * (a) has all attributes covered by equality clauses |
| * |
| * (b) has the most attributes |
| * |
| * (c) has the highest degree of validity |
| * |
| * This guarantees that we eliminate the most redundant conditions first |
| * (see the comment in dependencies_clauselist_selectivity). |
| */ |
| static MVDependency * |
| find_strongest_dependency(MVDependencies **dependencies, int ndependencies, |
| Bitmapset *attnums) |
| { |
| int i, |
| j; |
| MVDependency *strongest = NULL; |
| |
| /* number of attnums in clauses */ |
| int nattnums = bms_num_members(attnums); |
| |
| /* |
| * Iterate over the MVDependency items and find the strongest one from the |
| * fully-matched dependencies. We do the cheap checks first, before |
| * matching it against the attnums. |
| */ |
| for (i = 0; i < ndependencies; i++) |
| { |
| for (j = 0; j < dependencies[i]->ndeps; j++) |
| { |
| MVDependency *dependency = dependencies[i]->deps[j]; |
| |
| /* |
| * Skip dependencies referencing more attributes than available |
| * clauses, as those can't be fully matched. |
| */ |
| if (dependency->nattributes > nattnums) |
| continue; |
| |
| if (strongest) |
| { |
| /* skip dependencies on fewer attributes than the strongest. */ |
| if (dependency->nattributes < strongest->nattributes) |
| continue; |
| |
| /* also skip weaker dependencies when attribute count matches */ |
| if (strongest->nattributes == dependency->nattributes && |
| strongest->degree > dependency->degree) |
| continue; |
| } |
| |
| /* |
| * this dependency is stronger, but we must still check that it's |
| * fully matched to these attnums. We perform this check last as |
| * it's slightly more expensive than the previous checks. |
| */ |
| if (dependency_is_fully_matched(dependency, attnums)) |
| strongest = dependency; /* save new best match */ |
| } |
| } |
| |
| return strongest; |
| } |
| |
| /* |
| * clauselist_apply_dependencies |
| * Apply the specified functional dependencies to a list of clauses and |
| * return the estimated selectivity of the clauses that are compatible |
| * with any of the given dependencies. |
| * |
| * This will estimate all not-already-estimated clauses that are compatible |
| * with functional dependencies, and which have an attribute mentioned by any |
| * of the given dependencies (either as an implying or implied attribute). |
| * |
| * Given (lists of) clauses on attributes (a,b) and a functional dependency |
| * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined |
| * using the formula |
| * |
| * P(a,b) = f * P(a) + (1-f) * P(a) * P(b) |
| * |
| * where 'f' is the degree of dependency. This reflects the fact that we |
| * expect a fraction f of all rows to be consistent with the dependency |
| * (a=>b), and so have a selectivity of P(a), while the remaining rows are |
| * treated as independent. |
| * |
| * In practice, we use a slightly modified version of this formula, which uses |
| * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result |
| * should obviously not exceed either column's individual selectivity. I.e., |
| * we actually combine selectivities using the formula |
| * |
| * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b) |
| * |
| * This can make quite a difference if the specific values matching the |
| * clauses are not consistent with the functional dependency. |
| */ |
| static Selectivity |
| clauselist_apply_dependencies(PlannerInfo *root, List *clauses, |
| int varRelid, JoinType jointype, |
| SpecialJoinInfo *sjinfo, |
| MVDependency **dependencies, int ndependencies, |
| AttrNumber *list_attnums, |
| Bitmapset **estimatedclauses) |
| { |
| Bitmapset *attnums; |
| int i; |
| int j; |
| int nattrs; |
| Selectivity *attr_sel; |
| int attidx; |
| int listidx; |
| ListCell *l; |
| Selectivity s1; |
| |
| /* |
| * Extract the attnums of all implying and implied attributes from all the |
| * given dependencies. Each of these attributes is expected to have at |
| * least 1 not-already-estimated compatible clause that we will estimate |
| * here. |
| */ |
| attnums = NULL; |
| for (i = 0; i < ndependencies; i++) |
| { |
| for (j = 0; j < dependencies[i]->nattributes; j++) |
| { |
| AttrNumber attnum = dependencies[i]->attributes[j]; |
| |
| attnums = bms_add_member(attnums, attnum); |
| } |
| } |
| |
| /* |
| * Compute per-column selectivity estimates for each of these attributes, |
| * and mark all the corresponding clauses as estimated. |
| */ |
| nattrs = bms_num_members(attnums); |
| attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs); |
| |
| attidx = 0; |
| i = -1; |
| while ((i = bms_next_member(attnums, i)) >= 0) |
| { |
| List *attr_clauses = NIL; |
| Selectivity simple_sel; |
| |
| listidx = -1; |
| foreach(l, clauses) |
| { |
| Node *clause = (Node *) lfirst(l); |
| |
| listidx++; |
| if (list_attnums[listidx] == i) |
| { |
| attr_clauses = lappend(attr_clauses, clause); |
| *estimatedclauses = bms_add_member(*estimatedclauses, listidx); |
| } |
| } |
| |
| simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid, |
| jointype, sjinfo, false, false); |
| attr_sel[attidx++] = simple_sel; |
| } |
| |
| /* |
| * Now combine these selectivities using the dependency information. For |
| * chains of dependencies such as a -> b -> c, the b -> c dependency will |
| * come before the a -> b dependency in the array, so we traverse the |
| * array backwards to ensure such chains are computed in the right order. |
| * |
| * As explained above, pairs of selectivities are combined using the |
| * formula |
| * |
| * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b) |
| * |
| * to ensure that the combined selectivity is never greater than either |
| * individual selectivity. |
| * |
| * Where multiple dependencies apply (e.g., a -> b -> c), we use |
| * conditional probabilities to compute the overall result as follows: |
| * |
| * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a) |
| * |
| * so we replace the selectivities of all implied attributes with |
| * conditional probabilities, that are conditional on all their implying |
| * attributes. The selectivities of all other non-implied attributes are |
| * left as they are. |
| */ |
| for (i = ndependencies - 1; i >= 0; i--) |
| { |
| MVDependency *dependency = dependencies[i]; |
| AttrNumber attnum; |
| Selectivity s2; |
| double f; |
| |
| /* Selectivity of all the implying attributes */ |
| s1 = 1.0; |
| for (j = 0; j < dependency->nattributes - 1; j++) |
| { |
| attnum = dependency->attributes[j]; |
| attidx = bms_member_index(attnums, attnum); |
| s1 *= attr_sel[attidx]; |
| } |
| |
| /* Original selectivity of the implied attribute */ |
| attnum = dependency->attributes[j]; |
| attidx = bms_member_index(attnums, attnum); |
| s2 = attr_sel[attidx]; |
| |
| /* |
| * Replace s2 with the conditional probability s2 given s1, computed |
| * using the formula P(b|a) = P(a,b) / P(a), which simplifies to |
| * |
| * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b) |
| * |
| * where P(a) = s1, the selectivity of the implying attributes, and |
| * P(b) = s2, the selectivity of the implied attribute. |
| */ |
| f = dependency->degree; |
| |
| if (s1 <= s2) |
| attr_sel[attidx] = f + (1 - f) * s2; |
| else |
| attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2; |
| } |
| |
| /* |
| * The overall selectivity of all the clauses on all these attributes is |
| * then the product of all the original (non-implied) probabilities and |
| * the new conditional (implied) probabilities. |
| */ |
| s1 = 1.0; |
| for (i = 0; i < nattrs; i++) |
| s1 *= attr_sel[i]; |
| |
| CLAMP_PROBABILITY(s1); |
| |
| pfree(attr_sel); |
| bms_free(attnums); |
| |
| return s1; |
| } |
| |
| /* |
| * dependency_is_compatible_expression |
| * Determines if the expression is compatible with functional dependencies |
| * |
| * Similar to dependency_is_compatible_clause, but doesn't enforce that the |
| * expression is a simple Var. OTOH we check that there's at least one |
| * statistics object matching the expression. |
| */ |
| static bool |
| dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr) |
| { |
| List *vars; |
| ListCell *lc, |
| *lc2; |
| Node *clause_expr; |
| |
| if (IsA(clause, RestrictInfo)) |
| { |
| RestrictInfo *rinfo = (RestrictInfo *) clause; |
| |
| /* Pseudoconstants are not interesting (they couldn't contain a Var) */ |
| if (rinfo->pseudoconstant) |
| return false; |
| |
| /* Clauses referencing multiple, or no, varnos are incompatible */ |
| if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) |
| return false; |
| |
| clause = (Node *) rinfo->clause; |
| } |
| |
| if (is_opclause(clause)) |
| { |
| /* If it's an opclause, check for Var = Const or Const = Var. */ |
| OpExpr *expr = (OpExpr *) clause; |
| |
| /* Only expressions with two arguments are candidates. */ |
| if (list_length(expr->args) != 2) |
| return false; |
| |
| /* Make sure non-selected argument is a pseudoconstant. */ |
| if (is_pseudo_constant_clause(lsecond(expr->args))) |
| clause_expr = linitial(expr->args); |
| else if (is_pseudo_constant_clause(linitial(expr->args))) |
| clause_expr = lsecond(expr->args); |
| else |
| return false; |
| |
| /* |
| * If it's not an "=" operator, just ignore the clause, as it's not |
| * compatible with functional dependencies. |
| * |
| * This uses the function for estimating selectivity, not the operator |
| * directly (a bit awkward, but well ...). |
| * |
| * XXX this is pretty dubious; probably it'd be better to check btree |
| * or hash opclass membership, so as not to be fooled by custom |
| * selectivity functions, and to be more consistent with decisions |
| * elsewhere in the planner. |
| */ |
| if (get_oprrest(expr->opno) != F_EQSEL) |
| return false; |
| |
| /* OK to proceed with checking "var" */ |
| } |
| else if (IsA(clause, ScalarArrayOpExpr)) |
| { |
| /* If it's an scalar array operator, check for Var IN Const. */ |
| ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; |
| |
| /* |
| * Reject ALL() variant, we only care about ANY/IN. |
| * |
| * FIXME Maybe we should check if all the values are the same, and |
| * allow ALL in that case? Doesn't seem very practical, though. |
| */ |
| if (!expr->useOr) |
| return false; |
| |
| /* Only expressions with two arguments are candidates. */ |
| if (list_length(expr->args) != 2) |
| return false; |
| |
| /* |
| * We know it's always (Var IN Const), so we assume the var is the |
| * first argument, and pseudoconstant is the second one. |
| */ |
| if (!is_pseudo_constant_clause(lsecond(expr->args))) |
| return false; |
| |
| clause_expr = linitial(expr->args); |
| |
| /* |
| * If it's not an "=" operator, just ignore the clause, as it's not |
| * compatible with functional dependencies. The operator is identified |
| * simply by looking at which function it uses to estimate |
| * selectivity. That's a bit strange, but it's what other similar |
| * places do. |
| */ |
| if (get_oprrest(expr->opno) != F_EQSEL) |
| return false; |
| |
| /* OK to proceed with checking "var" */ |
| } |
| else if (is_orclause(clause)) |
| { |
| BoolExpr *bool_expr = (BoolExpr *) clause; |
| ListCell *lc; |
| |
| /* start with no expression (we'll use the first match) */ |
| *expr = NULL; |
| |
| foreach(lc, bool_expr->args) |
| { |
| Node *or_expr = NULL; |
| |
| /* |
| * Had we found incompatible expression in the arguments, treat |
| * the whole expression as incompatible. |
| */ |
| if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid, |
| statlist, &or_expr)) |
| return false; |
| |
| if (*expr == NULL) |
| *expr = or_expr; |
| |
| /* ensure all the expressions are the same */ |
| if (!equal(or_expr, *expr)) |
| return false; |
| } |
| |
| /* the expression is already checked by the recursive call */ |
| return true; |
| } |
| else if (is_notclause(clause)) |
| { |
| /* |
| * "NOT x" can be interpreted as "x = false", so get the argument and |
| * proceed with seeing if it's a suitable Var. |
| */ |
| clause_expr = (Node *) get_notclausearg(clause); |
| } |
| else |
| { |
| /* |
| * A boolean expression "x" can be interpreted as "x = true", so |
| * proceed with seeing if it's a suitable Var. |
| */ |
| clause_expr = (Node *) clause; |
| } |
| |
| /* |
| * We may ignore any RelabelType node above the operand. (There won't be |
| * more than one, since eval_const_expressions has been applied already.) |
| */ |
| if (IsA(clause_expr, RelabelType)) |
| clause_expr = (Node *) ((RelabelType *) clause_expr)->arg; |
| |
| vars = pull_var_clause(clause_expr, 0); |
| |
| foreach(lc, vars) |
| { |
| Var *var = (Var *) lfirst(lc); |
| |
| /* Ensure Var is from the correct relation */ |
| if (var->varno != relid) |
| return false; |
| |
| /* We also better ensure the Var is from the current level */ |
| if (var->varlevelsup != 0) |
| return false; |
| |
| /* Also ignore system attributes (we don't allow stats on those) */ |
| if (!AttrNumberIsForUserDefinedAttr(var->varattno)) |
| return false; |
| } |
| |
| /* |
| * Check if we actually have a matching statistics for the expression. |
| * |
| * XXX Maybe this is an overkill. We'll eliminate the expressions later. |
| */ |
| foreach(lc, statlist) |
| { |
| StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); |
| |
| /* ignore stats without dependencies */ |
| if (info->kind != STATS_EXT_DEPENDENCIES) |
| continue; |
| |
| foreach(lc2, info->exprs) |
| { |
| Node *stat_expr = (Node *) lfirst(lc2); |
| |
| if (equal(clause_expr, stat_expr)) |
| { |
| *expr = stat_expr; |
| return true; |
| } |
| } |
| } |
| |
| return false; |
| } |
| |
| /* |
| * dependencies_clauselist_selectivity |
| * Return the estimated selectivity of (a subset of) the given clauses |
| * using functional dependency statistics, or 1.0 if no useful functional |
| * dependency statistic exists. |
| * |
| * 'estimatedclauses' is an input/output argument that gets a bit set |
| * corresponding to the (zero-based) list index of each clause that is included |
| * in the estimated selectivity. |
| * |
| * Given equality clauses on attributes (a,b) we find the strongest dependency |
| * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected |
| * dependency, we then combine the per-clause selectivities using the formula |
| * |
| * P(a,b) = f * P(a) + (1-f) * P(a) * P(b) |
| * |
| * where 'f' is the degree of the dependency. (Actually we use a slightly |
| * modified version of this formula -- see clauselist_apply_dependencies()). |
| * |
| * With clauses on more than two attributes, the dependencies are applied |
| * recursively, starting with the widest/strongest dependencies. For example |
| * P(a,b,c) is first split like this: |
| * |
| * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c) |
| * |
| * assuming (a,b=>c) is the strongest dependency. |
| */ |
Selectivity
dependencies_clauselist_selectivity(PlannerInfo *root,
									List *clauses,
									int varRelid,
									JoinType jointype,
									SpecialJoinInfo *sjinfo,
									RelOptInfo *rel,
									Bitmapset **estimatedclauses)
{
	Selectivity s1 = 1.0;
	ListCell   *l;
	Bitmapset  *clauses_attnums = NULL;
	AttrNumber *list_attnums;
	int			listidx;
	MVDependencies **func_dependencies;
	int			nfunc_dependencies;
	int			total_ndeps;
	MVDependency **dependencies;
	int			ndependencies;
	int			i;
	AttrNumber	attnum_offset;
	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);

	/* unique expressions (assigned negative attnums, see below) */
	Node	  **unique_exprs;
	int			unique_exprs_cnt;

	/*
	 * When dealing with regular inheritance trees, ignore extended stats
	 * (which were built without data from child rels, and thus do not
	 * represent them). For partitioned tables there's no data in the
	 * non-leaf relations, so we build stats only for the inheritance tree.
	 * So for partitioned tables we do consider extended stats.
	 */
	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
		return 1.0;

	/* check if there's any stats that might be useful for us. */
	if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
		return 1.0;

	/* per-clause attnum (or InvalidAttrNumber for incompatible clauses) */
	list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
										 list_length(clauses));

	/*
	 * We allocate space as if every clause was a unique expression, although
	 * that's probably overkill. Some will be simple column references that
	 * we'll translate to attnums, and there might be duplicates. But it's
	 * easier and cheaper to just do one allocation than repalloc later.
	 */
	unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
	unique_exprs_cnt = 0;

	/*
	 * Pre-process the clauses list to extract the attnums seen in each item.
	 * We need to determine if there's any clauses which will be useful for
	 * dependency selectivity estimations. Along the way we'll record all of
	 * the attnums for each clause in a list which we'll reference later so we
	 * don't need to repeat the same work again. We'll also keep track of all
	 * attnums seen.
	 *
	 * We also skip clauses that we already estimated using different types of
	 * statistics (we treat them as incompatible).
	 *
	 * To handle expressions, we assign them negative attnums, as if it was a
	 * system attribute (this is fine, as we only allow extended stats on user
	 * attributes). And then we offset everything by the number of
	 * expressions, so that we can store the values in a bitmapset.
	 */
	listidx = 0;
	foreach(l, clauses)
	{
		Node	   *clause = (Node *) lfirst(l);
		AttrNumber	attnum;
		Node	   *expr = NULL;

		/* ignore clause by default */
		list_attnums[listidx] = InvalidAttrNumber;

		if (!bms_is_member(listidx, *estimatedclauses))
		{
			/*
			 * If it's a simple column reference, just extract the attnum. If
			 * it's an expression, assign a negative attnum as if it was a
			 * system attribute.
			 */
			if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
			{
				list_attnums[listidx] = attnum;
			}
			else if (dependency_is_compatible_expression(clause, rel->relid,
														 rel->statlist,
														 &expr))
			{
				/* special attnum assigned to this expression */
				attnum = InvalidAttrNumber;

				Assert(expr != NULL);

				/* If the expression is duplicate, use the same attnum. */
				for (i = 0; i < unique_exprs_cnt; i++)
				{
					if (equal(unique_exprs[i], expr))
					{
						/* negative attribute number to expression */
						attnum = -(i + 1);
						break;
					}
				}

				/* not found in the list, so add it */
				if (attnum == InvalidAttrNumber)
				{
					unique_exprs[unique_exprs_cnt++] = expr;

					/* after incrementing the value, to get -1, -2, ... */
					attnum = (-unique_exprs_cnt);
				}

				/* remember which attnum was assigned to this clause */
				list_attnums[listidx] = attnum;
			}
		}

		listidx++;
	}

	Assert(listidx == list_length(clauses));

	/*
	 * How much we need to offset the attnums? If there are no expressions,
	 * then no offset is needed. Otherwise we need to offset enough for the
	 * lowest value (-unique_exprs_cnt) to become 1.
	 */
	if (unique_exprs_cnt > 0)
		attnum_offset = (unique_exprs_cnt + 1);
	else
		attnum_offset = 0;

	/*
	 * Now that we know how many expressions there are, we can offset the
	 * values just enough to build the bitmapset.
	 */
	for (i = 0; i < list_length(clauses); i++)
	{
		AttrNumber	attnum;

		/* ignore incompatible or already estimated clauses */
		if (list_attnums[i] == InvalidAttrNumber)
			continue;

		/* make sure the attnum is in the expected range */
		Assert(list_attnums[i] >= (-unique_exprs_cnt));
		Assert(list_attnums[i] <= MaxHeapAttributeNumber);

		/* make sure the attnum is positive (valid AttrNumber) */
		attnum = list_attnums[i] + attnum_offset;

		/*
		 * Either it's a regular attribute, or it's an expression, in which
		 * case we must not have seen it before (expressions are unique).
		 *
		 * XXX Check whether it's a regular attribute has to be done using the
		 * original attnum, while the second check has to use the value with
		 * an offset.
		 */
		Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
			   !bms_is_member(attnum, clauses_attnums));

		/*
		 * Remember the offset attnum, both for attributes and expressions.
		 * We'll pass list_attnums to clauselist_apply_dependencies, which
		 * uses it to identify clauses in a bitmap. We could also pass the
		 * offset, but this is more convenient.
		 */
		list_attnums[i] = attnum;

		clauses_attnums = bms_add_member(clauses_attnums, attnum);
	}

	/*
	 * If there's not at least two distinct attnums and expressions, then
	 * reject the whole list of clauses. We must return 1.0 so the calling
	 * function's selectivity is unaffected.
	 */
	if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
	{
		bms_free(clauses_attnums);
		pfree(list_attnums);
		return 1.0;
	}

	/*
	 * Load all functional dependencies matching at least two parameters. We
	 * can simply consider all dependencies at once, without having to search
	 * for the best statistics object.
	 *
	 * To not waste cycles and memory, we deserialize dependencies only for
	 * statistics that match at least two attributes. The array is allocated
	 * with the assumption that all objects match - we could grow the array to
	 * make it just the right size, but it's likely wasteful anyway thanks to
	 * moving the freed chunks to freelists etc.
	 */
	func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
												   list_length(rel->statlist));
	nfunc_dependencies = 0;
	total_ndeps = 0;

	foreach(l, rel->statlist)
	{
		StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
		int			nmatched;
		int			nexprs;
		int			k;
		MVDependencies *deps;

		/* skip statistics that are not of the correct type */
		if (stat->kind != STATS_EXT_DEPENDENCIES)
			continue;

		/*
		 * Count matching attributes - we have to undo the attnum offsets. The
		 * input attribute numbers are not offset (expressions are not
		 * included in stat->keys, so it's not necessary). But we need to
		 * offset it before checking against clauses_attnums.
		 */
		nmatched = 0;
		k = -1;
		while ((k = bms_next_member(stat->keys, k)) >= 0)
		{
			AttrNumber	attnum = (AttrNumber) k;

			/* skip expressions */
			if (!AttrNumberIsForUserDefinedAttr(attnum))
				continue;

			/* apply the same offset as above */
			attnum += attnum_offset;

			if (bms_is_member(attnum, clauses_attnums))
				nmatched++;
		}

		/* count matching expressions */
		nexprs = 0;
		for (i = 0; i < unique_exprs_cnt; i++)
		{
			ListCell   *lc;

			foreach(lc, stat->exprs)
			{
				Node	   *stat_expr = (Node *) lfirst(lc);

				/* try to match it */
				if (equal(stat_expr, unique_exprs[i]))
					nexprs++;
			}
		}

		/*
		 * Skip objects matching fewer than two attributes/expressions from
		 * clauses.
		 */
		if (nmatched + nexprs < 2)
			continue;

		deps = statext_dependencies_load(stat->statOid, false);

		/*
		 * The expressions may be represented by different attnums in the
		 * stats, we need to remap them to be consistent with the clauses.
		 * That will make the later steps (e.g. picking the strongest item and
		 * so on) much simpler and cheaper, because it won't need to care
		 * about the offset at all.
		 *
		 * When we're at it, we can ignore dependencies that are not fully
		 * matched by clauses (i.e. referencing attributes or expressions that
		 * are not in the clauses).
		 *
		 * We have to do this for all statistics, as long as there are any
		 * expressions - we need to shift the attnums in all dependencies.
		 *
		 * XXX Maybe we should do this always, because it also eliminates some
		 * of the dependencies early. It might be cheaper than having to walk
		 * the longer list in find_strongest_dependency later, especially as
		 * we need to do that repeatedly?
		 *
		 * XXX We have to do this even when there are no expressions in
		 * clauses, otherwise find_strongest_dependency may fail for stats
		 * with expressions (due to lookup of negative value in bitmap). So we
		 * need to at least filter out those dependencies. Maybe we could do
		 * it in a cheaper way (if there are no expr clauses, we can just
		 * discard all negative attnums without any lookups).
		 */
		if (unique_exprs_cnt > 0 || stat->exprs != NIL)
		{
			int			ndeps = 0;

			for (i = 0; i < deps->ndeps; i++)
			{
				bool		skip = false;
				MVDependency *dep = deps->deps[i];
				int			j;

				for (j = 0; j < dep->nattributes; j++)
				{
					int			idx;
					Node	   *expr;
					int			k;
					AttrNumber	unique_attnum = InvalidAttrNumber;
					AttrNumber	attnum;

					/* undo the per-statistics offset */
					attnum = dep->attributes[j];

					/*
					 * For regular attributes we can simply check if it
					 * matches any clause. If there's no matching clause, we
					 * can just ignore it. We need to offset the attnum
					 * though.
					 */
					if (AttrNumberIsForUserDefinedAttr(attnum))
					{
						dep->attributes[j] = attnum + attnum_offset;

						if (!bms_is_member(dep->attributes[j], clauses_attnums))
						{
							skip = true;
							break;
						}

						continue;
					}

					/*
					 * the attnum should be a valid system attnum (-1, -2,
					 * ...)
					 */
					Assert(AttributeNumberIsValid(attnum));

					/*
					 * For expressions, we need to do two translations. First
					 * we have to translate the negative attnum to index in
					 * the list of expressions (in the statistics object).
					 * Then we need to see if there's a matching clause. The
					 * index of the unique expression determines the attnum
					 * (and we offset it).
					 */
					idx = -(1 + attnum);

					/* sanity check that the expression index is valid */
					Assert((idx >= 0) && (idx < list_length(stat->exprs)));

					expr = (Node *) list_nth(stat->exprs, idx);

					/* try to find the expression in the unique list */
					for (k = 0; k < unique_exprs_cnt; k++)
					{
						/*
						 * found a matching unique expression, use the attnum
						 * (derived from index of the unique expression)
						 */
						if (equal(unique_exprs[k], expr))
						{
							unique_attnum = -(k + 1) + attnum_offset;
							break;
						}
					}

					/*
					 * Found no matching expression, so we can simply skip
					 * this dependency, because there's no chance it will be
					 * fully covered.
					 */
					if (unique_attnum == InvalidAttrNumber)
					{
						skip = true;
						break;
					}

					/* otherwise remap it to the new attnum */
					dep->attributes[j] = unique_attnum;
				}

				/* if found a matching dependency, keep it */
				if (!skip)
				{
					/* maybe we've skipped something earlier, so move it */
					if (ndeps != i)
						deps->deps[ndeps] = deps->deps[i];

					ndeps++;
				}
			}

			deps->ndeps = ndeps;
		}

		/*
		 * It's possible we've removed all dependencies, in which case we
		 * don't bother adding it to the list.
		 */
		if (deps->ndeps > 0)
		{
			func_dependencies[nfunc_dependencies] = deps;
			total_ndeps += deps->ndeps;
			nfunc_dependencies++;
		}
	}

	/* if no matching stats could be found then we've nothing to do */
	if (nfunc_dependencies == 0)
	{
		pfree(func_dependencies);
		bms_free(clauses_attnums);
		pfree(list_attnums);
		pfree(unique_exprs);
		return 1.0;
	}

	/*
	 * Work out which dependencies we can apply, starting with the
	 * widest/strongest ones, and proceeding to smaller/weaker ones.
	 */
	dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
											total_ndeps);
	ndependencies = 0;

	while (true)
	{
		MVDependency *dependency;
		AttrNumber	attnum;

		/* the widest/strongest dependency, fully matched by clauses */
		dependency = find_strongest_dependency(func_dependencies,
											   nfunc_dependencies,
											   clauses_attnums);
		if (!dependency)
			break;

		dependencies[ndependencies++] = dependency;

		/* Ignore dependencies using this implied attribute in later loops */
		attnum = dependency->attributes[dependency->nattributes - 1];
		clauses_attnums = bms_del_member(clauses_attnums, attnum);
	}

	/*
	 * If we found applicable dependencies, use them to estimate all
	 * compatible clauses on attributes that they refer to.
	 */
	if (ndependencies != 0)
		s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
										   sjinfo, dependencies, ndependencies,
										   list_attnums, estimatedclauses);

	/* free deserialized functional dependencies (and then the array) */
	for (i = 0; i < nfunc_dependencies; i++)
		pfree(func_dependencies[i]);

	pfree(dependencies);
	pfree(func_dependencies);
	bms_free(clauses_attnums);
	pfree(list_attnums);
	pfree(unique_exprs);

	return s1;
}