| /*------------------------------------------------------------------------- |
| * |
| * execGrouping.c |
| * executor utility routines for grouping, hashing, and aggregation |
| * |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/executor/execGrouping.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include <limits.h> |
| |
| #include "access/parallel.h" |
| #include "common/hashfn.h" |
| #include "executor/executor.h" |
| #include "miscadmin.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| |
| static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2); |
| static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb, |
| const MinimalTuple tuple); |
| static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable, |
| TupleTableSlot *slot, |
| bool *isnew, uint32 hash); |
| |
| /* |
| * Define parameters for tuple hash table code generation. The interface is |
| * *also* declared in execnodes.h (to generate the types, which are externally |
| * visible). |
| */ |
| #define SH_PREFIX tuplehash |
| #define SH_ELEMENT_TYPE TupleHashEntryData |
| #define SH_KEY_TYPE MinimalTuple |
| #define SH_KEY firstTuple |
| #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key) |
| #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0 |
| #define SH_SCOPE extern |
| #define SH_STORE_HASH |
| #define SH_GET_HASH(tb, a) a->hash |
| #define SH_DEFINE |
| #include "lib/simplehash.h" |
| |
| |
| /***************************************************************************** |
| * Utility routines for grouping tuples together |
| *****************************************************************************/ |
| |
| /* |
| * execTuplesMatchPrepare |
| * Build expression that can be evaluated using ExecQual(), returning |
| * whether an ExprContext's inner/outer tuples are NOT DISTINCT |
| */ |
| ExprState * |
| execTuplesMatchPrepare(TupleDesc desc, |
| int numCols, |
| const AttrNumber *keyColIdx, |
| const Oid *eqOperators, |
| const Oid *collations, |
| PlanState *parent) |
| { |
	Oid		   *eqFunctions;
	int			i;
	ExprState  *expr;

	if (numCols == 0)
		return NULL;

	/* look up the equality functions for the operators */
	eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
	for (i = 0; i < numCols; i++)
		eqFunctions[i] = get_opcode(eqOperators[i]);
| |
| /* build actual expression */ |
| expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL, |
| numCols, keyColIdx, eqFunctions, collations, |
| parent); |
| |
| return expr; |
| } |
| |
| /* |
| * execTuplesHashPrepare |
| * Look up the equality and hashing functions needed for a TupleHashTable. |
| * |
| * This is similar to execTuplesMatchPrepare, but we also need to find the |
 * hash functions associated with the equality operators. *eqFuncOids and
 * *hashFunctions receive the palloc'd result arrays.
| * |
| * Note: we expect that the given operators are not cross-type comparisons. |
| */ |
| void |
| execTuplesHashPrepare(int numCols, |
| const Oid *eqOperators, |
| Oid **eqFuncOids, |
| FmgrInfo **hashFunctions) |
| { |
| int i; |
| |
| *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid)); |
| *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); |
| |
| for (i = 0; i < numCols; i++) |
| { |
| Oid eq_opr = eqOperators[i]; |
| Oid eq_function; |
| Oid left_hash_function; |
| Oid right_hash_function; |
| |
| eq_function = get_opcode(eq_opr); |
| if (!get_op_hash_functions(eq_opr, |
| &left_hash_function, &right_hash_function)) |
| elog(ERROR, "could not find hash function for hash operator %u", |
| eq_opr); |
| /* We're not supporting cross-type cases here */ |
| Assert(left_hash_function == right_hash_function); |
| (*eqFuncOids)[i] = eq_function; |
| fmgr_info(right_hash_function, &(*hashFunctions)[i]); |
| } |
| } |
| |
| |
| /***************************************************************************** |
| * Utility routines for all-in-memory hash tables |
| * |
| * These routines build hash tables for grouping tuples together (eg, for |
| * hash aggregation). There is one entry for each not-distinct set of tuples |
| * presented. |
| *****************************************************************************/ |
| |
| /* |
| * Construct an empty TupleHashTable |
| * |
| * numCols, keyColIdx: identify the tuple fields to use as lookup key |
| * eqfunctions: equality comparison functions to use |
| * hashfunctions: datatype-specific hashing functions to use |
| * nbuckets: initial estimate of hashtable size |
| * additionalsize: size of data stored in ->additional |
| * metacxt: memory context for long-lived allocation, but not per-entry data |
| * tablecxt: memory context in which to store table entries |
 * tempcxt: short-lived context for evaluating hash and comparison functions
| * |
| * The function arrays may be made with execTuplesHashPrepare(). Note they |
| * are not cross-type functions, but expect to see the table datatype(s) |
| * on both sides. |
| * |
| * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in |
| * storage that will live as long as the hashtable does. |
| */ |
| TupleHashTable |
| BuildTupleHashTableExt(PlanState *parent, |
| TupleDesc inputDesc, |
| int numCols, AttrNumber *keyColIdx, |
| const Oid *eqfuncoids, |
| FmgrInfo *hashfunctions, |
| Oid *collations, |
| long nbuckets, Size additionalsize, |
| MemoryContext metacxt, |
| MemoryContext tablecxt, |
| MemoryContext tempcxt, |
| bool use_variable_hash_iv) |
| { |
| TupleHashTable hashtable; |
| Size entrysize = sizeof(TupleHashEntryData) + additionalsize; |
| Size hash_mem_limit; |
| MemoryContext oldcontext; |
| bool allow_jit; |
| |
| /* |
| * Many callers pass "long" values for nbuckets, which means that we can |
| * receive a bogus value on 64-bit machines. It seems unwise to change |
| * this function's signature in released branches, so instead assume that |
| * a negative input means long->int overflow occurred. |
| */ |
| if (nbuckets <= 0) |
| nbuckets = INT_MAX; |
| |
| Assert(nbuckets > 0); |
| |
| /* Limit initial table size request to not more than hash_mem */ |
| hash_mem_limit = get_hash_memory_limit() / entrysize; |
| if (nbuckets > hash_mem_limit) |
| nbuckets = hash_mem_limit; |
| |
| oldcontext = MemoryContextSwitchTo(metacxt); |
| |
| hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData)); |
| |
| hashtable->numCols = numCols; |
| hashtable->keyColIdx = keyColIdx; |
| hashtable->tab_hash_funcs = hashfunctions; |
| hashtable->tab_collations = collations; |
| hashtable->tablecxt = tablecxt; |
| hashtable->tempcxt = tempcxt; |
| hashtable->entrysize = entrysize; |
| hashtable->tableslot = NULL; /* will be made on first lookup */ |
| hashtable->inputslot = NULL; |
| hashtable->in_hash_funcs = NULL; |
| hashtable->cur_eq_func = NULL; |
| |
| /* |
| * If parallelism is in use, even if the leader backend is performing the |
| * scan itself, we don't want to create the hashtable exactly the same way |
	 * in all workers.  As hashtables are iterated over in keyspace order,
	 * doing so the same way in all processes is likely to lead to
	 * "unbalanced" hashtables when the table size is initially
	 * underestimated.
| */ |
| if (use_variable_hash_iv) |
| hashtable->hash_iv = murmurhash32(ParallelWorkerNumber); |
| else |
| hashtable->hash_iv = 0; |
| |
| hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable); |
| |
| /* |
| * We copy the input tuple descriptor just for safety --- we assume all |
| * input tuples will have equivalent descriptors. |
| */ |
| hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc), |
| &TTSOpsMinimalTuple); |
| |
| /* |
| * If the old reset interface is used (i.e. BuildTupleHashTable, rather |
	 * than BuildTupleHashTableExt), allowing JIT would cause the generated
	 * functions to a) live longer than the query and b) be re-generated each
	 * time the table is reset.  Therefore prevent JIT from being used in that
	 * case, by not providing a parent node (which prevents accessing the
	 * JitContext in the EState).
| */ |
| allow_jit = metacxt != tablecxt; |
| |
| /* build comparator for all columns */ |
| /* XXX: should we support non-minimal tuples for the inputslot? */ |
| hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc, |
| &TTSOpsMinimalTuple, &TTSOpsMinimalTuple, |
| numCols, |
| keyColIdx, eqfuncoids, collations, |
| allow_jit ? parent : NULL); |
| |
| /* |
	 * While not pretty, it's OK not to shut down this context explicitly and
	 * to instead rely on the containing memory context being reset, as
	 * ExecBuildGroupingEqual() only builds a very simple expression calling
	 * functions (i.e. nothing that would employ RegisterExprContextCallback()).
| */ |
| hashtable->exprcontext = CreateStandaloneExprContext(); |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return hashtable; |
| } |
| |
| /* |
 * BuildTupleHashTable is a backwards-compatibility wrapper for
| * BuildTupleHashTableExt(), that allocates the hashtable's metadata in |
| * tablecxt. Note that hashtables created this way cannot be reset leak-free |
| * with ResetTupleHashTable(). |
| */ |
| TupleHashTable |
| BuildTupleHashTable(PlanState *parent, |
| TupleDesc inputDesc, |
| int numCols, AttrNumber *keyColIdx, |
| const Oid *eqfuncoids, |
| FmgrInfo *hashfunctions, |
| Oid *collations, |
| long nbuckets, Size additionalsize, |
| MemoryContext tablecxt, |
| MemoryContext tempcxt, |
| bool use_variable_hash_iv) |
| { |
| return BuildTupleHashTableExt(parent, |
| inputDesc, |
| numCols, keyColIdx, |
| eqfuncoids, |
| hashfunctions, |
| collations, |
| nbuckets, additionalsize, |
| tablecxt, |
| tablecxt, |
| tempcxt, |
| use_variable_hash_iv); |
| } |
| |
| /* |
| * Reset contents of the hashtable to be empty, preserving all the non-content |
| * state. Note that the tablecxt passed to BuildTupleHashTableExt() should |
| * also be reset, otherwise there will be leaks. |
| */ |
| void |
| ResetTupleHashTable(TupleHashTable hashtable) |
| { |
| tuplehash_reset(hashtable->hashtab); |
| } |
| |
| /* |
| * Find or create a hashtable entry for the tuple group containing the |
| * given tuple. The tuple must be the same type as the hashtable entries. |
| * |
| * If isnew is NULL, we do not create new entries; we return NULL if no |
| * match is found. |
| * |
| * If hash is not NULL, we set it to the calculated hash value. This allows |
| * callers access to the hash value even if no entry is returned. |
| * |
| * If isnew isn't NULL, then a new entry is created if no existing entry |
| * matches. On return, *isnew is true if the entry is newly created, |
 * false if it existed already.  ->additional in the new entry has
 * been zeroed.
| */ |
| TupleHashEntry |
| LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, |
| bool *isnew, uint32 *hash) |
| { |
| TupleHashEntry entry; |
| MemoryContext oldContext; |
| uint32 local_hash; |
| |
| /* Need to run the hash functions in short-lived context */ |
| oldContext = MemoryContextSwitchTo(hashtable->tempcxt); |
| |
| /* set up data needed by hash and match functions */ |
| hashtable->inputslot = slot; |
| hashtable->in_hash_funcs = hashtable->tab_hash_funcs; |
| hashtable->cur_eq_func = hashtable->tab_eq_func; |
| |
| local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL); |
| entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash); |
| |
| if (hash != NULL) |
| *hash = local_hash; |
| |
| Assert(entry == NULL || entry->hash == local_hash); |
| |
| MemoryContextSwitchTo(oldContext); |
| |
| return entry; |
| } |
| |
| /* |
| * Compute the hash value for a tuple |
| */ |
| uint32 |
| TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot) |
| { |
| MemoryContext oldContext; |
| uint32 hash; |
| |
| hashtable->inputslot = slot; |
| hashtable->in_hash_funcs = hashtable->tab_hash_funcs; |
| |
| /* Need to run the hash functions in short-lived context */ |
| oldContext = MemoryContextSwitchTo(hashtable->tempcxt); |
| |
| hash = TupleHashTableHash_internal(hashtable->hashtab, NULL); |
| |
| MemoryContextSwitchTo(oldContext); |
| |
| return hash; |
| } |
| |
| /* |
| * A variant of LookupTupleHashEntry for callers that have already computed |
| * the hash value. |
| */ |
| TupleHashEntry |
| LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot, |
| bool *isnew, uint32 hash) |
| { |
| TupleHashEntry entry; |
| MemoryContext oldContext; |
| |
| /* Need to run the hash functions in short-lived context */ |
| oldContext = MemoryContextSwitchTo(hashtable->tempcxt); |
| |
| /* set up data needed by hash and match functions */ |
| hashtable->inputslot = slot; |
| hashtable->in_hash_funcs = hashtable->tab_hash_funcs; |
| hashtable->cur_eq_func = hashtable->tab_eq_func; |
| |
| entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash); |
| Assert(entry == NULL || entry->hash == hash); |
| |
| MemoryContextSwitchTo(oldContext); |
| |
| return entry; |
| } |
| |
| /* |
| * Search for a hashtable entry matching the given tuple. No entry is |
| * created if there's not a match. This is similar to the non-creating |
| * case of LookupTupleHashEntry, except that it supports cross-type |
| * comparisons, in which the given tuple is not of the same type as the |
| * table entries. The caller must provide the hash functions to use for |
| * the input tuple, as well as the equality functions, since these may be |
| * different from the table's internal functions. |
| */ |
| TupleHashEntry |
| FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, |
| ExprState *eqcomp, |
| FmgrInfo *hashfunctions) |
| { |
| TupleHashEntry entry; |
| MemoryContext oldContext; |
| MinimalTuple key; |
| |
| /* Need to run the hash functions in short-lived context */ |
| oldContext = MemoryContextSwitchTo(hashtable->tempcxt); |
| |
| /* Set up data needed by hash and match functions */ |
| hashtable->inputslot = slot; |
| hashtable->in_hash_funcs = hashfunctions; |
| hashtable->cur_eq_func = eqcomp; |
| |
| /* Search the hash table */ |
| key = NULL; /* flag to reference inputslot */ |
| entry = tuplehash_lookup(hashtable->hashtab, key); |
| MemoryContextSwitchTo(oldContext); |
| |
| return entry; |
| } |
| |
| /* |
| * If tuple is NULL, use the input slot instead. This convention avoids the |
| * need to materialize virtual input tuples unless they actually need to get |
| * copied into the table. |
| * |
| * Also, the caller must select an appropriate memory context for running |
| * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) |
| */ |
| static uint32 |
| TupleHashTableHash_internal(struct tuplehash_hash *tb, |
| const MinimalTuple tuple) |
| { |
| TupleHashTable hashtable = (TupleHashTable) tb->private_data; |
| int numCols = hashtable->numCols; |
| AttrNumber *keyColIdx = hashtable->keyColIdx; |
| uint32 hashkey = hashtable->hash_iv; |
| TupleTableSlot *slot; |
| FmgrInfo *hashfunctions; |
| int i; |
| |
| if (tuple == NULL) |
| { |
| /* Process the current input tuple for the table */ |
| slot = hashtable->inputslot; |
| hashfunctions = hashtable->in_hash_funcs; |
| } |
| else |
| { |
| /* |
| * Process a tuple already stored in the table. |
| * |
		 * (This case never actually occurs due to the way simplehash.h is
		 * used, as the hash value is stored in the entries.)
| */ |
| slot = hashtable->tableslot; |
| ExecStoreMinimalTuple(tuple, slot, false); |
| hashfunctions = hashtable->tab_hash_funcs; |
| } |
| |
| for (i = 0; i < numCols; i++) |
| { |
| AttrNumber att = keyColIdx[i]; |
| Datum attr; |
| bool isNull; |
| |
| /* rotate hashkey left 1 bit at each step */ |
| hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); |
| |
| attr = slot_getattr(slot, att, &isNull); |
| |
| if (!isNull) /* treat nulls as having hash key 0 */ |
| { |
| uint32 hkey; |
| |
| hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], |
| hashtable->tab_collations[i], |
| attr)); |
| hashkey ^= hkey; |
| } |
| } |
| |
| /* |
| * The way hashes are combined above, among each other and with the IV, |
	 * doesn't lead to good bit perturbation.  As achieving that is the IV's
	 * goal, perform a final round of hashing of the combined value, which
	 * results in near-perfect perturbation.
| */ |
| return murmurhash32(hashkey); |
| } |
| |
| /* |
| * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful |
| * so that we can avoid switching the memory context multiple times for |
| * LookupTupleHashEntry. |
| * |
| * NB: This function may or may not change the memory context. Caller is |
| * expected to change it back. |
| */ |
| static inline TupleHashEntry |
| LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot, |
| bool *isnew, uint32 hash) |
| { |
| TupleHashEntryData *entry; |
| bool found; |
| MinimalTuple key; |
| |
| key = NULL; /* flag to reference inputslot */ |
| |
| if (isnew) |
| { |
| entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found); |
| |
| if (found) |
| { |
| /* found pre-existing entry */ |
| *isnew = false; |
| } |
| else |
| { |
| /* created new entry */ |
| *isnew = true; |
| /* zero caller data */ |
| entry->additional = NULL; |
| MemoryContextSwitchTo(hashtable->tablecxt); |
| /* Copy the first tuple into the table context */ |
| entry->firstTuple = ExecCopySlotMinimalTuple(slot); |
| } |
| } |
| else |
| { |
| entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash); |
| } |
| |
| return entry; |
| } |
| |
| /* |
| * See whether two tuples (presumably of the same hash value) match |
| */ |
| static int |
| TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2) |
| { |
| TupleTableSlot *slot1; |
| TupleTableSlot *slot2; |
| TupleHashTable hashtable = (TupleHashTable) tb->private_data; |
| ExprContext *econtext = hashtable->exprcontext; |
| |
| /* |
| * We assume that simplehash.h will only ever call us with the first |
| * argument being an actual table entry, and the second argument being |
| * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction |
| * could be supported too, but is not currently required. |
| */ |
| Assert(tuple1 != NULL); |
| slot1 = hashtable->tableslot; |
| ExecStoreMinimalTuple(tuple1, slot1, false); |
| Assert(tuple2 == NULL); |
| slot2 = hashtable->inputslot; |
| |
	/* For cross-type comparisons, the inputslot must be first */
| econtext->ecxt_innertuple = slot2; |
| econtext->ecxt_outertuple = slot1; |
| return !ExecQualAndReset(hashtable->cur_eq_func, econtext); |
| } |