| /*------------------------------------------------------------------------- |
| * |
| * nodeSetOp.c |
| * Routines to handle INTERSECT and EXCEPT selection |
| * |
| * The input of a SetOp node consists of tuples from two relations, |
| * which have been combined into one dataset, with a junk attribute added |
| * that shows which relation each tuple came from. In SETOP_SORTED mode, |
| * the input has furthermore been sorted according to all the grouping |
| * columns (ie, all the non-junk attributes). The SetOp node scans each |
| * group of identical tuples to determine how many came from each input |
| * relation. Then it is a simple matter to emit the output demanded by the |
| * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL. |
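| * |
| * For example, if the left input contains the values {1, 1, 2} and the |
| * right input contains {1, 3}, then value 1 appears twice on the left and |
| * once on the right, value 2 appears only on the left, and value 3 only on |
| * the right; so the node emits 1 for INTERSECT, one copy of 1 for |
| * INTERSECT ALL (min(2, 1) = 1), 2 for EXCEPT, and 1 and 2 for EXCEPT ALL |
| * (2 - 1 = 1 copy of 1, plus the lone 2). |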
| * |
| * In SETOP_HASHED mode, the input is delivered in no particular order, |
| * except that we know all the tuples from one input relation will come before |
| * all the tuples of the other. The planner guarantees that the first input |
| * relation is the left-hand one for EXCEPT, and tries to make the smaller |
| * input relation come first for INTERSECT. We build a hash table in memory |
| * with one entry for each group of identical tuples, and count the number of |
| * tuples in the group from each relation. After seeing all the input, we |
| * scan the hashtable and generate the correct output using those counts. |
| * We can avoid making hashtable entries for any tuples appearing only in the |
| * second input relation, since they cannot result in any output. |
| * |
| * This node type is not used for UNION or UNION ALL, since those can be |
| * implemented more cheaply (there's no need for the junk attribute to |
| * identify the source relation). |
| * |
| * Note that SetOp does no qual checking nor projection. The delivered |
| * output tuples are just copies of the first-to-arrive tuple in each |
| * input group. |
| * |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/executor/nodeSetOp.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "access/htup_details.h" |
| #include "executor/executor.h" |
| #include "executor/nodeSetOp.h" |
| #include "miscadmin.h" |
| #include "utils/memutils.h" |
| |
| |
| /* |
| * SetOpStatePerGroupData - per-group working state |
| * |
| * These values are working state that is initialized at the start of |
| * an input tuple group and updated for each input tuple. |
| * |
| * In SETOP_SORTED mode, we need only one of these structs, and it's kept in |
| * the plan state node. In SETOP_HASHED mode, the hash table contains one |
| * of these for each tuple group. |
| */ |
| typedef struct SetOpStatePerGroupData |
| { |
| long numLeft; /* number of left-input dups in group */ |
| long numRight; /* number of right-input dups in group */ |
| } SetOpStatePerGroupData; |
| |
| |
| static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate); |
| static void setop_fill_hash_table(SetOpState *setopstate); |
| static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate); |
| |
| |
| /* |
| * Initialize state for a new group of input values. |
| */ |
| static inline void |
| initialize_counts(SetOpStatePerGroup pergroup) |
| { |
| pergroup->numLeft = pergroup->numRight = 0; |
| } |
| |
| /* |
| * Advance the appropriate counter for one input tuple. |
| */ |
| static inline void |
| advance_counts(SetOpStatePerGroup pergroup, int flag) |
| { |
| if (flag) |
| pergroup->numRight++; |
| else |
| pergroup->numLeft++; |
| } |
| |
| /* |
| * Fetch the "flag" column from an input tuple. |
| * This is an integer column with value 0 for left side, 1 for right side. |
| */ |
| static int |
| fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot) |
| { |
| SetOp *node = (SetOp *) setopstate->ps.plan; |
| int flag; |
| bool isNull; |
| |
| flag = DatumGetInt32(slot_getattr(inputslot, |
| node->flagColIdx, |
| &isNull)); |
| Assert(!isNull); |
| Assert(flag == 0 || flag == 1); |
| return flag; |
| } |
| |
| /* |
| * Initialize the hash table to empty. |
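| * |
| * The table is keyed on the grouping columns (dupColIdx) and sized from |
| * the planner's numGroups estimate; its entries live in the node's |
| * tableContext, which ExecReScanSetOp can reset wholesale. |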
| */ |
| static void |
| build_hash_table(SetOpState *setopstate) |
| { |
| SetOp *node = (SetOp *) setopstate->ps.plan; |
| ExprContext *econtext = setopstate->ps.ps_ExprContext; |
| TupleDesc desc = ExecGetResultType(outerPlanState(setopstate)); |
| |
| Assert(node->strategy == SETOP_HASHED); |
| Assert(node->numGroups > 0); |
| |
| setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps, |
| desc, |
| node->numCols, |
| node->dupColIdx, |
| setopstate->eqfuncoids, |
| setopstate->hashfunctions, |
| node->dupCollations, |
| node->numGroups, |
| 0, |
| setopstate->ps.state->es_query_cxt, |
| setopstate->tableContext, |
| econtext->ecxt_per_tuple_memory, |
| false); |
| } |
| |
| /* |
| * We've completed processing a tuple group. Decide how many copies (if any) |
| * of its representative row to emit, and store the count into numOutput. |
| * This logic is straight from the SQL92 specification. |
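| * |
| * For example, with numLeft = 3 and numRight = 1 the result is 1 for |
| * INTERSECT, min(3, 1) = 1 for INTERSECT ALL, 0 for EXCEPT (numRight is |
| * nonzero), and 3 - 1 = 2 for EXCEPT ALL. |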
| */ |
| static void |
| set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup) |
| { |
| SetOp *plannode = (SetOp *) setopstate->ps.plan; |
| |
| switch (plannode->cmd) |
| { |
| case SETOPCMD_INTERSECT: |
| if (pergroup->numLeft > 0 && pergroup->numRight > 0) |
| setopstate->numOutput = 1; |
| else |
| setopstate->numOutput = 0; |
| break; |
| case SETOPCMD_INTERSECT_ALL: |
| setopstate->numOutput = |
| (pergroup->numLeft < pergroup->numRight) ? |
| pergroup->numLeft : pergroup->numRight; |
| break; |
| case SETOPCMD_EXCEPT: |
| if (pergroup->numLeft > 0 && pergroup->numRight == 0) |
| setopstate->numOutput = 1; |
| else |
| setopstate->numOutput = 0; |
| break; |
| case SETOPCMD_EXCEPT_ALL: |
| setopstate->numOutput = |
| (pergroup->numLeft < pergroup->numRight) ? |
| 0 : (pergroup->numLeft - pergroup->numRight); |
| break; |
| default: |
| elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd); |
| break; |
| } |
| } |
| |
| |
| /* ---------------------------------------------------------------- |
| * ExecSetOp |
| * ---------------------------------------------------------------- |
| */ |
| static TupleTableSlot * /* return: a tuple or NULL */ |
| ExecSetOp(PlanState *pstate) |
| { |
| SetOpState *node = castNode(SetOpState, pstate); |
| SetOp *plannode = (SetOp *) node->ps.plan; |
| TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| /* |
| * If the previously-returned tuple needs to be returned more than once, |
| * keep returning it. |
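| * numOutput was set by set_output_count() for the current group and |
| * counts down as we re-emit the result slot, which still holds that |
| * group's representative tuple. |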
| */ |
| if (node->numOutput > 0) |
| { |
| node->numOutput--; |
| return resultTupleSlot; |
| } |
| |
| /* Otherwise, we're done if we are out of groups */ |
| if (node->setop_done) |
| return NULL; |
| |
| /* Fetch the next tuple group according to the correct strategy */ |
| if (plannode->strategy == SETOP_HASHED) |
| { |
| if (!node->table_filled) |
| setop_fill_hash_table(node); |
| return setop_retrieve_hash_table(node); |
| } |
| else |
| return setop_retrieve_direct(node); |
| } |
| |
| /* |
| * ExecSetOp for non-hashed case |
| */ |
| static TupleTableSlot * |
| setop_retrieve_direct(SetOpState *setopstate) |
| { |
| PlanState *outerPlan; |
| SetOpStatePerGroup pergroup; |
| TupleTableSlot *outerslot; |
| TupleTableSlot *resultTupleSlot; |
| ExprContext *econtext = setopstate->ps.ps_ExprContext; |
| |
| /* |
| * get state info from node |
| */ |
| outerPlan = outerPlanState(setopstate); |
| pergroup = (SetOpStatePerGroup) setopstate->pergroup; |
| resultTupleSlot = setopstate->ps.ps_ResultTupleSlot; |
| |
| /* |
| * We loop retrieving groups until we find one we should return |
| */ |
| while (!setopstate->setop_done) |
| { |
| /* |
| * If we don't already have the first tuple of the new group, fetch it |
| * from the outer plan. |
| */ |
| if (setopstate->grp_firstTuple == NULL) |
| { |
| outerslot = ExecProcNode(outerPlan); |
| if (!TupIsNull(outerslot)) |
| { |
| /* Make a copy of the first input tuple */ |
| setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot); |
| } |
| else |
| { |
| /* outer plan produced no tuples at all */ |
| setopstate->setop_done = true; |
| return NULL; |
| } |
| } |
| |
| /* |
| * Store the copied first input tuple in the tuple table slot reserved |
| * for it. The tuple will be deleted when it is cleared from the |
| * slot. |
| */ |
| ExecStoreHeapTuple(setopstate->grp_firstTuple, |
| resultTupleSlot, |
| true); |
| setopstate->grp_firstTuple = NULL; /* don't keep two pointers */ |
| |
| /* Initialize working state for a new input tuple group */ |
| initialize_counts(pergroup); |
| |
| /* Count the first input tuple */ |
| advance_counts(pergroup, |
| fetch_tuple_flag(setopstate, resultTupleSlot)); |
| |
| /* |
| * Scan the outer plan until we exhaust it or cross a group boundary. |
| */ |
| for (;;) |
| { |
| outerslot = ExecProcNode(outerPlan); |
| if (TupIsNull(outerslot)) |
| { |
| /* no more outer-plan tuples available */ |
| setopstate->setop_done = true; |
| break; |
| } |
| |
| /* |
| * Check whether we've crossed a group boundary. |
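| * The equality function was built over only the grouping (non-junk) |
| * columns, so a change in the flag column alone does not end the group. |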
| */ |
| econtext->ecxt_outertuple = resultTupleSlot; |
| econtext->ecxt_innertuple = outerslot; |
| |
| if (!ExecQualAndReset(setopstate->eqfunction, econtext)) |
| { |
| /* |
| * Save the first input tuple of the next group. |
| */ |
| setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot); |
| break; |
| } |
| |
| /* Still in same group, so count this tuple */ |
| advance_counts(pergroup, |
| fetch_tuple_flag(setopstate, outerslot)); |
| } |
| |
| /* |
| * Done scanning input tuple group. See if we should emit any copies |
| * of result tuple, and if so return the first copy. |
| */ |
| set_output_count(setopstate, pergroup); |
| |
| if (setopstate->numOutput > 0) |
| { |
| setopstate->numOutput--; |
| return resultTupleSlot; |
| } |
| } |
| |
| /* No more groups */ |
| ExecClearTuple(resultTupleSlot); |
| return NULL; |
| } |
| |
| /* |
| * ExecSetOp for hashed case: phase 1, read input and build hash table |
| */ |
| static void |
| setop_fill_hash_table(SetOpState *setopstate) |
| { |
| SetOp *node = (SetOp *) setopstate->ps.plan; |
| PlanState *outerPlan; |
| int firstFlag; |
| bool in_first_rel PG_USED_FOR_ASSERTS_ONLY; |
| ExprContext *econtext = setopstate->ps.ps_ExprContext; |
| |
| /* |
| * get state info from node |
| */ |
| outerPlan = outerPlanState(setopstate); |
| firstFlag = node->firstFlag; |
| /* verify planner didn't mess up */ |
| Assert(firstFlag == 0 || |
| (firstFlag == 1 && |
| (node->cmd == SETOPCMD_INTERSECT || |
| node->cmd == SETOPCMD_INTERSECT_ALL))); |
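| /* |
| * firstFlag == 1 can occur only when the planner has swapped the inputs |
| * to put the presumably-smaller relation first, which it may do for the |
| * INTERSECT cases but never for EXCEPT, since EXCEPT is not symmetric in |
| * its inputs. |
| */ |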
| |
| /* |
| * Process each outer-plan tuple, and then fetch the next one, until we |
| * exhaust the outer plan. |
| */ |
| in_first_rel = true; |
| for (;;) |
| { |
| TupleTableSlot *outerslot; |
| int flag; |
| TupleHashEntryData *entry; |
| bool isnew; |
| |
| outerslot = ExecProcNode(outerPlan); |
| if (TupIsNull(outerslot)) |
| break; |
| |
| /* Identify whether it's left or right input */ |
| flag = fetch_tuple_flag(setopstate, outerslot); |
| |
| if (flag == firstFlag) |
| { |
| /* (still) in first input relation */ |
| Assert(in_first_rel); |
| |
| /* Find or build hashtable entry for this tuple's group */ |
| entry = LookupTupleHashEntry(setopstate->hashtable, outerslot, |
| &isnew, NULL); |
| |
| /* If new tuple group, initialize counts */ |
| if (isnew) |
| { |
| entry->additional = (SetOpStatePerGroup) |
| MemoryContextAlloc(setopstate->hashtable->tablecxt, |
| sizeof(SetOpStatePerGroupData)); |
| initialize_counts((SetOpStatePerGroup) entry->additional); |
| } |
| |
| /* Advance the counts */ |
| advance_counts((SetOpStatePerGroup) entry->additional, flag); |
| } |
| else |
| { |
| /* reached second relation */ |
| in_first_rel = false; |
| |
| /* For tuples not seen previously, do not make hashtable entry */ |
| entry = LookupTupleHashEntry(setopstate->hashtable, outerslot, |
| NULL, NULL); |
| |
| /* Advance the counts if entry is already present */ |
| if (entry) |
| advance_counts((SetOpStatePerGroup) entry->additional, flag); |
| } |
| |
| /* Must reset expression context after each hashtable lookup */ |
| ResetExprContext(econtext); |
| } |
| |
| setopstate->table_filled = true; |
| /* Initialize to walk the hash table */ |
| ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter); |
| } |
| |
| /* |
| * ExecSetOp for hashed case: phase 2, retrieving groups from hash table |
| */ |
| static TupleTableSlot * |
| setop_retrieve_hash_table(SetOpState *setopstate) |
| { |
| TupleHashEntryData *entry; |
| TupleTableSlot *resultTupleSlot; |
| |
| /* |
| * get state info from node |
| */ |
| resultTupleSlot = setopstate->ps.ps_ResultTupleSlot; |
| |
| /* |
| * We loop retrieving groups until we find one we should return |
| */ |
| while (!setopstate->setop_done) |
| { |
| CHECK_FOR_INTERRUPTS(); |
| |
| /* |
| * Find the next entry in the hash table |
| */ |
| entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter); |
| if (entry == NULL) |
| { |
| /* No more entries in hashtable, so done */ |
| setopstate->setop_done = true; |
| return NULL; |
| } |
| |
| /* |
| * See if we should emit any copies of this tuple, and if so return |
| * the first copy. |
| */ |
| set_output_count(setopstate, (SetOpStatePerGroup) entry->additional); |
| |
| if (setopstate->numOutput > 0) |
| { |
| setopstate->numOutput--; |
| return ExecStoreMinimalTuple(entry->firstTuple, |
| resultTupleSlot, |
| false); |
| } |
| } |
| |
| /* No more groups */ |
| ExecClearTuple(resultTupleSlot); |
| return NULL; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecInitSetOp |
| * |
| * This initializes the setop node state structures and |
| * the node's subplan. |
| * ---------------------------------------------------------------- |
| */ |
| SetOpState * |
| ExecInitSetOp(SetOp *node, EState *estate, int eflags) |
| { |
| SetOpState *setopstate; |
| TupleDesc outerDesc; |
| |
| /* check for unsupported flags */ |
| Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); |
| |
| /* |
| * create state structure |
| */ |
| setopstate = makeNode(SetOpState); |
| setopstate->ps.plan = (Plan *) node; |
| setopstate->ps.state = estate; |
| setopstate->ps.ExecProcNode = ExecSetOp; |
| |
| setopstate->eqfuncoids = NULL; |
| setopstate->hashfunctions = NULL; |
| setopstate->setop_done = false; |
| setopstate->numOutput = 0; |
| setopstate->pergroup = NULL; |
| setopstate->grp_firstTuple = NULL; |
| setopstate->hashtable = NULL; |
| setopstate->tableContext = NULL; |
| |
| /* |
| * create expression context |
| */ |
| ExecAssignExprContext(estate, &setopstate->ps); |
| |
| /* |
| * If hashing, we also need a longer-lived context to store the hash |
| * table. The table can't just be kept in the per-query context because |
| * we want to be able to throw it away in ExecReScanSetOp. |
| */ |
| if (node->strategy == SETOP_HASHED) |
| setopstate->tableContext = |
| AllocSetContextCreate(CurrentMemoryContext, |
| "SetOp hash table", |
| ALLOCSET_DEFAULT_SIZES); |
| |
| /* |
| * initialize child nodes |
| * |
| * If we are hashing then the child plan does not need to handle REWIND |
| * efficiently; see ExecReScanSetOp. |
| */ |
| if (node->strategy == SETOP_HASHED) |
| eflags &= ~EXEC_FLAG_REWIND; |
| outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags); |
| outerDesc = ExecGetResultType(outerPlanState(setopstate)); |
| |
| /* |
| * Initialize result slot and type. Setop nodes do no projections, so |
| * initialize projection info for this node appropriately. |
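| * The slot ops match how results are stored: hashed mode returns |
| * MinimalTuples taken from the hash table, while sorted mode stores a |
| * copied HeapTuple in the result slot. |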
| */ |
| ExecInitResultTupleSlotTL(&setopstate->ps, |
| node->strategy == SETOP_HASHED ? |
| &TTSOpsMinimalTuple : &TTSOpsHeapTuple); |
| setopstate->ps.ps_ProjInfo = NULL; |
| |
| /* |
| * Precompute fmgr lookup data for inner loop. We need both equality and |
| * hashing functions to do it by hashing, but only equality if not |
| * hashing. |
| */ |
| if (node->strategy == SETOP_HASHED) |
| execTuplesHashPrepare(node->numCols, |
| node->dupOperators, |
| &setopstate->eqfuncoids, |
| &setopstate->hashfunctions); |
| else |
| setopstate->eqfunction = |
| execTuplesMatchPrepare(outerDesc, |
| node->numCols, |
| node->dupColIdx, |
| node->dupOperators, |
| node->dupCollations, |
| &setopstate->ps); |
| |
| if (node->strategy == SETOP_HASHED) |
| { |
| build_hash_table(setopstate); |
| setopstate->table_filled = false; |
| } |
| else |
| { |
| setopstate->pergroup = |
| (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData)); |
| } |
| |
| return setopstate; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecEndSetOp |
| * |
| * This shuts down the subplan and frees resources allocated |
| * to this node. |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecEndSetOp(SetOpState *node) |
| { |
| /* clean up tuple table */ |
| ExecClearTuple(node->ps.ps_ResultTupleSlot); |
| |
| /* free subsidiary stuff including hashtable */ |
| if (node->tableContext) |
| MemoryContextDelete(node->tableContext); |
| ExecFreeExprContext(&node->ps); |
| |
| ExecEndNode(outerPlanState(node)); |
| } |
| |
| |
| void |
| ExecReScanSetOp(SetOpState *node) |
| { |
| PlanState *outerPlan = outerPlanState(node); |
| |
| ExecClearTuple(node->ps.ps_ResultTupleSlot); |
| node->setop_done = false; |
| node->numOutput = 0; |
| |
| if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED) |
| { |
| /* |
| * In the hashed case, if we haven't yet built the hash table then we |
| * can just return; nothing done yet, so nothing to undo. If subnode's |
| * chgParam is not NULL then it will be re-scanned by ExecProcNode, |
| * else no reason to re-scan it at all. |
| */ |
| if (!node->table_filled) |
| return; |
| |
| /* |
| * If we do have the hash table and the subplan does not have any |
| * parameter changes, then we can just rescan the existing hash table; |
| * no need to build it again. |
| */ |
| if (outerPlan->chgParam == NULL) |
| { |
| ResetTupleHashIterator(node->hashtable, &node->hashiter); |
| return; |
| } |
| } |
| |
| /* Release first tuple of group, if we have made a copy */ |
| if (node->grp_firstTuple != NULL) |
| { |
| heap_freetuple(node->grp_firstTuple); |
| node->grp_firstTuple = NULL; |
| } |
| |
| /* Release any hashtable storage */ |
| if (node->tableContext) |
| MemoryContextResetAndDeleteChildren(node->tableContext); |
| |
| /* And rebuild empty hashtable if needed */ |
| if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED) |
| { |
| ResetTupleHashTable(node->hashtable); |
| node->table_filled = false; |
| } |
| |
| /* |
| * If chgParam of the subnode is not null, then the plan will be |
| * re-scanned by the first ExecProcNode call. |
| */ |
| if (outerPlan->chgParam == NULL) |
| ExecReScan(outerPlan); |
| } |