| /*------------------------------------------------------------------------- |
| * |
| * execUtils.c |
| * miscellaneous executor utility routines |
| * |
| * Portions Copyright (c) 2005-2008, Greenplum inc |
| * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates. |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/executor/execUtils.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| /* |
| * INTERFACE ROUTINES |
| * CreateExecutorState Create/delete executor working state |
| * FreeExecutorState |
| * CreateExprContext |
| * CreateStandaloneExprContext |
| * FreeExprContext |
| * ReScanExprContext |
| * |
| * ExecAssignExprContext Common code for plan node init routines. |
| * etc |
| * |
| * ExecOpenScanRelation Common code for scan node init routines. |
| * |
| * ExecInitRangeTable Set up executor's range-table-related data. |
| * |
| * ExecGetRangeTableRelation Fetch Relation for a rangetable entry. |
| * |
| * executor_errposition Report syntactic position of an error. |
| * |
| * RegisterExprContextCallback Register function shutdown callback |
| * UnregisterExprContextCallback Deregister function shutdown callback |
| * |
| * GetAttributeByName Runtime extraction of columns from tuples. |
| * GetAttributeByNum |
| * |
| * NOTES |
| * This file has traditionally been the place to stick misc. |
| * executor support stuff that doesn't really go anyplace else. |
| */ |
| |
| #include "postgres.h" |
| #include "pgstat.h" |
| |
| #include "access/genam.h" |
| #include "access/heapam.h" |
| #include "access/parallel.h" |
| #include "access/relscan.h" |
| #include "access/table.h" |
| #include "access/tableam.h" |
| #include "access/transam.h" |
| #include "catalog/index.h" |
| #include "executor/execdebug.h" |
| #include "executor/execUtils.h" |
| #include "executor/executor.h" |
| #include "executor/execPartition.h" |
| #include "jit/jit.h" |
| #include "mb/pg_wchar.h" |
| #include "miscadmin.h" |
| #include "nodes/nodeFuncs.h" |
| #include "parser/parsetree.h" |
| #include "partitioning/partdesc.h" |
| #include "storage/lmgr.h" |
| #include "utils/builtins.h" |
| #include "utils/memutils.h" |
| #include "utils/rel.h" |
| #include "utils/typcache.h" |
| |
| #include "nodes/primnodes.h" |
| #include "nodes/execnodes.h" |
| |
| #include "cdb/cdbutil.h" |
| #include "cdb/cdbvars.h" |
| #include "cdb/cdbdisp_query.h" |
| #include "cdb/cdbdispatchresult.h" |
| #include "cdb/ml_ipc.h" |
| #include "cdb/cdbmotion.h" |
| #include "cdb/cdbsreh.h" |
| #include "cdb/memquota.h" |
| #include "executor/instrument.h" |
| #include "executor/spi.h" |
| #include "utils/elog.h" |
| #include "nodes/makefuncs.h" |
| #include "storage/ipc.h" |
| #include "cdb/cdbexplain.h" |
| #include "cdb/cdbllize.h" |
| #include "utils/guc.h" |
| #include "utils/workfile_mgr.h" |
| #include "utils/metrics_utils.h" |
| |
| static bool tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno, TupleDesc tupdesc); |
| static void ShutdownExprContext(ExprContext *econtext, bool isCommit); |
| static List *flatten_logic_exprs(Node *node); |
| ProcessDispatchResult_hook_type ProcessDispatchResult_hook = NULL; |
| |
| |
| /* ---------------------------------------------------------------- |
| * Executor state and memory management functions |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * CreateExecutorState |
| * |
| * Create and initialize an EState node, which is the root of |
| * working storage for an entire Executor invocation. |
| * |
| * Principally, this creates the per-query memory context that will be |
| * used to hold all working data that lives till the end of the query. |
| * Note that the per-query context will become a child of the caller's |
| * CurrentMemoryContext. |
| * ---------------- |
| */ |
| EState * |
| CreateExecutorState(void) |
| { |
| EState *estate; |
| MemoryContext qcontext; |
| MemoryContext oldcontext; |
| |
| /* |
| * Create the per-query context for this Executor run. |
| */ |
| qcontext = AllocSetContextCreate(CurrentMemoryContext, |
| "ExecutorState", |
| ALLOCSET_DEFAULT_SIZES); |
| MemoryContextDeclareAccountingRoot(qcontext); |
| |
| /* |
| * Make the EState node within the per-query context. This way, we don't |
| * need a separate pfree() operation for it at shutdown. |
| */ |
| oldcontext = MemoryContextSwitchTo(qcontext); |
| |
| estate = makeNode(EState); |
| |
| /* |
| * Initialize all fields of the Executor State structure |
| */ |
| estate->es_direction = ForwardScanDirection; |
| estate->es_snapshot = InvalidSnapshot; /* caller must initialize this */ |
| estate->es_crosscheck_snapshot = InvalidSnapshot; /* no crosscheck */ |
| estate->es_range_table = NIL; |
| estate->es_range_table_size = 0; |
| estate->es_relations = NULL; |
| estate->es_rowmarks = NULL; |
| estate->es_plannedstmt = NULL; |
| |
| estate->es_junkFilter = NULL; |
| |
| estate->es_output_cid = (CommandId) 0; |
| |
| estate->es_result_relations = NULL; |
| estate->es_opened_result_relations = NIL; |
| estate->es_tuple_routing_result_relations = NIL; |
| estate->es_trig_target_relations = NIL; |
| |
| estate->es_param_list_info = NULL; |
| estate->es_param_exec_vals = NULL; |
| |
| estate->es_queryEnv = NULL; |
| |
| estate->es_query_cxt = qcontext; |
| |
| estate->es_tupleTable = NIL; |
| |
| estate->es_processed = 0; |
| |
| estate->es_top_eflags = 0; |
| estate->es_instrument = 0; |
| estate->es_finished = false; |
| |
| estate->es_exprcontexts = NIL; |
| |
| estate->es_subplanstates = NIL; |
| |
| estate->es_auxmodifytables = NIL; |
| |
| estate->es_per_tuple_exprcontext = NULL; |
| |
| estate->es_sourceText = NULL; |
| |
| estate->es_use_parallel_mode = false; |
| |
| estate->es_jit_flags = 0; |
| estate->es_jit = NULL; |
| |
| estate->es_sliceTable = NULL; |
| estate->interconnect_context = NULL; |
| estate->motionlayer_context = NULL; |
| estate->es_interconnect_is_setup = false; |
| estate->active_recv_id = -1; |
| estate->es_got_eos = false; |
| estate->cancelUnfinished = false; |
| |
| estate->dispatcherState = NULL; |
| |
| estate->currentSliceId = 0; |
| estate->useMppParallelMode = false; |
| estate->eliminateAliens = false; |
| |
| estate->gp_bypass_unique_check = false; |
| |
| /* |
| * Return the executor state structure |
| */ |
| MemoryContextSwitchTo(oldcontext); |
| |
| return estate; |
| } |
| |
| /* ---------------- |
| * FreeExecutorState |
| * |
| * Release an EState along with all remaining working storage. |
| * |
| * Note: this is not responsible for releasing non-memory resources, such as |
| * open relations or buffer pins. But it will shut down any still-active |
| * ExprContexts within the EState and deallocate associated JITed expressions. |
| * That is sufficient cleanup for situations where the EState has only been |
| * used for expression evaluation, and not to run a complete Plan. |
| * |
| * This can be called in any memory context ... so long as it's not one |
| * of the ones to be freed. |
| * |
| * In Cloudberry, this also clears the PartitionState, even though that's a |
| * non-memory resource, as that can be allocated for expression evaluation even |
| * when there is no Plan. |
| * ---------------- |
| */ |
| void |
| FreeExecutorState(EState *estate) |
| { |
| /* |
| * Shut down and free any remaining ExprContexts. We do this explicitly |
| * to ensure that any remaining shutdown callbacks get called (since they |
| * might need to release resources that aren't simply memory within the |
| * per-query memory context). |
| */ |
| while (estate->es_exprcontexts) |
| { |
| /* |
| * XXX: seems there ought to be a faster way to implement this than |
| * repeated list_delete(), no? |
| */ |
| FreeExprContext((ExprContext *) linitial(estate->es_exprcontexts), |
| true); |
| /* FreeExprContext removed the list link for us */ |
| } |
| |
| estate->dispatcherState = NULL; |
| |
| /* release JIT context, if allocated */ |
| if (estate->es_jit) |
| { |
| jit_release_context(estate->es_jit); |
| estate->es_jit = NULL; |
| } |
| |
| /* release partition directory, if allocated */ |
| if (estate->es_partition_directory) |
| { |
| DestroyPartitionDirectory(estate->es_partition_directory); |
| estate->es_partition_directory = NULL; |
| } |
| |
| /* |
| * Free the per-query memory context, thereby releasing all working |
| * memory, including the EState node itself. |
| */ |
| MemoryContextDelete(estate->es_query_cxt); |
| } |
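| |
| /* |
|  * A minimal usage sketch for the two routines above (a hypothetical caller, |
|  * not any particular call site): an EState used only for expression |
|  * evaluation is created, used, and then freed, which releases its entire |
|  * per-query memory context. |
|  * |
|  *    EState       *estate = CreateExecutorState(); |
|  *    MemoryContext oldcontext; |
|  * |
|  *    oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
|  *    ... create ExprContexts and evaluate expressions ... |
|  *    MemoryContextSwitchTo(oldcontext); |
|  * |
|  *    FreeExecutorState(estate);   -- also shuts down remaining ExprContexts |
|  */ |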
| |
| /* |
| * Internal implementation for CreateExprContext() and CreateWorkExprContext() |
| * that allows control over the AllocSet parameters. |
| */ |
| static ExprContext * |
| CreateExprContextInternal(EState *estate, Size minContextSize, |
| Size initBlockSize, Size maxBlockSize) |
| { |
| ExprContext *econtext; |
| MemoryContext oldcontext; |
| |
| /* Create the ExprContext node within the per-query memory context */ |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| econtext = makeNode(ExprContext); |
| |
| /* Initialize fields of ExprContext */ |
| econtext->ecxt_scantuple = NULL; |
| econtext->ecxt_innertuple = NULL; |
| econtext->ecxt_outertuple = NULL; |
| |
| econtext->ecxt_per_query_memory = estate->es_query_cxt; |
| |
| /* |
| * Create working memory for expression evaluation in this context. |
| */ |
| econtext->ecxt_per_tuple_memory = |
| AllocSetContextCreate(estate->es_query_cxt, |
| "ExprContext", |
| minContextSize, |
| initBlockSize, |
| maxBlockSize); |
| |
| econtext->ecxt_param_exec_vals = estate->es_param_exec_vals; |
| econtext->ecxt_param_list_info = estate->es_param_list_info; |
| |
| econtext->ecxt_aggvalues = NULL; |
| econtext->ecxt_aggnulls = NULL; |
| |
| econtext->caseValue_datum = (Datum) 0; |
| econtext->caseValue_isNull = true; |
| |
| econtext->domainValue_datum = (Datum) 0; |
| econtext->domainValue_isNull = true; |
| |
| econtext->ecxt_estate = estate; |
| |
| econtext->ecxt_callbacks = NULL; |
| |
| /* |
| * Link the ExprContext into the EState to ensure it is shut down when the |
| * EState is freed. Because we use lcons(), shutdowns will occur in |
| * reverse order of creation, which may not be essential but can't hurt. |
| */ |
| estate->es_exprcontexts = lcons(econtext, estate->es_exprcontexts); |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return econtext; |
| } |
| |
| /* ---------------- |
| * CreateExprContext |
| * |
| * Create a context for expression evaluation within an EState. |
| * |
| * An executor run may require multiple ExprContexts (we usually make one |
| * for each Plan node, and a separate one for per-output-tuple processing |
| * such as constraint checking). Each ExprContext has its own "per-tuple" |
| * memory context. |
| * |
| * Note we make no assumption about the caller's memory context. |
| * ---------------- |
| */ |
| ExprContext * |
| CreateExprContext(EState *estate) |
| { |
| return CreateExprContextInternal(estate, ALLOCSET_DEFAULT_SIZES); |
| } |
| |
| |
| /* ---------------- |
| * CreateWorkExprContext |
| * |
| * Like CreateExprContext, but specifies the AllocSet sizes to be reasonable |
| * in proportion to work_mem. If the maximum block allocation size is too |
| * large, it's easy to skip right past work_mem with a single allocation. |
| * ---------------- |
| */ |
| ExprContext * |
| CreateWorkExprContext(EState *estate) |
| { |
| Size minContextSize = ALLOCSET_DEFAULT_MINSIZE; |
| Size initBlockSize = ALLOCSET_DEFAULT_INITSIZE; |
| Size maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE; |
| |
| /* choose the maxBlockSize to be no larger than 1/16 of work_mem */ |
| while (16 * maxBlockSize > work_mem * 1024L) |
| maxBlockSize >>= 1; |
| |
| if (maxBlockSize < ALLOCSET_DEFAULT_INITSIZE) |
| maxBlockSize = ALLOCSET_DEFAULT_INITSIZE; |
| |
| return CreateExprContextInternal(estate, minContextSize, |
| initBlockSize, maxBlockSize); |
| } |
| |
| /* ---------------- |
| * CreateStandaloneExprContext |
| * |
| * Create a context for standalone expression evaluation. |
| * |
| * An ExprContext made this way can be used for evaluation of expressions |
| * that contain no Params, subplans, or Var references (it might work to |
| * put tuple references into the scantuple field, but it seems unwise). |
| * |
| * The ExprContext struct is allocated in the caller's current memory |
| * context, which also becomes its "per query" context. |
| * |
| * It is caller's responsibility to free the ExprContext when done, |
| * or at least ensure that any shutdown callbacks have been called |
| * (ReScanExprContext() is suitable). Otherwise, non-memory resources |
| * might be leaked. |
| * ---------------- |
| */ |
| ExprContext * |
| CreateStandaloneExprContext(void) |
| { |
| ExprContext *econtext; |
| |
| /* Create the ExprContext node within the caller's memory context */ |
| econtext = makeNode(ExprContext); |
| |
| /* Initialize fields of ExprContext */ |
| econtext->ecxt_scantuple = NULL; |
| econtext->ecxt_innertuple = NULL; |
| econtext->ecxt_outertuple = NULL; |
| |
| econtext->ecxt_per_query_memory = CurrentMemoryContext; |
| |
| /* |
| * Create working memory for expression evaluation in this context. |
| */ |
| econtext->ecxt_per_tuple_memory = |
| AllocSetContextCreate(CurrentMemoryContext, |
| "ExprContext", |
| ALLOCSET_DEFAULT_SIZES); |
| |
| econtext->ecxt_param_exec_vals = NULL; |
| econtext->ecxt_param_list_info = NULL; |
| |
| econtext->ecxt_aggvalues = NULL; |
| econtext->ecxt_aggnulls = NULL; |
| |
| econtext->caseValue_datum = (Datum) 0; |
| econtext->caseValue_isNull = true; |
| |
| econtext->domainValue_datum = (Datum) 0; |
| econtext->domainValue_isNull = true; |
| |
| econtext->ecxt_estate = NULL; |
| |
| econtext->ecxt_callbacks = NULL; |
| |
| return econtext; |
| } |
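| |
| /* |
|  * A usage sketch (hypothetical caller): the ExprContext lives in the |
|  * caller's CurrentMemoryContext, and the caller must make sure shutdown |
|  * callbacks are run before the context is discarded. |
|  * |
|  *    ExprContext *econtext = CreateStandaloneExprContext(); |
|  * |
|  *    ... evaluate Param-free, Var-free expressions using econtext ... |
|  * |
|  *    FreeExprContext(econtext, true);   -- or at least ReScanExprContext() |
|  */ |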
| |
| /* ---------------- |
| * FreeExprContext |
| * |
| * Free an expression context, including calling any remaining |
| * shutdown callbacks. |
| * |
| * Since we free the temporary context used for expression evaluation, |
| * any previously computed pass-by-reference expression result will go away! |
| * |
| * If isCommit is false, we are being called in error cleanup, and should |
| * not call callbacks but only release memory. (It might be better to call |
| * the callbacks and pass the isCommit flag to them, but that would require |
| * more invasive code changes than currently seems justified.) |
| * |
| * Note we make no assumption about the caller's memory context. |
| * ---------------- |
| */ |
| void |
| FreeExprContext(ExprContext *econtext, bool isCommit) |
| { |
| EState *estate; |
| |
| /* Call any registered callbacks */ |
| ShutdownExprContext(econtext, isCommit); |
| /* And clean up the memory used */ |
| MemoryContextDelete(econtext->ecxt_per_tuple_memory); |
| /* Unlink self from owning EState, if any */ |
| estate = econtext->ecxt_estate; |
| if (estate) |
| estate->es_exprcontexts = list_delete_ptr(estate->es_exprcontexts, |
| econtext); |
| /* And delete the ExprContext node */ |
| pfree(econtext); |
| } |
| |
| /* |
| * ReScanExprContext |
| * |
| * Reset an expression context in preparation for a rescan of its |
| * plan node. This requires calling any registered shutdown callbacks, |
| * since any partially complete set-returning functions must be canceled. |
| * |
| * Note we make no assumption about the caller's memory context. |
| */ |
| void |
| ReScanExprContext(ExprContext *econtext) |
| { |
| /* Call any registered callbacks */ |
| ShutdownExprContext(econtext, true); |
| /* And clean up the memory used */ |
| MemoryContextReset(econtext->ecxt_per_tuple_memory); |
| } |
| |
| /* |
| * Build a per-output-tuple ExprContext for an EState. |
| * |
| * This is normally invoked via GetPerTupleExprContext() macro, |
| * not directly. |
| */ |
| ExprContext * |
| MakePerTupleExprContext(EState *estate) |
| { |
| if (estate->es_per_tuple_exprcontext == NULL) |
| estate->es_per_tuple_exprcontext = CreateExprContext(estate); |
| |
| return estate->es_per_tuple_exprcontext; |
| } |
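| |
| /* |
|  * The usual per-output-tuple pattern looks roughly like this (a sketch, |
|  * assuming the GetPerTupleExprContext/ResetPerTupleExprContext macros from |
|  * executor.h); resetting between tuples frees whatever the previous tuple's |
|  * evaluation left behind in ecxt_per_tuple_memory: |
|  * |
|  *    for each output tuple: |
|  *        ResetPerTupleExprContext(estate); |
|  *        econtext = GetPerTupleExprContext(estate); |
|  *        ... evaluate constraints or junk expressions with econtext ... |
|  */ |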
| |
| |
| /* ---------------------------------------------------------------- |
| * miscellaneous node-init support functions |
| * |
| * Note: all of these are expected to be called with CurrentMemoryContext |
| * equal to the per-query memory context. |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * ExecAssignExprContext |
| * |
| * This initializes the ps_ExprContext field. It is only necessary |
| * to do this for nodes which use ExecQual or ExecProject |
| * because those routines require an econtext. Other nodes that |
| * don't have to evaluate expressions don't need to do this. |
| * ---------------- |
| */ |
| void |
| ExecAssignExprContext(EState *estate, PlanState *planstate) |
| { |
| planstate->ps_ExprContext = CreateExprContext(estate); |
| } |
| |
| /* ---------------- |
| * ExecGetResultType |
| * ---------------- |
| */ |
| TupleDesc |
| ExecGetResultType(PlanState *planstate) |
| { |
| return planstate->ps_ResultTupleDesc; |
| } |
| |
| /* |
| * ExecGetResultSlotOps - information about node's type of result slot |
| */ |
| const TupleTableSlotOps * |
| ExecGetResultSlotOps(PlanState *planstate, bool *isfixed) |
| { |
| if (planstate->resultopsset && planstate->resultops) |
| { |
| if (isfixed) |
| *isfixed = planstate->resultopsfixed; |
| return planstate->resultops; |
| } |
| |
| if (isfixed) |
| { |
| if (planstate->resultopsset) |
| *isfixed = planstate->resultopsfixed; |
| else if (planstate->ps_ResultTupleSlot) |
| *isfixed = TTS_FIXED(planstate->ps_ResultTupleSlot); |
| else |
| *isfixed = false; |
| } |
| |
| if (!planstate->ps_ResultTupleSlot) |
| return &TTSOpsVirtual; |
| |
| return planstate->ps_ResultTupleSlot->tts_ops; |
| } |
| |
| |
| /* ---------------- |
| * ExecAssignProjectionInfo |
| * |
| * forms the projection information from the node's targetlist |
| * |
| * Notes for inputDesc are the same as for ExecBuildProjectionInfo: supply it |
| * for a relation-scan node; pass NULL for upper-level nodes. |
| * ---------------- |
| */ |
| void |
| ExecAssignProjectionInfo(PlanState *planstate, |
| TupleDesc inputDesc) |
| { |
| planstate->ps_ProjInfo = |
| ExecBuildProjectionInfo(planstate->plan->targetlist, |
| planstate->ps_ExprContext, |
| planstate->ps_ResultTupleSlot, |
| planstate, |
| inputDesc); |
| } |
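| |
| /* |
|  * A sketch of typical use in a plan node's ExecInit routine (the node name |
|  * "mystate" is illustrative only): |
|  * |
|  *    ExecAssignExprContext(estate, &mystate->ps); |
|  *    ExecInitResultTupleSlotTL(&mystate->ps, &TTSOpsVirtual); |
|  *    ExecAssignProjectionInfo(&mystate->ps, NULL); |
|  * |
|  * A scan node would pass its scan tuple descriptor as inputDesc instead, or |
|  * use ExecConditionalAssignProjectionInfo() below to skip projection when |
|  * the targetlist is a no-op. |
|  */ |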
| |
| |
| /* ---------------- |
| * ExecConditionalAssignProjectionInfo |
| * |
| * as ExecAssignProjectionInfo, but store NULL rather than building projection |
| * info if no projection is required |
| * ---------------- |
| */ |
| void |
| ExecConditionalAssignProjectionInfo(PlanState *planstate, TupleDesc inputDesc, |
| Index varno) |
| { |
| if (tlist_matches_tupdesc(planstate, |
| planstate->plan->targetlist, |
| varno, |
| inputDesc)) |
| { |
| planstate->ps_ProjInfo = NULL; |
| planstate->resultopsset = planstate->scanopsset; |
| planstate->resultopsfixed = planstate->scanopsfixed; |
| planstate->resultops = planstate->scanops; |
| } |
| else |
| { |
| if (!planstate->ps_ResultTupleSlot) |
| { |
| ExecInitResultSlot(planstate, &TTSOpsVirtual); |
| planstate->resultops = &TTSOpsVirtual; |
| planstate->resultopsfixed = true; |
| planstate->resultopsset = true; |
| } |
| ExecAssignProjectionInfo(planstate, inputDesc); |
| } |
| } |
| |
| static bool |
| tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno, TupleDesc tupdesc) |
| { |
| int numattrs = tupdesc->natts; |
| int attrno; |
| ListCell *tlist_item = list_head(tlist); |
| |
| /* Check the tlist attributes */ |
| for (attrno = 1; attrno <= numattrs; attrno++) |
| { |
| Form_pg_attribute att_tup = TupleDescAttr(tupdesc, attrno - 1); |
| Var *var; |
| |
| if (tlist_item == NULL) |
| return false; /* tlist too short */ |
| var = (Var *) ((TargetEntry *) lfirst(tlist_item))->expr; |
| if (!var || !IsA(var, Var)) |
| return false; /* tlist item not a Var */ |
| /* if these Asserts fail, planner messed up */ |
| Assert(var->varno == varno); |
| Assert(var->varlevelsup == 0); |
| if (var->varattno != attrno) |
| return false; /* out of order */ |
| if (att_tup->attisdropped) |
| return false; /* table contains dropped columns */ |
| if (att_tup->atthasmissing) |
| return false; /* table contains cols with missing values */ |
| |
| /* |
| * Note: usually the Var's type should match the tupdesc exactly, but |
| * in situations involving unions of columns that have different |
| * typmods, the Var may have come from above the union and hence have |
| * typmod -1. This is a legitimate situation since the Var still |
| * describes the column, just not as exactly as the tupdesc does. We |
| * could change the planner to prevent it, but it'd then insert |
| * projection steps just to convert from specific typmod to typmod -1, |
| * which is pretty silly. |
| */ |
| if (var->vartype != att_tup->atttypid || |
| (var->vartypmod != att_tup->atttypmod && |
| var->vartypmod != -1)) |
| return false; /* type mismatch */ |
| |
| tlist_item = lnext(tlist, tlist_item); |
| } |
| |
| if (tlist_item) |
| return false; /* tlist too long */ |
| |
| return true; |
| } |
| |
| /* ---------------- |
| * ExecFreeExprContext |
| * |
| * A plan node's ExprContext should be freed explicitly during executor |
| * shutdown because there may be shutdown callbacks to call. (Other resources |
| * made by the above routines, such as projection info, don't need to be freed |
| * explicitly because they're just memory in the per-query memory context.) |
| * |
| * However ... there is no particular need to do it during ExecEndNode, |
| * because FreeExecutorState will free any remaining ExprContexts within |
| * the EState. Letting FreeExecutorState do it allows the ExprContexts to |
| * be freed in reverse order of creation, rather than order of creation as |
| * will happen if we delete them here, which saves O(N^2) work in the list |
| * cleanup inside FreeExprContext. |
| * ---------------- |
| */ |
| void |
| ExecFreeExprContext(PlanState *planstate) |
| { |
| /* |
| * Per above discussion, don't actually delete the ExprContext. We do |
| * unlink it from the plan node, though. |
| */ |
| planstate->ps_ExprContext = NULL; |
| } |
| |
| |
| /* ---------------------------------------------------------------- |
| * Scan node support |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * ExecAssignScanType |
| * ---------------- |
| */ |
| void |
| ExecAssignScanType(ScanState *scanstate, TupleDesc tupDesc) |
| { |
| TupleTableSlot *slot = scanstate->ss_ScanTupleSlot; |
| |
| ExecSetSlotDescriptor(slot, tupDesc); |
| } |
| |
| /* ---------------- |
| * ExecCreateScanSlotFromOuterPlan |
| * ---------------- |
| */ |
| void |
| ExecCreateScanSlotFromOuterPlan(EState *estate, |
| ScanState *scanstate, |
| const TupleTableSlotOps *tts_ops) |
| { |
| PlanState *outerPlan; |
| TupleDesc tupDesc; |
| |
| outerPlan = outerPlanState(scanstate); |
| tupDesc = ExecGetResultType(outerPlan); |
| |
| ExecInitScanTupleSlot(estate, scanstate, tupDesc, tts_ops); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecRelationIsTargetRelation |
| * |
| * Detect whether a relation (identified by rangetable index) |
| * is one of the target relations of the query. |
| * |
| * Note: This is currently no longer used in core. We keep it around |
| * because FDWs may wish to use it to determine if their foreign table |
| * is a target relation. |
| * ---------------------------------------------------------------- |
| */ |
| bool |
| ExecRelationIsTargetRelation(EState *estate, Index scanrelid) |
| { |
| return list_member_int(estate->es_plannedstmt->resultRelations, scanrelid); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecOpenScanRelation |
| * |
| * Open the heap relation to be scanned by a base-level scan plan node. |
| * This should be called during the node's ExecInit routine. |
| * ---------------------------------------------------------------- |
| */ |
| Relation |
| ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags) |
| { |
| Relation rel; |
| |
| /* Open the relation. */ |
| rel = ExecGetRangeTableRelation(estate, scanrelid); |
| |
| /* |
| * Complain if we're attempting a scan of an unscannable relation, except |
| * when the query won't actually be run. This is a slightly klugy place |
| * to do this, perhaps, but there is no better place. |
| */ |
| if ((eflags & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA)) == 0 && |
| !RelationIsScannable(rel)) |
| ereport(ERROR, |
| (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
| errmsg("materialized view \"%s\" has not been populated", |
| RelationGetRelationName(rel)), |
| errhint("Use the REFRESH MATERIALIZED VIEW command."))); |
| |
| return rel; |
| } |
| |
| /* |
| * ExecInitRangeTable |
| * Set up executor's range-table-related data |
| * |
| * In addition to the range table proper, initialize arrays that are |
| * indexed by rangetable index. |
| */ |
| void |
| ExecInitRangeTable(EState *estate, List *rangeTable) |
| { |
| /* Remember the range table List as-is */ |
| estate->es_range_table = rangeTable; |
| |
| /* Set size of associated arrays */ |
| estate->es_range_table_size = list_length(rangeTable); |
| |
| /* |
| * Allocate an array to store an open Relation corresponding to each |
| * rangetable entry, and initialize entries to NULL. Relations are opened |
| * and stored here as needed. |
| */ |
| estate->es_relations = (Relation *) |
| palloc0(estate->es_range_table_size * sizeof(Relation)); |
| |
| /* |
| * es_result_relations and es_rowmarks are also parallel to |
| * es_range_table, but are allocated only if needed. |
| */ |
| estate->es_result_relations = NULL; |
| estate->es_rowmarks = NULL; |
| } |
| |
| /* |
| * ExecGetRangeTableRelation |
| * Open the Relation for a range table entry, if not already done |
| * |
| * The Relations will be closed again in ExecEndPlan(). |
| */ |
| Relation |
| ExecGetRangeTableRelation(EState *estate, Index rti) |
| { |
| Relation rel; |
| |
| Assert(rti > 0 && rti <= estate->es_range_table_size); |
| |
| rel = estate->es_relations[rti - 1]; |
| if (rel == NULL) |
| { |
| /* First time through, so open the relation */ |
| RangeTblEntry *rte = exec_rt_fetch(rti, estate); |
| |
| Assert(rte->rtekind == RTE_RELATION); |
| |
| /* GPDB: a QE process is not holding the locks yet, same as a parallel worker. */ |
| if (!IsParallelWorker() && Gp_role != GP_ROLE_EXECUTE) |
| { |
| /* |
| * In a normal query, we should already have the appropriate lock, |
| * but verify that through an Assert. Since there's already an |
| * Assert inside table_open that insists on holding some lock, it |
| * seems sufficient to check this only when rellockmode is higher |
| * than the minimum. |
| */ |
| rel = table_open(rte->relid, NoLock); |
| Assert(rte->rellockmode == AccessShareLock || |
| CheckRelationLockedByMe(rel, rte->rellockmode, false)); |
| } |
| else |
| { |
| /* |
| * If we are a parallel worker, we need to obtain our own local |
| * lock on the relation. This ensures sane behavior in case the |
| * parent process exits before we do. |
| */ |
| rel = table_open(rte->relid, rte->rellockmode); |
| } |
| |
| estate->es_relations[rti - 1] = rel; |
| } |
| |
| return rel; |
| } |
| |
| /* |
| * ExecInitResultRelation |
| * Open relation given by the passed-in RT index and fill its |
| * ResultRelInfo node |
| * |
| * Here, we also save the ResultRelInfo in estate->es_result_relations array |
| * such that it can be accessed later using the RT index. |
| */ |
| void |
| ExecInitResultRelation(EState *estate, ResultRelInfo *resultRelInfo, |
| Index rti) |
| { |
| Relation resultRelationDesc; |
| |
| resultRelationDesc = ExecGetRangeTableRelation(estate, rti); |
| InitResultRelInfo(resultRelInfo, |
| resultRelationDesc, |
| rti, |
| NULL, |
| estate->es_instrument); |
| |
| if (estate->es_result_relations == NULL) |
| estate->es_result_relations = (ResultRelInfo **) |
| palloc0(estate->es_range_table_size * sizeof(ResultRelInfo *)); |
| estate->es_result_relations[rti - 1] = resultRelInfo; |
| |
| /* |
| * Saving it in this list lets us avoid needlessly traversing the whole |
| * array when only a few of its entries are possibly non-NULL. |
| */ |
| estate->es_opened_result_relations = |
| lappend(estate->es_opened_result_relations, resultRelInfo); |
| } |
| |
| /* |
| * UpdateChangedParamSet |
| * Add changed parameters to a plan node's chgParam set |
| */ |
| void |
| UpdateChangedParamSet(PlanState *node, Bitmapset *newchg) |
| { |
| Bitmapset *parmset; |
| |
| /* |
| * The plan node only depends on params listed in its allParam set. Don't |
| * include anything else into its chgParam set. |
| */ |
| parmset = bms_intersect(node->plan->allParam, newchg); |
| |
| /* |
| * Keep node->chgParam == NULL if there aren't actually any members; this |
| * allows the simplest possible tests in executor node files. |
| */ |
| if (!bms_is_empty(parmset)) |
| node->chgParam = bms_join(node->chgParam, parmset); |
| else |
| bms_free(parmset); |
| } |
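| |
| /* |
|  * A sketch of how this is used during rescan processing (illustrative; in |
|  * core the caller is ExecReScan): before rescanning a child node, the |
|  * parent's changed parameters are pushed down so the child knows which of |
|  * its parameters are stale. |
|  * |
|  *    if (node->chgParam != NULL) |
|  *        UpdateChangedParamSet(outerPlanState(node), node->chgParam); |
|  */ |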
| |
| /* |
| * executor_errposition |
| * Report an execution-time cursor position, if possible. |
| * |
| * This is expected to be used within an ereport() call. The return value |
| * is a dummy (always 0, in fact). |
| * |
| * The locations stored in parsetrees are byte offsets into the source string. |
| * We have to convert them to 1-based character indexes for reporting to |
| * clients. (We do things this way to avoid unnecessary overhead in the |
| * normal non-error case: computing character indexes would be much more |
| * expensive than storing token offsets.) |
| */ |
| int |
| executor_errposition(EState *estate, int location) |
| { |
| int pos; |
| |
| /* No-op if location was not provided */ |
| if (location < 0) |
| return 0; |
| /* Can't do anything if source text is not available */ |
| if (estate == NULL || estate->es_sourceText == NULL) |
| return 0; |
| /* Convert offset to character number */ |
| pos = pg_mbstrlen_with_len(estate->es_sourceText, location) + 1; |
| /* And pass it to the ereport mechanism */ |
| return errposition(pos); |
| } |
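| |
| /* |
|  * Typical use, inside an ereport() call (a sketch; "expr" stands for |
|  * whichever expression node supplied the parse location): |
|  * |
|  *    ereport(ERROR, |
|  *            (errcode(ERRCODE_DATA_EXCEPTION), |
|  *             errmsg("something went wrong at runtime"), |
|  *             executor_errposition(estate, exprLocation((Node *) expr)))); |
|  */ |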
| |
| /* |
| * Register a shutdown callback in an ExprContext. |
| * |
| * Shutdown callbacks will be called (in reverse order of registration) |
| * when the ExprContext is deleted or rescanned. This provides a hook |
| * for functions called in the context to do any cleanup needed --- it's |
| * particularly useful for functions returning sets. Note that the |
| * callback will *not* be called in the event that execution is aborted |
| * by an error. |
| */ |
| void |
| RegisterExprContextCallback(ExprContext *econtext, |
| ExprContextCallbackFunction function, |
| Datum arg) |
| { |
| ExprContext_CB *ecxt_callback; |
| |
| /* Save the info in appropriate memory context */ |
| ecxt_callback = (ExprContext_CB *) |
| MemoryContextAlloc(econtext->ecxt_per_query_memory, |
| sizeof(ExprContext_CB)); |
| |
| ecxt_callback->function = function; |
| ecxt_callback->arg = arg; |
| |
| /* link to front of list for appropriate execution order */ |
| ecxt_callback->next = econtext->ecxt_callbacks; |
| econtext->ecxt_callbacks = ecxt_callback; |
| } |
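| |
| /* |
|  * A sketch of typical use by a set-returning function that must release a |
|  * non-memory resource when the calling query ends or rescans (the callback |
|  * name and handle type are hypothetical): |
|  * |
|  *    static void |
|  *    my_srf_cleanup(Datum arg) |
|  *    { |
|  *        MyScanHandle *h = (MyScanHandle *) DatumGetPointer(arg); |
|  * |
|  *        ... close files or other external resources held by h ... |
|  *    } |
|  * |
|  *    RegisterExprContextCallback(econtext, my_srf_cleanup, |
|  *                                PointerGetDatum(handle)); |
|  * |
|  * A function that completes its work normally should call |
|  * UnregisterExprContextCallback() with the same function and argument. |
|  */ |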
| |
| /* |
| * Deregister a shutdown callback in an ExprContext. |
| * |
| * Any list entries matching the function and arg will be removed. |
| * This can be used if it's no longer necessary to call the callback. |
| */ |
| void |
| UnregisterExprContextCallback(ExprContext *econtext, |
| ExprContextCallbackFunction function, |
| Datum arg) |
| { |
| ExprContext_CB **prev_callback; |
| ExprContext_CB *ecxt_callback; |
| |
| prev_callback = &econtext->ecxt_callbacks; |
| |
| while ((ecxt_callback = *prev_callback) != NULL) |
| { |
| if (ecxt_callback->function == function && ecxt_callback->arg == arg) |
| { |
| *prev_callback = ecxt_callback->next; |
| pfree(ecxt_callback); |
| } |
| else |
| prev_callback = &ecxt_callback->next; |
| } |
| } |
| |
| /* |
| * Call all the shutdown callbacks registered in an ExprContext. |
| * |
| * The callback list is emptied (important in case this is only a rescan |
| * reset, and not deletion of the ExprContext). |
| * |
| * If isCommit is false, just clean the callback list but don't call 'em. |
| * (See comment for FreeExprContext.) |
| */ |
| static void |
| ShutdownExprContext(ExprContext *econtext, bool isCommit) |
| { |
| ExprContext_CB *ecxt_callback; |
| MemoryContext oldcontext; |
| |
| /* Fast path in normal case where there's nothing to do. */ |
| if (econtext->ecxt_callbacks == NULL) |
| return; |
| |
| /* |
| * Call the callbacks in econtext's per-tuple context. This ensures that |
| * any memory they might leak will get cleaned up. |
| */ |
| oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); |
| |
| /* |
| * Call each callback function in reverse registration order. |
| */ |
| while ((ecxt_callback = econtext->ecxt_callbacks) != NULL) |
| { |
| econtext->ecxt_callbacks = ecxt_callback->next; |
| if (isCommit) |
| ecxt_callback->function(ecxt_callback->arg); |
| pfree(ecxt_callback); |
| } |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| /* |
| * GetAttributeByName |
| * GetAttributeByNum |
| * |
| * These functions return the value of the requested attribute |
| * out of the given tuple Datum. |
| * C functions which take a tuple as an argument are expected |
| * to use these. Ex: overpaid(EMP) might call GetAttributeByNum(). |
| * Note: these are actually rather slow because they do a typcache |
| * lookup on each call. |
| */ |
| Datum |
| GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull) |
| { |
| AttrNumber attrno; |
| Datum result; |
| Oid tupType; |
| int32 tupTypmod; |
| TupleDesc tupDesc; |
| HeapTupleData tmptup; |
| int i; |
| |
| if (attname == NULL) |
| elog(ERROR, "invalid attribute name"); |
| |
| if (isNull == NULL) |
| elog(ERROR, "a NULL isNull pointer was passed"); |
| |
| if (tuple == NULL) |
| { |
| /* Kinda bogus but compatible with old behavior... */ |
| *isNull = true; |
| return (Datum) 0; |
| } |
| |
| tupType = HeapTupleHeaderGetTypeId(tuple); |
| tupTypmod = HeapTupleHeaderGetTypMod(tuple); |
| tupDesc = lookup_rowtype_tupdesc(tupType, tupTypmod); |
| |
| attrno = InvalidAttrNumber; |
| for (i = 0; i < tupDesc->natts; i++) |
| { |
| Form_pg_attribute att = TupleDescAttr(tupDesc, i); |
| |
| if (namestrcmp(&(att->attname), attname) == 0) |
| { |
| attrno = att->attnum; |
| break; |
| } |
| } |
| |
| if (attrno == InvalidAttrNumber) |
| elog(ERROR, "attribute \"%s\" does not exist", attname); |
| |
| /* |
| * heap_getattr needs a HeapTuple not a bare HeapTupleHeader. We set all |
| * the fields in the struct just in case the user tries to inspect system |
| * columns. |
| */ |
| tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple); |
| ItemPointerSetInvalid(&(tmptup.t_self)); |
| tmptup.t_tableOid = InvalidOid; |
| tmptup.t_data = tuple; |
| |
| result = heap_getattr(&tmptup, |
| attrno, |
| tupDesc, |
| isNull); |
| |
| ReleaseTupleDesc(tupDesc); |
| |
| return result; |
| } |
| |
| Datum |
| GetAttributeByNum(HeapTupleHeader tuple, |
| AttrNumber attrno, |
| bool *isNull) |
| { |
| Datum result; |
| Oid tupType; |
| int32 tupTypmod; |
| TupleDesc tupDesc; |
| HeapTupleData tmptup; |
| |
| if (!AttributeNumberIsValid(attrno)) |
| elog(ERROR, "invalid attribute number %d", attrno); |
| |
| if (isNull == NULL) |
| elog(ERROR, "a NULL isNull pointer was passed"); |
| |
| if (tuple == NULL) |
| { |
| /* Kinda bogus but compatible with old behavior... */ |
| *isNull = true; |
| return (Datum) 0; |
| } |
| |
| tupType = HeapTupleHeaderGetTypeId(tuple); |
| tupTypmod = HeapTupleHeaderGetTypMod(tuple); |
| tupDesc = lookup_rowtype_tupdesc(tupType, tupTypmod); |
| |
| /* |
| * heap_getattr needs a HeapTuple not a bare HeapTupleHeader. We set all |
| * the fields in the struct just in case the user tries to inspect system |
| * columns. |
| */ |
| tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple); |
| ItemPointerSetInvalid(&(tmptup.t_self)); |
| tmptup.t_tableOid = InvalidOid; |
| tmptup.t_data = tuple; |
| |
| result = heap_getattr(&tmptup, |
| attrno, |
| tupDesc, |
| isNull); |
| |
| ReleaseTupleDesc(tupDesc); |
| |
| return result; |
| } |
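| |
| /* |
|  * A sketch of how a C function taking a composite-type argument might use |
|  * these; c_overpaid is the classic documentation example and is shown here |
|  * purely for illustration: |
|  * |
|  *    Datum |
|  *    c_overpaid(PG_FUNCTION_ARGS) |
|  *    { |
|  *        HeapTupleHeader t = PG_GETARG_HEAPTUPLEHEADER(0); |
|  *        int32       limit = PG_GETARG_INT32(1); |
|  *        bool        isnull; |
|  *        Datum       salary; |
|  * |
|  *        salary = GetAttributeByName(t, "salary", &isnull); |
|  *        if (isnull) |
|  *            PG_RETURN_BOOL(false); |
|  *        PG_RETURN_BOOL(DatumGetInt32(salary) > limit); |
|  *    } |
|  */ |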
| |
| /* |
| * Number of items in a tlist (including any resjunk items!) |
| */ |
| int |
| ExecTargetListLength(List *targetlist) |
| { |
| /* This used to be more complex, but fjoins are dead */ |
| return list_length(targetlist); |
| } |
| |
| /* |
| * Number of items in a tlist, not including any resjunk items |
| */ |
| int |
| ExecCleanTargetListLength(List *targetlist) |
| { |
| int len = 0; |
| ListCell *tl; |
| |
| foreach(tl, targetlist) |
| { |
| TargetEntry *curTle = lfirst_node(TargetEntry, tl); |
| |
| if (!curTle->resjunk) |
| len++; |
| } |
| return len; |
| } |
| |
| /* |
| * Return a relInfo's tuple slot for a trigger's OLD tuples. |
| */ |
| TupleTableSlot * |
| ExecGetTriggerOldSlot(EState *estate, ResultRelInfo *relInfo) |
| { |
| if (relInfo->ri_TrigOldSlot == NULL) |
| { |
| Relation rel = relInfo->ri_RelationDesc; |
| MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| relInfo->ri_TrigOldSlot = |
| ExecInitExtraTupleSlot(estate, |
| RelationGetDescr(rel), |
| table_slot_callbacks(rel)); |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| return relInfo->ri_TrigOldSlot; |
| } |
| |
| /* |
| * Return a relInfo's tuple slot for a trigger's NEW tuples. |
| */ |
| TupleTableSlot * |
| ExecGetTriggerNewSlot(EState *estate, ResultRelInfo *relInfo) |
| { |
| if (relInfo->ri_TrigNewSlot == NULL) |
| { |
| Relation rel = relInfo->ri_RelationDesc; |
| MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| relInfo->ri_TrigNewSlot = |
| ExecInitExtraTupleSlot(estate, |
| RelationGetDescr(rel), |
| table_slot_callbacks(rel)); |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| return relInfo->ri_TrigNewSlot; |
| } |
| |
| /* |
| * Return a relInfo's tuple slot for processing returning tuples. |
| */ |
| TupleTableSlot * |
| ExecGetReturningSlot(EState *estate, ResultRelInfo *relInfo) |
| { |
| if (relInfo->ri_ReturningSlot == NULL) |
| { |
| Relation rel = relInfo->ri_RelationDesc; |
| MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| relInfo->ri_ReturningSlot = |
| ExecInitExtraTupleSlot(estate, |
| RelationGetDescr(rel), |
| table_slot_callbacks(rel)); |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| return relInfo->ri_ReturningSlot; |
| } |
| |
| /* |
| * Return the map needed to convert given child result relation's tuples to |
| * the rowtype of the query's main target ("root") relation. Note that a |
| * NULL result is valid and means that no conversion is needed. |
| */ |
| TupleConversionMap * |
| ExecGetChildToRootMap(ResultRelInfo *resultRelInfo) |
| { |
| /* If we didn't already do so, compute the map for this child. */ |
| if (!resultRelInfo->ri_ChildToRootMapValid) |
| { |
| ResultRelInfo *rootRelInfo = resultRelInfo->ri_RootResultRelInfo; |
| |
| if (rootRelInfo) |
| resultRelInfo->ri_ChildToRootMap = |
| convert_tuples_by_name(RelationGetDescr(resultRelInfo->ri_RelationDesc), |
| RelationGetDescr(rootRelInfo->ri_RelationDesc)); |
| else /* this isn't a child result rel */ |
| resultRelInfo->ri_ChildToRootMap = NULL; |
| |
| resultRelInfo->ri_ChildToRootMapValid = true; |
| } |
| |
| return resultRelInfo->ri_ChildToRootMap; |
| } |
| |
| /* Return a bitmap representing columns being inserted */ |
| Bitmapset * |
| ExecGetInsertedCols(ResultRelInfo *relinfo, EState *estate) |
| { |
| /* |
| * The columns are stored in the range table entry. If this ResultRelInfo |
| * represents a partition routing target, and doesn't have an entry of its |
| * own in the range table, fetch the parent's RTE and map the columns to |
| * the order they are in the partition. |
| */ |
| if (relinfo->ri_RangeTableIndex != 0) |
| { |
| RangeTblEntry *rte = exec_rt_fetch(relinfo->ri_RangeTableIndex, estate); |
| |
| return rte->insertedCols; |
| } |
| else if (relinfo->ri_RootResultRelInfo) |
| { |
| ResultRelInfo *rootRelInfo = relinfo->ri_RootResultRelInfo; |
| RangeTblEntry *rte = exec_rt_fetch(rootRelInfo->ri_RangeTableIndex, estate); |
| |
| if (relinfo->ri_RootToPartitionMap != NULL) |
| return execute_attr_map_cols(relinfo->ri_RootToPartitionMap->attrMap, |
| rte->insertedCols); |
| else |
| return rte->insertedCols; |
| } |
| else |
| { |
| /* |
| * The relation isn't in the range table and it isn't a partition |
| * routing target. This ResultRelInfo must've been created only for |
| * firing triggers and the relation is not being inserted into. (See |
| * ExecGetTriggerResultRel.) |
| */ |
| return NULL; |
| } |
| } |
| |
| /* Return a bitmap representing columns being updated */ |
| Bitmapset * |
| ExecGetUpdatedCols(ResultRelInfo *relinfo, EState *estate) |
| { |
| /* see ExecGetInsertedCols() */ |
| if (relinfo->ri_RangeTableIndex != 0) |
| { |
| RangeTblEntry *rte = exec_rt_fetch(relinfo->ri_RangeTableIndex, estate); |
| |
| return rte->updatedCols; |
| } |
| else if (relinfo->ri_RootResultRelInfo) |
| { |
| ResultRelInfo *rootRelInfo = relinfo->ri_RootResultRelInfo; |
| RangeTblEntry *rte = exec_rt_fetch(rootRelInfo->ri_RangeTableIndex, estate); |
| |
| if (relinfo->ri_RootToPartitionMap != NULL) |
| return execute_attr_map_cols(relinfo->ri_RootToPartitionMap->attrMap, |
| rte->updatedCols); |
| else |
| return rte->updatedCols; |
| } |
| else |
| return NULL; |
| } |
| |
| /* Return a bitmap representing generated columns being updated */ |
| Bitmapset * |
| ExecGetExtraUpdatedCols(ResultRelInfo *relinfo, EState *estate) |
| { |
| /* see ExecGetInsertedCols() */ |
| if (relinfo->ri_RangeTableIndex != 0) |
| { |
| RangeTblEntry *rte = exec_rt_fetch(relinfo->ri_RangeTableIndex, estate); |
| |
| return rte->extraUpdatedCols; |
| } |
| else if (relinfo->ri_RootResultRelInfo) |
| { |
| ResultRelInfo *rootRelInfo = relinfo->ri_RootResultRelInfo; |
| RangeTblEntry *rte = exec_rt_fetch(rootRelInfo->ri_RangeTableIndex, estate); |
| |
| if (relinfo->ri_RootToPartitionMap != NULL) |
| return execute_attr_map_cols(relinfo->ri_RootToPartitionMap->attrMap, |
| rte->extraUpdatedCols); |
| else |
| return rte->extraUpdatedCols; |
| } |
| else |
| return NULL; |
| } |
| |
| /* Return columns being updated, including generated columns */ |
| Bitmapset * |
| ExecGetAllUpdatedCols(ResultRelInfo *relinfo, EState *estate) |
| { |
| return bms_union(ExecGetUpdatedCols(relinfo, estate), |
| ExecGetExtraUpdatedCols(relinfo, estate)); |
| } |
| |
| /* |
| * flatten_logic_exprs |
| * This function is only used by ExecPrefetchQual. |
| * ExecPrefetchQual needs to prefetch subplans in a join |
| * qual that contain Motions, materializing them to avoid |
| * motion deadlock. This function flattens the boolean |
| * exprs so that short-circuit evaluation cannot skip any of them. |
| * An example is: |
| * (a and b or c) or (d or e and f or g) and (h and i or j) |
| * will be transformed to |
| * (a, b, c, d, e, f, g, h, i, j). |
| */ |
| static List * |
| flatten_logic_exprs(Node *node) |
| { |
| if (node == NULL) |
| return NIL; |
| |
| if (IsA(node, BoolExpr)) |
| { |
| BoolExpr *be = (BoolExpr *) node; |
| return flatten_logic_exprs((Node *) (be->args)); |
| } |
| |
| if (IsA(node, List)) |
| { |
| List *es = (List *) node; |
| List *result = NIL; |
| ListCell *lc = NULL; |
| |
| foreach(lc, es) |
| { |
| Node *n = (Node *) lfirst(lc); |
| result = list_concat(result, |
| flatten_logic_exprs(n)); |
| } |
| |
| return result; |
| } |
| |
| return list_make1(node); |
| } |
| |
| /* |
| * fake_outer_params |
| * helper function to fake the nestloop's nestParams |
| * so that prefetching the inner plan or the join qual |
| * does not hit a NULL pointer dereference. It is |
| * only invoked from ExecNestLoop and ExecPrefetchQual |
| * when the join is a nestloop join. |
| */ |
| void |
| fake_outer_params(JoinState *node) |
| { |
| ExprContext *econtext = node->ps.ps_ExprContext; |
| PlanState *inner = innerPlanState(node); |
| TupleTableSlot *outerTupleSlot = econtext->ecxt_outertuple; |
| NestLoop *nl = (NestLoop *) (node->ps.plan); |
| ListCell *lc = NULL; |
| |
| /* only nestloop contains nestParams */ |
| Assert(IsA(node->ps.plan, NestLoop)); |
| |
| /* econtext->ecxt_outertuple must already have been set to a fake tuple. */ |
| Assert(outerTupleSlot != NULL); |
| /* |
| * fetch the values of any outer Vars that must be passed to the |
| * inner scan, and store them in the appropriate PARAM_EXEC slots. |
| */ |
| foreach(lc, nl->nestParams) |
| { |
| NestLoopParam *nlp = (NestLoopParam *) lfirst(lc); |
| int paramno = nlp->paramno; |
| ParamExecData *prm; |
| |
| prm = &(econtext->ecxt_param_exec_vals[paramno]); |
| /* Param value should be an OUTER_VAR var */ |
| Assert(IsA(nlp->paramval, Var)); |
| Assert(nlp->paramval->varno == OUTER_VAR); |
| Assert(nlp->paramval->varattno > 0); |
| prm->value = slot_getattr(outerTupleSlot, |
| nlp->paramval->varattno, |
| &(prm->isnull)); |
| /* Flag parameter value as changed */ |
| inner->chgParam = bms_add_member(inner->chgParam, |
| paramno); |
| } |
| } |
| |
| /* |
| * Prefetch JoinQual or NonJoinQual to prevent motion hazard. |
| * |
| * A motion hazard is a deadlock between motions. A classic motion hazard in a |
| * join executor is formed by its inner and outer motions; it can be prevented |
| * by prefetching the inner plan. Refer to motion_sanity_check() for details. |
| * |
| * A similar motion hazard can be formed by the outer motion and the join qual |
| * motion (or non-join-qual motion). A join executor fetches an outer tuple, |
| * filters it with the qual, then repeats the process on all the outer tuples. |
| * When there are motions in both the outer plan and the join qual, the state |
| * below is possible: |
| * |
| * 0. processes A and B belong to the join slice, process C belongs to the |
| * outer slice, process D belongs to the JoinQual(NonJoinQual) slice; |
| * 1. A has read the first outer tuple and is fetching tuples from D; |
| * 2. D is waiting for ACK from B; |
| * 3. B is fetching the first outer tuple from C; |
| * 4. C is waiting for ACK from A; |
| * |
| * So a deadlock A->D->B->C->A is formed. We can also prevent it by |
| * prefetching the join qual or non-join qual. |
| * |
| * An example is demonstrated and explained in test case |
| * src/test/regress/sql/deadlock2.sql. |
| * |
| * The JoinQual or NonJoinQual is prefetched unconditionally when this is called. |
| */ |
| void |
| ExecPrefetchQual(JoinState *node, bool isJoinQual) |
| { |
| EState *estate = node->ps.state; |
| ExprContext *econtext = node->ps.ps_ExprContext; |
| PlanState *inner = innerPlanState(node); |
| PlanState *outer = outerPlanState(node); |
| TupleTableSlot *innertuple = econtext->ecxt_innertuple; |
| |
| ListCell *lc = NULL; |
| List *quals = NIL; |
| |
| ExprState *qual; |
| |
| if (isJoinQual) |
| qual = node->joinqual; |
| else |
| qual = node->ps.qual; |
| |
| Assert(qual); |
| |
| /* Outer tuples should not be fetched before us */ |
| Assert(econtext->ecxt_outertuple == NULL); |
| |
| /* Build fake inner & outer tuples */ |
| econtext->ecxt_innertuple = ExecInitNullTupleSlot(estate, |
| ExecGetResultType(inner), |
| &TTSOpsVirtual); |
| econtext->ecxt_outertuple = ExecInitNullTupleSlot(estate, |
| ExecGetResultType(outer), |
| &TTSOpsVirtual); |
| |
| if (IsA(node->ps.plan, NestLoop)) |
| { |
| NestLoop *nl = (NestLoop *) (node->ps.plan); |
| if (nl->nestParams) |
| fake_outer_params(node); |
| } |
| |
| quals = flatten_logic_exprs((Node *) qual); |
| |
| /* Fetch subplan with the fake inner & outer tuples */ |
| foreach(lc, quals) |
| { |
| /* |
| * Prefetch every qual, because our goal |
| * is to materialize the Motion nodes. |
| */ |
| ExprState *clause = (ExprState *) lfirst(lc); |
| (void) ExecQual(clause, econtext); |
| } |
| |
| /* Restore previous state */ |
| econtext->ecxt_innertuple = innertuple; |
| econtext->ecxt_outertuple = NULL; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * CDB Slice Table utilities |
| * ---------------------------------------------------------------- |
| */ |
| |
| |
| static void |
| FillSliceGangInfo(ExecSlice *slice, PlanSlice *ps) |
| { |
| int factor = ps->parallel_workers ? ps->parallel_workers : 1; |
| int numsegments = ps->numsegments; |
| |
| DirectDispatchInfo *dd = &ps->directDispatch; |
| |
| slice->useMppParallelMode = (ps->parallel_workers != 0); |
| slice->parallel_workers = factor; |
| switch (slice->gangType) |
| { |
| case GANGTYPE_UNALLOCATED: |
| /* |
| * It's either the root slice or an InitPlan slice that runs in |
| * the QD process, or an entirely unused slice. |
| */ |
| /* CBDB_PARALLEL_FIXME: the QD process should never be parallel; do we need to account for factor here? */ |
| slice->planNumSegments = 1; |
| break; |
| case GANGTYPE_PRIMARY_WRITER: |
| case GANGTYPE_PRIMARY_READER: |
| slice->planNumSegments = numsegments * factor; |
| if (dd->isDirectDispatch) |
| { |
| int i; |
| ListCell *lc; |
| |
| foreach(lc, dd->contentIds) |
| { |
| int segment = lfirst_int(lc); |
| for (i = 0; i < factor; i++) |
| slice->segments = lappend_int(slice->segments, segment); |
| } |
| } |
| else |
| { |
| int i, j; |
| slice->segments = NIL; |
| for (i = 0; i < numsegments; i++) |
| for (j = 0; j < factor; j++) |
| slice->segments = lappend_int(slice->segments, i % getgpsegmentCount()); |
| } |
| break; |
| case GANGTYPE_ENTRYDB_READER: |
| /* CBDB_PARALLEL_FIXME: QD parallel is disabled */ |
| slice->planNumSegments = 1; |
| slice->segments = list_make1_int(-1); |
| break; |
| case GANGTYPE_SINGLETON_READER: |
| /* |
| * CBDB_PARALLEL_FIXME: |
| * Could be parallel, e.g. a parallel scan on replicated tables. |
| */ |
| slice->planNumSegments = 1 * factor; |
| int i; |
| for (i = 0; i < factor; i++) |
| slice->segments = lappend_int(slice->segments, ps->segindex); |
| break; |
| default: |
| elog(ERROR, "unexpected gang type"); |
| } |
| } |
| |
| /* |
| * Create the executor slice table. |
| * |
| * The planner constructed a slice table, in plannedstmt->slices. Turn that |
| * into an "executor slice table", with slightly more information. The gangs |
| * to execute the slices will be set up later. |
| */ |
| SliceTable * |
| InitSliceTable(EState *estate, PlannedStmt *plannedstmt) |
| { |
| SliceTable *table; |
| int i; |
| int numSlices; |
| MemoryContext oldcontext; |
| |
| numSlices = plannedstmt->numSlices; |
| Assert(numSlices > 0); |
| |
| if (gp_max_slices > 0 && numSlices > gp_max_slices) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("at most %d slices are allowed in a query, current number: %d", |
| gp_max_slices, numSlices), |
| errhint("rewrite your query or adjust GUC gp_max_slices"))); |
| |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| table = makeNode(SliceTable); |
| table->instrument_options = INSTRUMENT_NONE; |
| table->hasMotions = false; |
| |
| /* |
| * Initialize the executor slice table. |
| * |
| * We have most of the information in the planner slice table. In addition to that, |
| * we set up the parent-child relationships. |
| */ |
| table->slices = palloc0(sizeof(ExecSlice) * numSlices); |
| for (i = 0; i < numSlices; i++) |
| { |
| ExecSlice *currExecSlice = &table->slices[i]; |
| PlanSlice *currPlanSlice = &plannedstmt->slices[i]; |
| int parentIndex; |
| int rootIndex; |
| |
| currExecSlice->sliceIndex = i; |
| currExecSlice->planNumSegments = currPlanSlice->numsegments; |
| currExecSlice->segments = NIL; |
| currExecSlice->primaryGang = NULL; |
| currExecSlice->primaryProcesses = NIL; |
| |
| parentIndex = currPlanSlice->parentIndex; |
| if (parentIndex < -1 || parentIndex >= numSlices) |
| elog(ERROR, "invalid parent slice index %d", currPlanSlice->parentIndex); |
| |
| if (parentIndex >= 0) |
| { |
| ExecSlice *parentExecSlice = &table->slices[parentIndex]; |
| int counter; |
| |
| /* Sending slice is a child of recv slice */ |
| parentExecSlice->children = lappend_int(parentExecSlice->children, currPlanSlice->sliceIndex); |
| |
| /* Find the root slice */ |
| rootIndex = i; |
| counter = 0; |
| while (plannedstmt->slices[rootIndex].parentIndex >= 0) |
| { |
| rootIndex = plannedstmt->slices[rootIndex].parentIndex; |
| |
| if (counter++ > numSlices) |
| elog(ERROR, "circular parent-child relationship in slice table"); |
| } |
| table->hasMotions = true; |
| } |
| else |
| rootIndex = i; |
| |
| /* find root of this slice. All the parents should be initialized already */ |
| |
| currExecSlice->parentIndex = parentIndex; |
| currExecSlice->rootIndex = rootIndex; |
| currExecSlice->gangType = currPlanSlice->gangType; |
| |
| FillSliceGangInfo(currExecSlice, currPlanSlice); |
| } |
| table->numSlices = numSlices; |
| |
| /* |
| * For CTAS, although the data may be distributed on only part of the |
| * segments, the catalog changes must be dispatched to all the |
| * segments, so a full gang is required. |
| */ |
| if ((plannedstmt->intoClause != NULL || plannedstmt->copyIntoClause != NULL || plannedstmt->refreshClause)) |
| { |
| if (table->slices[0].gangType == GANGTYPE_PRIMARY_WRITER) |
| { |
| int numsegments = getgpsegmentCount(); |
| |
| table->slices[0].segments = NIL; |
| for (i = 0; i < numsegments; i++) |
| table->slices[0].segments = lappend_int(table->slices[0].segments, i); |
| } |
| } |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return table; |
| } |
| |
| /* |
| * A forgiving slice table indexer that returns the indexed Slice* or NULL |
| */ |
| ExecSlice * |
| getCurrentSlice(EState *estate, int sliceIndex) |
| { |
| SliceTable *sliceTable = estate->es_sliceTable; |
| |
| if (sliceTable && |
| sliceIndex >= 0 && |
| sliceIndex < sliceTable->numSlices) |
| return &sliceTable->slices[sliceIndex]; |
| |
| return NULL; |
| } |
| |
| /* Should the slice run on the QD? |
| * |
| * N.B. Not the same as !sliceRunsOnQE(slice), when slice is NULL. |
| */ |
| bool |
| sliceRunsOnQD(ExecSlice *slice) |
| { |
| return (slice != NULL && slice->gangType == GANGTYPE_UNALLOCATED); |
| } |
| |
| |
| /* Should the slice run on a QE? |
| * |
| * N.B. Not the same as !sliceRunsOnQD(slice), when slice is NULL. |
| */ |
| bool |
| sliceRunsOnQE(ExecSlice *slice) |
| { |
| return (slice != NULL && slice->gangType != GANGTYPE_UNALLOCATED); |
| } |
| |
| /* Forward declarations */ |
| static bool AssignWriterGangFirst(CdbDispatcherState *ds, SliceTable *sliceTable, int sliceIndex); |
| static void InventorySliceTree(CdbDispatcherState *ds, SliceTable *sliceTable, int sliceIndex); |
| |
| /* |
| * Function AssignGangs runs on the QD and finishes construction of the |
| * global slice table for a plan by assigning gangs allocated by the |
| * executor factory to the slices of the slice table. |
| * |
| * On entry, the executor slice table (at queryDesc->estate->es_sliceTable) |
| * has already been initialized correctly by InitSliceTable(). |
| * |
| * Gang assignment involves taking an inventory of the requirements of |
| * each slice tree in the slice table, asking the executor factory to |
| * allocate a minimal set of gangs that can satisfy any of the slice trees, |
| * and associating the allocated gangs with slices in the slice table. |
| * |
| * On successful exit, the CDBProcess lists (primaryProcesses, mirrorProcesses) |
| * and the Gang pointers (primaryGang, mirrorGang) are set correctly in each |
| * slice in the slice table. |
| */ |
| void |
| AssignGangs(CdbDispatcherState *ds, QueryDesc *queryDesc) |
| { |
| SliceTable *sliceTable; |
| EState *estate; |
| int rootIdx; |
| |
| estate = queryDesc->estate; |
| sliceTable = estate->es_sliceTable; |
| rootIdx = RootSliceIndex(queryDesc->estate); |
| |
| /* clean up processesMap because initPlans and the main plan share the same slice table */ |
| for (int i = 0; i < sliceTable->numSlices; i++) |
| sliceTable->slices[i].processesMap = NULL; |
| |
| AssignWriterGangFirst(ds, sliceTable, rootIdx); |
| InventorySliceTree(ds, sliceTable, rootIdx); |
| } |
| |
| /* |
| * AssignWriterGangFirst() - Try to assign writer gang first. |
| * |
| * For gang allocation, our current implementation requires that the first |
| * allocated gang be the writer gang. |
| * There are several reasons for this: |
| * - For lock holding: because of our MPP structure, we assign a lock holder |
| * for each segment when executing a query. lockHolder is the gang member that |
| * should hold and manage locks for this transaction. On the QEs, it should |
| * normally be the writer gang member. For details, refer to |
| * lockHolderProcPtr in lock.c. |
| * - For the SharedSnapshot shared among a session's gang processes on a |
| * particular segment: during InitPostgres(), a reader QE will try to look up |
| * the shared slot written by the writer QE. For details, refer to |
| * sharedsnapshot.c. |
| * |
| * Normally, the writer slice will be assign writer gang first when iterate the |
| * slice table. But this is not true for writable CTE (with only one writer gang). |
| * For below statement: |
| * |
| * WITH updated AS (update tbl set rank = 6 where id = 5 returning rank) |
| * select * from tbl where rank in (select rank from updated); |
| * QUERY PLAN |
| * ---------------------------------------------------------------------------------------------- |
| * Gather Motion 3:1 (slice1; segments: 3) |
| * -> Seq Scan on tbl |
| * Filter: (hashed SubPlan 1) |
| * SubPlan 1 |
| * -> Broadcast Motion 1:3 (slice2; segments: 1) |
| * -> Update on tbl |
| * -> Seq Scan on tbl |
| * Filter: (id = 5) |
| * Slice 0: Dispatcher; root 0; parent -1; gang size 0 |
| * Slice 1: Reader; root 0; parent 0; gang size 3 |
| * Slice 2: Primary Writer; root 0; parent 1; gang size 1 |
| * |
 * If we still assigned the writer gang to Slice 1 here, the writer processes
 * would execute on a reader gang. So, find the writer slice and assign the
 * writer gang to it first.
| */ |
| static bool |
| AssignWriterGangFirst(CdbDispatcherState *ds, SliceTable *sliceTable, int sliceIndex) |
| { |
| ExecSlice *slice = &sliceTable->slices[sliceIndex]; |
| |
| if (slice->gangType == GANGTYPE_PRIMARY_WRITER) |
| { |
| Assert(slice->primaryGang == NULL); |
| Assert(slice->segments != NIL); |
| slice->primaryGang = AllocateGang(ds, slice->gangType, slice->segments); |
| setupCdbProcessList(slice); |
| return true; |
| } |
| else |
| { |
| ListCell *cell; |
| foreach(cell, slice->children) |
| { |
| int childIndex = lfirst_int(cell); |
| if (AssignWriterGangFirst(ds, sliceTable, childIndex)) |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /* |
 * Helper for AssignGangs that takes a simple inventory of the gangs required
 * by a slice tree and allocates them as needed. Recursive. Closely coupled
 * with AssignGangs; not generally useful elsewhere.
| */ |
| static void |
| InventorySliceTree(CdbDispatcherState *ds, SliceTable *sliceTable, int sliceIndex) |
| { |
| ExecSlice *slice = &sliceTable->slices[sliceIndex]; |
| ListCell *cell; |
| |
| if (slice->gangType == GANGTYPE_UNALLOCATED) |
| { |
| slice->primaryGang = NULL; |
| slice->primaryProcesses = getCdbProcessesForQD(true); |
| } |
| else if (!slice->primaryGang) |
| { |
| Assert(slice->segments != NIL); |
| slice->primaryGang = AllocateGang(ds, slice->gangType, slice->segments); |
| setupCdbProcessList(slice); |
| } |
| |
| foreach(cell, slice->children) |
| { |
| int childIndex = lfirst_int(cell); |
| |
| InventorySliceTree(ds, sliceTable, childIndex); |
| } |
| } |
| |
| /* |
| * Choose the execution identity (who does this executor serve?). |
 * There are three types:
| * |
| * 1. No-Op (ignore) -- this occurs when the specified direction is |
| * NoMovementScanDirection or when Gp_role is GP_ROLE_DISPATCH |
| * and the current slice belongs to a QE. |
| * |
| * 2. Executor serves a Root Slice -- this occurs when Gp_role is |
| * GP_ROLE_UTILITY or the current slice is a root. It corresponds |
| * to the "normal" path through the executor in that we enter the plan |
| * at the top and count on the motion nodes at the fringe of the top |
| * slice to return without ever calling nodes below them. |
| * |
| * 3. Executor serves a Non-Root Slice on a QE -- this occurs when |
| * Gp_role is GP_ROLE_EXECUTE and the current slice is not a root |
| * slice. It corresponds to a QE running a slice with a motion node on |
| * top. The call, thus, returns no tuples (since they all go out |
| * on the interconnect to the receiver version of the motion node), |
| * but it does execute the indicated slice down to any fringe |
| * motion nodes (as in case 2). |
| */ |
| GpExecIdentity |
| getGpExecIdentity(QueryDesc *queryDesc, |
| ScanDirection direction, |
| EState *estate) |
| { |
| ExecSlice *currentSlice; |
| |
| currentSlice = getCurrentSlice(estate, LocallyExecutingSliceIndex(estate)); |
| if (currentSlice) |
| { |
| if (Gp_role == GP_ROLE_EXECUTE || |
| sliceRunsOnQD(currentSlice)) |
| currentSliceId = currentSlice->sliceIndex; |
| } |
| |
| /* select the strategy */ |
| if (direction == NoMovementScanDirection) |
| { |
| return GP_IGNORE; |
| } |
| else if (Gp_role == GP_ROLE_DISPATCH && sliceRunsOnQE(currentSlice)) |
| { |
| return GP_IGNORE; |
| } |
| else if (Gp_role == GP_ROLE_EXECUTE && LocallyExecutingSliceIndex(estate) != RootSliceIndex(estate)) |
| { |
| return GP_NON_ROOT_ON_QE; |
| } |
| else |
| { |
| return GP_ROOT_SLICE; |
| } |
| } |
| |
| /* |
| * End the gp-specific part of the executor. |
| * |
 * Here we collect the dispatch results, if any, and tear down the
 * interconnect if it is set up.
| */ |
| void mppExecutorFinishup(QueryDesc *queryDesc) |
| { |
| EState *estate; |
| ExecSlice *currentSlice; |
| int primaryWriterSliceIndex; |
| |
| /* caller must have switched into per-query memory context already */ |
| estate = queryDesc->estate; |
| |
| currentSlice = getCurrentSlice(estate, LocallyExecutingSliceIndex(estate)); |
| primaryWriterSliceIndex = PrimaryWriterSliceIndex(estate); |
| |
| /* Teardown the Interconnect */ |
| if (estate->es_interconnect_is_setup) |
| { |
| Assert(CurrentMotionIPCLayer); |
| CurrentMotionIPCLayer->TeardownInterconnect(estate->interconnect_context, false); |
| estate->interconnect_context = NULL; |
| estate->es_interconnect_is_setup = false; |
| } |
| /* |
	 * If we are finishing a query before all the tuples of the query
	 * plan were fetched, we must call ExecSquelchNode before checking
	 * the dispatch results, in order to tell the nodes below that we no
	 * longer need any more tuples.
| */ |
| if (Gp_role == GP_ROLE_DISPATCH && !estate->es_got_eos && |
| !(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)) |
| { |
| ExecSquelchNode(queryDesc->planstate, true); |
| } |
| /* |
| * If QD, wait for QEs to finish and check their results. |
| */ |
| if (estate->dispatcherState && estate->dispatcherState->primaryResults) |
| { |
| CdbDispatchResults *pr = NULL; |
| CdbDispatcherState *ds = estate->dispatcherState; |
| DispatchWaitMode waitMode = DISPATCH_WAIT_NONE; |
| ErrorData *qeError = NULL; |
| |
| /* |
| * Wait for completion of all QEs. We send a "graceful" query |
		 * finish, not a cancel signal.  Since the query has succeeded,
		 * don't confuse the QEs by sending an erroneous message.
| */ |
| if (estate->cancelUnfinished) |
| waitMode = DISPATCH_WAIT_FINISH; |
| |
| cdbdisp_checkDispatchResult(ds, waitMode); |
| |
| pr = cdbdisp_getDispatchResults(ds, &qeError); |
| |
| if (qeError) |
| { |
| estate->dispatcherState = NULL; |
| FlushErrorState(); |
| ThrowErrorData(qeError); |
| } |
| |
| if (ProcessDispatchResult_hook) |
| ProcessDispatchResult_hook(ds); |
| |
| /* collect pgstat from QEs for current transaction level */ |
| pgstat_combine_from_qe(pr, primaryWriterSliceIndex); |
| |
| /* get num of rows processed from writer QEs. */ |
| estate->es_processed += |
| cdbdisp_sumCmdTuples(pr, primaryWriterSliceIndex); |
| |
| /* sum up rejected rows if any (single row error handling only) */ |
| cdbdisp_sumRejectedRows(pr); |
| |
| /* |
| * Check and free the results of all gangs. If any QE had an |
| * error, report it and exit to our error handler via PG_THROW. |
| * NB: This call doesn't wait, because we already waited above. |
| */ |
| estate->dispatcherState = NULL; |
| cdbdisp_destroyDispatcherState(ds); |
| } |
| } |
| |
| /* |
 * On the QD, wait for the QEs to finish and check their results.
| */ |
| uint64 mppExecutorWait(QueryDesc *queryDesc) |
| { |
| EState *estate; |
| ExecSlice *currentSlice; |
| int primaryWriterSliceIndex; |
| uint64 es_processed = 0; |
| |
| /* caller must have switched into per-query memory context already */ |
| estate = queryDesc->estate; |
| |
| currentSlice = getCurrentSlice(estate, LocallyExecutingSliceIndex(estate)); |
| primaryWriterSliceIndex = PrimaryWriterSliceIndex(estate); |
| |
| /* |
| * If QD, wait for QEs to finish and check their results. |
| */ |
| if (estate->dispatcherState && estate->dispatcherState->primaryResults) |
| { |
| CdbDispatchResults *pr = NULL; |
| CdbDispatcherState *ds = estate->dispatcherState; |
| DispatchWaitMode waitMode = DISPATCH_WAIT_NONE; |
| ErrorData *qeError = NULL; |
| |
| /* |
| * If we are finishing a query before all the tuples of the query |
| * plan were fetched we must call ExecSquelchNode before checking |
| * the dispatch results in order to tell the nodes below we no longer |
| * need any more tuples. |
| */ |
| if (!estate->es_got_eos) |
| { |
| ExecSquelchNode(queryDesc->planstate, true); |
| } |
| |
| /* |
| * Wait for completion of all QEs. We send a "graceful" query |
		 * finish, not a cancel signal.  Since the query has succeeded,
		 * don't confuse the QEs by sending an erroneous message.
| */ |
| if (estate->cancelUnfinished) |
| waitMode = DISPATCH_WAIT_FINISH; |
| |
| cdbdisp_checkDispatchResult(ds, waitMode); |
| |
| pr = cdbdisp_getDispatchResults(ds, &qeError); |
| |
| if (qeError) |
| { |
| FlushErrorState(); |
| ReThrowError(qeError); |
| } |
| |
| if (ProcessDispatchResult_hook) |
| ProcessDispatchResult_hook(ds); |
| |
| /* collect pgstat from QEs for current transaction level */ |
| pgstat_combine_from_qe(pr, primaryWriterSliceIndex); |
| |
| if (queryDesc->planstate->instrument && queryDesc->planstate->instrument->need_cdb) |
| { |
| cdbexplain_recvExecStats(queryDesc->planstate, ds->primaryResults, |
| LocallyExecutingSliceIndex(queryDesc->estate), |
| estate->showstatctx); |
| } |
| /* get num of rows processed from writer QEs. */ |
| es_processed += |
| cdbdisp_sumCmdTuples(pr, primaryWriterSliceIndex); |
| |
| /* sum up rejected rows if any (single row error handling only) */ |
| cdbdisp_sumRejectedRows(pr); |
| |
| /* |
| * Check and free the results of all gangs. If any QE had an |
| * error, report it and exit to our error handler via PG_THROW. |
| * NB: This call doesn't wait, because we already waited above. |
| */ |
| estate->dispatcherState = NULL; |
| cdbdisp_destroyDispatcherState(ds); |
| } |
| |
| return es_processed; |
| } |
| |
| /* |
 * Clean up the gp-specific parts of the query executor.
| * |
| * Will normally be called after an error from within a CATCH block. |
| */ |
| void mppExecutorCleanup(QueryDesc *queryDesc) |
| { |
| CdbDispatcherState *ds; |
| EState *estate; |
| |
| /* caller must have switched into per-query memory context already */ |
| estate = queryDesc->estate; |
| ds = estate->dispatcherState; |
| |
| /* GPDB hook for collecting query info */ |
| if (query_info_collect_hook && QueryCancelCleanup) |
| (*query_info_collect_hook)(METRICS_QUERY_CANCELING, queryDesc); |
| /* Clean up the interconnect. */ |
| if (estate->es_interconnect_is_setup) |
| { |
| Assert(CurrentMotionIPCLayer); |
| CurrentMotionIPCLayer->TeardownInterconnect(estate->interconnect_context, true); |
| estate->es_interconnect_is_setup = false; |
| } |
| |
| /* |
| * Request any commands still executing on qExecs to stop. |
| * Wait for them to finish and clean up the dispatching structures. |
| * Replace current error info with QE error info if more interesting. |
| */ |
| if (ds) |
| { |
| estate->dispatcherState = NULL; |
| CdbDispatchHandleError(ds); |
| } |
| |
| /* GPDB hook for collecting query info */ |
| if (query_info_collect_hook) |
| (*query_info_collect_hook)(QueryCancelCleanup ? METRICS_QUERY_CANCELED : METRICS_QUERY_ERROR, queryDesc); |
| |
| ReportOOMConsumption(); |
| |
| /** |
| * Since there was an error, clean up the function scan stack. |
| */ |
| if (!IsResManagerMemoryPolicyNone()) |
| { |
| SPI_InitMemoryReservation(); |
| } |
| } |
| |
| /** |
| * This method is used to determine how much memory a specific operator |
| * is supposed to use (in KB). |
| */ |
| uint64 PlanStateOperatorMemKB(const PlanState *ps) |
| { |
| Assert(ps); |
| Assert(ps->plan); |
| uint64 result = 0; |
| if (ps->plan->operatorMemKB == 0) |
| { |
| /** |
		 * There are some statements that do not go through the resource queue, so their
		 * plans don't get decorated with operatorMemKB. Someday, we should fix resource queues.
| */ |
| result = work_mem; |
| } |
| else |
| { |
| result = ps->plan->operatorMemKB; |
| } |
| |
| return result; |
| } |
| |
| /** |
 * Support for finding a MotionState object within a PlanState tree, given a motion id (which is the same as the slice index).
| */ |
| typedef struct MotionStateFinderContext |
| { |
| int motionId; /* Input */ |
| MotionState *motionState; /* Output */ |
| } MotionStateFinderContext; |
| |
| /** |
 * Walker method that finds a MotionState node within a PlanState tree.
| */ |
| static CdbVisitOpt |
| MotionStateFinderWalker(PlanState *node, |
| void *context) |
| { |
| Assert(context); |
| MotionStateFinderContext *ctx = (MotionStateFinderContext *) context; |
| |
| if (IsA(node, MotionState)) |
| { |
| MotionState *ms = (MotionState *) node; |
| Motion *m = (Motion *) ms->ps.plan; |
| if (m->motionID == ctx->motionId) |
| { |
| Assert(ctx->motionState == NULL); |
| ctx->motionState = ms; |
| return CdbVisit_Success; /* don't visit subtree */ |
| } |
| } |
| |
| /* Continue walking */ |
| return CdbVisit_Walk; |
| } |
| |
| /** |
 * Given a slice index, find the MotionState that corresponds to it. This
 * iterates over the PlanState tree to find the right node.
| */ |
| MotionState * |
| getMotionState(struct PlanState *ps, int sliceIndex) |
| { |
| Assert(ps); |
| Assert(sliceIndex > -1); |
| |
| MotionStateFinderContext ctx; |
| ctx.motionId = sliceIndex; |
| ctx.motionState = NULL; |
| planstate_walk_node(ps, MotionStateFinderWalker, &ctx); |
| |
| if (ctx.motionState == NULL) |
| elog(ERROR, "could not find MotionState for slice %d in executor tree", sliceIndex); |
| |
| return ctx.motionState; |
| } |
| |
| typedef struct MotionFinderContext |
| { |
| plan_tree_base_prefix base; /* Required prefix for plan_tree_walker/mutator */ |
| int motionId; /* Input */ |
| Motion *motion; /* Output */ |
| } MotionFinderContext; |
| |
| /* |
| * Walker to find a motion node that matches a particular motionID |
| */ |
| static bool |
| MotionFinderWalker(Plan *node, |
| void *context) |
| { |
| Assert(context); |
| MotionFinderContext *ctx = (MotionFinderContext *) context; |
| |
| |
| if (node == NULL) |
| return false; |
| |
| if (IsA(node, Motion)) |
| { |
| Motion *m = (Motion *) node; |
| if (m->motionID == ctx->motionId) |
| { |
| ctx->motion = m; |
| return true; /* found our node; no more visit */ |
| } |
| } |
| |
| /* Continue walking */ |
| return plan_tree_walker((Node*)node, MotionFinderWalker, ctx, true); |
| } |
| |
| /* |
| * Given the Plan and a Slice index, find the motion node that is the root of the slice's subtree. |
| */ |
| Motion *findSenderMotion(PlannedStmt *plannedstmt, int sliceIndex) |
| { |
| Assert(sliceIndex > -1); |
| |
| Plan *planTree = plannedstmt->planTree; |
| MotionFinderContext ctx; |
| ctx.base.node = (Node*)plannedstmt; |
| ctx.motionId = sliceIndex; |
| ctx.motion = NULL; |
| MotionFinderWalker(planTree, &ctx); |
| return ctx.motion; |
| } |
| |
| typedef struct SubPlanFinderContext |
| { |
| plan_tree_base_prefix base; /* Required prefix for plan_tree_walker/mutator */ |
| Bitmapset *bms_subplans; /* Bitmapset for relevant subplans in current slice */ |
| } SubPlanFinderContext; |
| |
| /* |
| * Walker to find all the subplans in a PlanTree between 'node' and the next motion node |
| */ |
| static bool |
| SubPlanFinderWalker(Node *node, void *context) |
| { |
| SubPlanFinderContext *ctx = (SubPlanFinderContext *) context; |
| |
| if (node == NULL) |
| return false; |
| |
| /* don't recurse into other slices */ |
| if (IsA(node, Motion)) |
| return false; |
| |
| if (IsA(node, SubPlan)) |
| { |
| SubPlan *subplan = (SubPlan *) node; |
| int plan_id = subplan->plan_id; |
| |
| if (!bms_is_member(plan_id, ctx->bms_subplans)) |
| ctx->bms_subplans = bms_add_member(ctx->bms_subplans, plan_id); |
| else |
| return false; |
| } |
| |
| /* Continue walking */ |
| return plan_tree_walker(node, SubPlanFinderWalker, ctx, true); |
| } |
| |
| /* |
 * Given a plan and a slice's root plan node, find all the subplans
 * between 'root_plan' and the next Motion node in the tree.
| */ |
| Bitmapset * |
| getLocallyExecutableSubplans(PlannedStmt *plannedstmt, Plan *root_plan) |
| { |
| SubPlanFinderContext ctx; |
| |
| ctx.base.node = (Node *) plannedstmt; |
| ctx.bms_subplans = NULL; |
| |
| /* |
| * Note that we begin with plan_tree_walker(root_plan), not |
| * SubPlanFinderWalker(root_plan). SubPlanFinderWalker() will stop |
| * at a Motion, but a slice typically has a Motion at the top. We want |
| * to recurse into the children of the top Motion, as well as any |
| * initPlans, targetlist, and other fields on the Motion itself. They |
| * are all considered part of the sending slice. |
| */ |
| (void) plan_tree_walker((Node *) root_plan, SubPlanFinderWalker, &ctx, true); |
| |
| return ctx.bms_subplans; |
| } |
| |
| /* |
| * Copy PARAM_EXEC parameter values that were received from the QD into |
| * our EState. |
| */ |
| void |
| InstallDispatchedExecParams(QueryDispatchDesc *ddesc, EState *estate) |
| { |
| Assert(Gp_role == GP_ROLE_EXECUTE); |
| |
| if (ddesc->paramInfo == NULL) |
| return; |
| |
| for (int i = 0; i < ddesc->paramInfo->nExecParams; i++) |
| { |
| SerializedParamExecData *sprm = &ddesc->paramInfo->execParams[i]; |
| ParamExecData *prmExec = &estate->es_param_exec_vals[i]; |
| |
| if (!sprm->isvalid) |
| continue; /* not dispatched */ |
| |
| prmExec->execPlan = NULL; |
| prmExec->value = sprm->value; |
| prmExec->isnull = sprm->isnull; |
| } |
| } |
| |
| /** |
| * Provide index of locally executing slice |
| */ |
| int LocallyExecutingSliceIndex(EState *estate) |
| { |
| Assert(estate); |
| return (!estate->es_sliceTable ? 0 : estate->es_sliceTable->localSlice); |
| } |
| |
| /** |
| * Provide index of slice being executed on the primary writer gang |
| */ |
| int |
| PrimaryWriterSliceIndex(EState *estate) |
| { |
| if (!estate->es_sliceTable) |
| return 0; |
| |
| for (int i = 0; i < estate->es_sliceTable->numSlices; i++) |
| { |
| ExecSlice *slice = &estate->es_sliceTable->slices[i]; |
| |
| if (slice->gangType == GANGTYPE_PRIMARY_WRITER) |
| return slice->sliceIndex; |
| } |
| |
| return 0; |
| } |
| |
| /** |
 * Provide the root slice index of the locally executing slice.
| */ |
| int |
| RootSliceIndex(EState *estate) |
| { |
| int result = 0; |
| |
| if (estate->es_sliceTable) |
| { |
| ExecSlice *localSlice = &estate->es_sliceTable->slices[LocallyExecutingSliceIndex(estate)]; |
| |
| result = localSlice->rootIndex; |
| |
		Assert(result >= 0 && result < estate->es_sliceTable->numSlices);
| } |
| |
| return result; |
| } |
| |
| |
| #ifdef USE_ASSERT_CHECKING |
| /** |
 * Assert that the slice table is valid. Must be called after ExecInitMotion, which sets up the slice table.
| */ |
| void AssertSliceTableIsValid(SliceTable *st) |
| { |
| if (!st) |
| return; |
| |
| for (int i = 0; i < st->numSlices; i++) |
| { |
| ExecSlice *s = &st->slices[i]; |
| |
| /* The n-th slice entry has sliceIndex of n */ |
| Assert(s->sliceIndex == i && "slice index incorrect"); |
| |
| /* |
| * FIXME: Sometimes the planner produces a plan with unused SubPlans, which |
		 * might contain Motion nodes. We remove unused SubPlans as part of cdbllize(), but
| * there is a scenario with Append nodes where they still occur. |
| * adjust_appendrel_attrs() makes copies of any SubPlans it encounters, which |
		 * happens early in the planning, leaving any SubPlans in the target list of the
| * Append node to point to the original plan_id. The scan in cdbllize() doesn't |
| * eliminate such SubPlans. But set_plan_references() will replace any SubPlans |
| * in the Append's targetlist with references to the outputs of the child nodes, |
| * leaving the original SubPlan unused. |
| * |
| * For now, just tolerate unused slices. |
| */ |
| if (s->rootIndex == -1 && s->parentIndex == -1 && s->gangType == GANGTYPE_UNALLOCATED) |
| continue; |
| |
| /* Parent slice index */ |
| if (s->sliceIndex == s->rootIndex) |
| { |
			/* Current slice is a root slice. It will have parent index -1. */
| Assert(s->parentIndex == -1 && "expecting parent index of -1"); |
| } |
| else |
| { |
| /* All other slices must have a valid parent index */ |
| Assert(s->parentIndex >= 0 && s->parentIndex < st->numSlices && "slice's parent index out of range"); |
| } |
| |
| /* Current slice's children must consider it the parent */ |
| ListCell *lc1 = NULL; |
| foreach (lc1, s->children) |
| { |
| int childIndex = lfirst_int(lc1); |
| Assert(childIndex >= 0 && childIndex < st->numSlices && "invalid child slice"); |
| ExecSlice *sc = &st->slices[childIndex]; |
| Assert(sc->parentIndex == s->sliceIndex && "slice's child does not consider it the parent"); |
| } |
| |
| /* Current slice must be in its parent's children list */ |
| if (s->parentIndex >= 0) |
| { |
| ExecSlice *sp = &st->slices[s->parentIndex]; |
| |
| bool found = false; |
| foreach (lc1, sp->children) |
| { |
| int childIndex = lfirst_int(lc1); |
| Assert(childIndex >= 0 && childIndex < st->numSlices && "invalid child slice"); |
| ExecSlice *sc = &st->slices[childIndex]; |
| |
| if (sc->sliceIndex == s->sliceIndex) |
| { |
| found = true; |
| break; |
| } |
| } |
| |
| Assert(found && "slice's parent does not consider it a child"); |
| } |
| } |
| } |
| #endif |
| |
| /* |
| * During attribute re-mapping for heterogeneous partitions, we use |
| * this struct to identify which varno's attributes will be re-mapped. |
| * Using this struct as a *context* during expression tree walking, we |
| * can skip varattnos that do not belong to a given varno. |
| */ |
| typedef struct AttrMapContext |
| { |
| const AttrNumber *newattno; /* The mapping table to remap the varattno */ |
| Index varno; /* Which rte's varattno to re-map */ |
| } AttrMapContext; |
| |
| /* |
 * Remaps the varattno of a Var node using an attribute map.
| */ |
| static bool |
| change_varattnos_varno_walker(Node *node, const AttrMapContext *attrMapCxt) |
| { |
| if (node == NULL) |
| return false; |
| if (IsA(node, Var)) |
| { |
| Var *var = (Var *) node; |
| |
| if (var->varlevelsup == 0 && (var->varno == attrMapCxt->varno) && |
| var->varattno > 0) |
| { |
| /* |
| * ??? the following may be a problem when the node is multiply |
| * referenced though stringToNode() doesn't create such a node |
| * currently. |
| */ |
| Assert(attrMapCxt->newattno[var->varattno - 1] > 0); |
| var->varattno = var->varattnosyn = attrMapCxt->newattno[var->varattno - 1]; |
| } |
| return false; |
| } |
| return expression_tree_walker(node, change_varattnos_varno_walker, |
| (void *) attrMapCxt); |
| } |
| |
| /* |
| * Replace varattno values for a given varno RTE index in an expression |
| * tree according to the given map array, that is, varattno N is replaced |
| * by newattno[N-1]. It is caller's responsibility to ensure that the array |
| * is long enough to define values for all user varattnos present in the tree. |
| * System column attnos remain unchanged. |
| * |
| * Note that the passed node tree is modified in-place! |
| */ |
| void |
| change_varattnos_of_a_varno(Node *node, const AttrNumber *newattno, Index varno) |
| { |
| AttrMapContext attrMapCxt; |
| |
| attrMapCxt.newattno = newattno; |
| attrMapCxt.varno = varno; |
| |
| (void) change_varattnos_varno_walker(node, &attrMapCxt); |
| } |