| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /*------------------------------------------------------------------------- |
| * |
| * execUtils.c |
| * miscellaneous executor utility routines |
| * |
| * Portions Copyright (c) 2005-2008, Greenplum inc |
| * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.140.2.3 2007/02/06 17:35:27 tgl Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| /* |
| * INTERFACE ROUTINES |
| * CreateExecutorState Create/delete executor working state |
| * CreateSubExecutorState |
| * FreeExecutorState |
| * CreateExprContext |
| * CreateStandaloneExprContext |
| * FreeExprContext |
| * ReScanExprContext |
| * |
| * ExecAssignExprContext Common code for plan node init routines. |
| * ExecAssignResultType |
| * etc |
| * |
| * ExecOpenScanRelation Common code for scan node init routines. |
| * ExecCloseScanRelation |
| * |
| * ExecOpenIndices \ |
| * ExecCloseIndices | referenced by InitPlan, EndPlan, |
| * ExecInsertIndexTuples / ExecInsert, ExecUpdate |
| * |
| * RegisterExprContextCallback Register function shutdown callback |
| * UnregisterExprContextCallback Deregister function shutdown callback |
| * |
| * NOTES |
| * This file has traditionally been the place to stick misc. |
| * executor support stuff that doesn't really go anyplace else. |
| */ |
| |
| #include "postgres.h" |
| |
| #include "access/genam.h" |
| #include "access/heapam.h" |
| #include "access/appendonlywriter.h" |
| #include "catalog/index.h" |
| #include "executor/execdebug.h" |
| #include "parser/parsetree.h" |
| #include "utils/memutils.h" |
| #include "utils/relcache.h" |
| #include "utils/workfile_mgr.h" |
| |
| #include "cdb/cdbvars.h" |
| #include "nodes/primnodes.h" |
| #include "nodes/execnodes.h" |
| |
| #include "cdb/cdbutil.h" |
| #include "cdb/cdbgang.h" |
| #include "cdb/cdbvars.h" |
| #include "cdb/cdbdisp.h" |
| #include "cdb/dispatcher_new.h" |
| #include "cdb/cdbdispatchresult.h" |
| #include "cdb/ml_ipc.h" |
| #include "cdb/cdbmotion.h" |
| #include "cdb/cdbsreh.h" |
| #include "cdb/memquota.h" |
| #include "cdb/cdbsrlz.h" |
| #include "catalog/catalog.h" // isMasterOnly() |
| #include "executor/spi.h" |
| #include "utils/elog.h" |
| #include "miscadmin.h" |
| #include "nodes/makefuncs.h" |
| #include "cdb/dispatcher.h" |
| #include "postmaster/identity.h" |
| |
| #include "storage/ipc.h" |
| |
| #include "executor/cwrapper/executor-c.h" |
| |
| /* ---------------------------------------------------------------- |
| * global counters for number of tuples processed, retrieved, |
| * appended, replaced, deleted. |
| * ---------------------------------------------------------------- |
| */ |
| int NTupleProcessed; |
| int NTupleRetrieved; |
| int NTupleReplaced; |
| int NTupleAppended; |
| int NTupleDeleted; |
| int NIndexTupleInserted; |
| int NIndexTupleProcessed; |
| |
| DynamicTableScanInfo *dynamicTableScanInfo = NULL; |
| |
| static EState *InternalCreateExecutorState(MemoryContext qcontext, |
| bool is_subquery); |
| static void ShutdownExprContext(ExprContext *econtext); |
| |
| |
| /* ---------------------------------------------------------------- |
| * statistic functions |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------------------------------------------------------- |
| * ResetTupleCount |
| * ---------------------------------------------------------------- |
| */ |
| #ifdef NOT_USED |
| void |
| ResetTupleCount(void) |
| { |
| NTupleProcessed = 0; |
| NTupleRetrieved = 0; |
| NTupleAppended = 0; |
| NTupleDeleted = 0; |
| NTupleReplaced = 0; |
| NIndexTupleProcessed = 0; |
| } |
| #endif |
| |
| /* ---------------------------------------------------------------- |
| * DisplayTupleCount |
| * ---------------------------------------------------------------- |
| */ |
| #ifdef NOT_USED |
| void |
| DisplayTupleCount(FILE *statfp) |
| { |
| if (NTupleProcessed > 0) |
| fprintf(statfp, "!\t%d tuple%s processed, ", NTupleProcessed, |
| (NTupleProcessed == 1) ? "" : "s"); |
| else |
| { |
| fprintf(statfp, "!\tno tuples processed.\n"); |
| return; |
| } |
| if (NIndexTupleProcessed > 0) |
| fprintf(statfp, "%d indextuple%s processed, ", NIndexTupleProcessed, |
| (NIndexTupleProcessed == 1) ? "" : "s"); |
| if (NIndexTupleInserted > 0) |
| fprintf(statfp, "%d indextuple%s inserted, ", NIndexTupleInserted, |
| (NIndexTupleInserted == 1) ? "" : "s"); |
| if (NTupleRetrieved > 0) |
| fprintf(statfp, "%d tuple%s retrieved. ", NTupleRetrieved, |
| (NTupleRetrieved == 1) ? "" : "s"); |
| if (NTupleAppended > 0) |
| fprintf(statfp, "%d tuple%s appended. ", NTupleAppended, |
| (NTupleAppended == 1) ? "" : "s"); |
| if (NTupleDeleted > 0) |
| fprintf(statfp, "%d tuple%s deleted. ", NTupleDeleted, |
| (NTupleDeleted == 1) ? "" : "s"); |
| if (NTupleReplaced > 0) |
| fprintf(statfp, "%d tuple%s replaced. ", NTupleReplaced, |
| (NTupleReplaced == 1) ? "" : "s"); |
| fprintf(statfp, "\n"); |
| } |
| #endif |
| |
| |
| /* ---------------------------------------------------------------- |
| * Executor state and memory management functions |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * CreateExecutorState |
| * |
| * Create and initialize an EState node, which is the root of |
| * working storage for an entire Executor invocation. |
| * |
| * Principally, this creates the per-query memory context that will be |
| * used to hold all working data that lives till the end of the query. |
| * Note that the per-query context will become a child of the caller's |
| * CurrentMemoryContext. |
| * ---------------- |
| */ |
| EState * |
| CreateExecutorState(void) |
| { |
| MemoryContext qcontext; |
| |
| /* |
| * Create the per-query context for this Executor run. |
| */ |
| qcontext = AllocSetContextCreate(CurrentMemoryContext, |
| "ExecutorState", |
| ALLOCSET_DEFAULT_MINSIZE, |
| ALLOCSET_DEFAULT_INITSIZE, |
| ALLOCSET_DEFAULT_MAXSIZE); |
| |
| EState *estate = InternalCreateExecutorState(qcontext, false); |
| |
| /* |
| * Initialize dynamicTableScanInfo. Since this is shared by subqueries, |
| * this cannot be put inside InternalCreateExecutorState. |
| */ |
| MemoryContext oldcontext = MemoryContextSwitchTo(qcontext); |
| |
| estate->dynamicTableScanInfo = palloc0(sizeof(DynamicTableScanInfo)); |
| estate->dynamicTableScanInfo->memoryContext = qcontext; |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return estate; |
| } |
| |
| /* ---------------- |
| * CreateSubExecutorState |
| * |
| * Create and initialize an EState node for a sub-query. |
| * |
| * Ideally, sub-queries probably shouldn't have their own EState at all, |
| * but right now this is necessary because they have their own rangetables |
| * and we access the rangetable via the EState. It is critical that a |
| * sub-query share the parent's es_query_cxt, else structures allocated by |
| * the sub-query (especially its result tuple descriptor) may disappear |
| * too soon during executor shutdown. |
| * ---------------- |
| */ |
| EState * |
| CreateSubExecutorState(EState *parent_estate) |
| { |
| EState *es = InternalCreateExecutorState(parent_estate->es_query_cxt, true); |
| |
| es->showstatctx = parent_estate->showstatctx; /*CDB*/ |
| |
| /* Subqueries share the same dynamicTableScanInfo with their parents. */ |
| es->dynamicTableScanInfo = parent_estate->dynamicTableScanInfo; |
| es->subplanLevel = parent_estate->subplanLevel + 1; |
| return es; |
| } |
| |
| /* |
| * Guts of CreateExecutorState/CreateSubExecutorState |
| */ |
| static EState * |
| InternalCreateExecutorState(MemoryContext qcontext, bool is_subquery) |
| { |
| EState *estate; |
| MemoryContext oldcontext; |
| |
| /* |
| * Make the EState node within the per-query context. This way, we don't |
| * need a separate pfree() operation for it at shutdown. |
| */ |
| oldcontext = MemoryContextSwitchTo(qcontext); |
| |
| estate = makeNode(EState); |
| |
| /* |
| * Initialize all fields of the Executor State structure |
| */ |
| estate->es_direction = ForwardScanDirection; |
| estate->es_snapshot = SnapshotNow; |
| estate->es_crosscheck_snapshot = InvalidSnapshot; /* no crosscheck */ |
| estate->es_range_table = NIL; |
| |
| estate->es_result_relations = NULL; |
| estate->es_num_result_relations = 0; |
| estate->es_result_relation_info = NULL; |
| estate->es_last_inserted_part = InvalidOid; |
| |
| estate->es_junkFilter = NULL; |
| |
| estate->es_trig_tuple_slot = NULL; |
| |
| estate->es_into_relation_descriptor = NULL; |
| estate->es_into_relation_is_bulkload = false; |
| |
| MemSet(&estate->es_into_relation_last_heap_tid, 0, sizeof(ItemPointerData)); |
| |
| estate->es_param_list_info = NULL; |
| estate->es_param_exec_vals = NULL; |
| |
| estate->es_query_cxt = qcontext; |
| |
| estate->es_tupleTable = NULL; |
| |
| estate->es_processed = 0; |
| estate->es_lastoid = InvalidOid; |
| estate->es_rowMarks = NIL; |
| |
| estate->es_is_subquery = is_subquery; |
| |
| estate->es_instrument = false; |
| estate->es_select_into = false; |
| estate->es_into_oids = false; |
| |
| estate->es_exprcontexts = NIL; |
| |
| estate->es_per_tuple_exprcontext = NULL; |
| |
| estate->es_plannedstmt = NULL; |
| estate->es_evalPlanQual = NULL; |
| estate->es_evTupleNull = NULL; |
| estate->es_evTuple = NULL; |
| estate->es_useEvalPlan = false; |
| |
| estate->es_sliceTable = NULL; |
| estate->interconnect_context = NULL; |
| estate->motionlayer_context = NULL; |
| estate->es_interconnect_is_setup = false; |
| estate->active_recv_id = -1; |
| estate->es_got_eos = false; |
| estate->cancelUnfinished = false; |
| estate->terminateOnGoing = false; |
| |
| estate->dispatch_data = NULL; |
| estate->mainDispatchData = NULL; |
| |
| estate->currentSliceIdInPlan = 0; |
| estate->currentExecutingSliceId = 0; |
| estate->subplanLevel = 0; |
| estate->rootSliceId = 0; |
| |
| /* |
| * Return the executor state structure |
| */ |
| MemoryContextSwitchTo(oldcontext); |
| |
| return estate; |
| } |
| |
| /* |
| * freeDynamicTableScanInfo |
| * Free the space for DynamicTableScanInfo. |
| */ |
| static void |
| freeDynamicTableScanInfo(DynamicTableScanInfo *scanInfo) |
| { |
| Assert(scanInfo != NULL); |
| |
| if (scanInfo->partsMetadata != NIL) |
| { |
| list_free_deep(scanInfo->partsMetadata); |
| } |
| |
| if (scanInfo->numSelectorsPerScanId != NIL) |
| { |
| list_free(scanInfo->numSelectorsPerScanId); |
| } |
| |
| pfree(scanInfo); |
| } |
| |
| /* ---------------- |
| * FreeExecutorState |
| * |
| * Release an EState along with all remaining working storage. |
| * |
| * Note: this is not responsible for releasing non-memory resources, |
| * such as open relations or buffer pins. But it will shut down any |
| * still-active ExprContexts within the EState. That is sufficient |
| * cleanup for situations where the EState has only been used for expression |
| * evaluation, and not to run a complete Plan. |
| * |
| * This can be called in any memory context ... so long as it's not one |
| * of the ones to be freed. |
| * ---------------- |
| */ |
| void |
| FreeExecutorState(EState *estate) |
| { |
| /* |
| * Shut down and free any remaining ExprContexts. We do this explicitly |
| * to ensure that any remaining shutdown callbacks get called (since they |
| * might need to release resources that aren't simply memory within the |
| * per-query memory context). |
| */ |
| while (estate->es_exprcontexts) |
| { |
| /* |
| * XXX: seems there ought to be a faster way to implement this than |
| * repeated list_delete(), no? |
| */ |
| FreeExprContext((ExprContext *) linitial(estate->es_exprcontexts)); |
| /* FreeExprContext removed the list link for us */ |
| } |
| |
| /* |
| * Free dynamicTableScanInfo. In a subquery, we don't do this, since |
| * the subquery shares the value with its parent. |
| */ |
| if (!estate->es_is_subquery && |
| estate->dynamicTableScanInfo != NULL) |
| { |
| /* |
| * In case of an abnormal termination such as elog(FATAL) we jump directly to |
| * proc_exit, instead of finishing ExecutorRun() that was supposed to restore |
| * dynamicTableScanInfo. Therefore, in such a case we cannot assert that |
| * dynamicTableScanInfo != estate->dynamicTableScanInfo [JIRA: MPP-23562] |
| */ |
| Assert(proc_exit_inprogress || dynamicTableScanInfo != estate->dynamicTableScanInfo); |
| freeDynamicTableScanInfo(estate->dynamicTableScanInfo); |
| estate->dynamicTableScanInfo = NULL; |
| } |
| |
| /* |
| * Free the per-query memory context, thereby releasing all working |
| * memory, including the EState node itself. In a subquery, we don't |
| * do this, leaving the memory cleanup to happen when the topmost query |
| * is closed down. |
| */ |
| if (!estate->es_is_subquery) |
| MemoryContextDelete(estate->es_query_cxt); |
| } |
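| |
| /* |
| * Illustrative sketch (not compiled; wrapped in NOT_USED like the examples |
| * above): the typical EState lifecycle when the executor machinery is used |
| * only for expression evaluation, as described in FreeExecutorState's |
| * header comment. The boolean expression being tested is hypothetical. |
| */ |
| #ifdef NOT_USED |
| static bool |
| expr_is_true_example(Expr *expr) |
| { |
| EState *estate = CreateExecutorState(); |
| ExprContext *econtext = GetPerTupleExprContext(estate); |
| ExprState *exprstate = ExecPrepareExpr(expr, estate); |
| bool isNull; |
| bool ok; |
| Datum result; |
| |
| result = ExecEvalExprSwitchContext(exprstate, econtext, &isNull, NULL); |
| |
| /* A bool Datum is pass-by-value, so it survives the free below. */ |
| ok = !isNull && DatumGetBool(result); |
| |
| /* Runs remaining shutdown callbacks, then drops all per-query memory. */ |
| FreeExecutorState(estate); |
| |
| return ok; |
| } |
| #endif |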
| |
| /* ---------------- |
| * CreateExprContext |
| * |
| * Create a context for expression evaluation within an EState. |
| * |
| * An executor run may require multiple ExprContexts (we usually make one |
| * for each Plan node, and a separate one for per-output-tuple processing |
| * such as constraint checking). Each ExprContext has its own "per-tuple" |
| * memory context. |
| * |
| * Note we make no assumption about the caller's memory context. |
| * ---------------- |
| */ |
| ExprContext * |
| CreateExprContext(EState *estate) |
| { |
| ExprContext *econtext; |
| MemoryContext oldcontext; |
| |
| /* Create the ExprContext node within the per-query memory context */ |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| econtext = makeNode(ExprContext); |
| |
| /* Initialize fields of ExprContext */ |
| econtext->ecxt_scantuple = NULL; |
| econtext->ecxt_innertuple = NULL; |
| econtext->ecxt_outertuple = NULL; |
| |
| econtext->ecxt_per_query_memory = estate->es_query_cxt; |
| |
| /* |
| * Create working memory for expression evaluation in this context. |
| */ |
| econtext->ecxt_per_tuple_memory = |
| AllocSetContextCreate(estate->es_query_cxt, |
| "ExprContext", |
| ALLOCSET_DEFAULT_MINSIZE, |
| ALLOCSET_DEFAULT_INITSIZE, |
| ALLOCSET_DEFAULT_MAXSIZE); |
| |
| econtext->ecxt_param_exec_vals = estate->es_param_exec_vals; |
| econtext->ecxt_param_list_info = estate->es_param_list_info; |
| |
| econtext->ecxt_aggvalues = NULL; |
| econtext->ecxt_aggnulls = NULL; |
| |
| econtext->caseValue_datum = (Datum) 0; |
| econtext->caseValue_isNull = true; |
| |
| econtext->domainValue_datum = (Datum) 0; |
| econtext->domainValue_isNull = true; |
| |
| econtext->ecxt_estate = estate; |
| |
| econtext->ecxt_callbacks = NULL; |
| |
| /* |
| * Link the ExprContext into the EState to ensure it is shut down when the |
| * EState is freed. Because we use lcons(), shutdowns will occur in |
| * reverse order of creation, which may not be essential but can't hurt. |
| */ |
| estate->es_exprcontexts = lcons(econtext, estate->es_exprcontexts); |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| return econtext; |
| } |
| |
| /* ---------------- |
| * CreateStandaloneExprContext |
| * |
| * Create a context for standalone expression evaluation. |
| * |
| * An ExprContext made this way can be used for evaluation of expressions |
| * that contain no Params, subplans, or Var references (it might work to |
| * put tuple references into the scantuple field, but it seems unwise). |
| * |
| * The ExprContext struct is allocated in the caller's current memory |
| * context, which also becomes its "per query" context. |
| * |
| * It is caller's responsibility to free the ExprContext when done, |
| * or at least ensure that any shutdown callbacks have been called |
| * (ReScanExprContext() is suitable). Otherwise, non-memory resources |
| * might be leaked. |
| * ---------------- |
| */ |
| ExprContext * |
| CreateStandaloneExprContext(void) |
| { |
| ExprContext *econtext; |
| |
| /* Create the ExprContext node within the caller's memory context */ |
| econtext = makeNode(ExprContext); |
| |
| /* Initialize fields of ExprContext */ |
| econtext->ecxt_scantuple = NULL; |
| econtext->ecxt_innertuple = NULL; |
| econtext->ecxt_outertuple = NULL; |
| |
| econtext->ecxt_per_query_memory = CurrentMemoryContext; |
| |
| /* |
| * Create working memory for expression evaluation in this context. |
| */ |
| econtext->ecxt_per_tuple_memory = |
| AllocSetContextCreate(CurrentMemoryContext, |
| "ExprContext", |
| ALLOCSET_DEFAULT_MINSIZE, |
| ALLOCSET_DEFAULT_INITSIZE, |
| ALLOCSET_DEFAULT_MAXSIZE); |
| |
| econtext->ecxt_param_exec_vals = NULL; |
| econtext->ecxt_param_list_info = NULL; |
| |
| econtext->ecxt_aggvalues = NULL; |
| econtext->ecxt_aggnulls = NULL; |
| |
| econtext->caseValue_datum = (Datum) 0; |
| econtext->caseValue_isNull = true; |
| |
| econtext->domainValue_datum = (Datum) 0; |
| econtext->domainValue_isNull = true; |
| |
| econtext->ecxt_estate = NULL; |
| |
| econtext->ecxt_callbacks = NULL; |
| |
| return econtext; |
| } |
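| |
| /* |
| * Illustrative sketch (not compiled): evaluating a Param-free, Var-free |
| * expression in a standalone ExprContext. The ExprState passed in is |
| * hypothetical; note that FreeExprContext() itself runs any remaining |
| * shutdown callbacks, per the contract above. |
| */ |
| #ifdef NOT_USED |
| static void |
| standalone_eval_example(ExprState *exprstate) |
| { |
| ExprContext *econtext = CreateStandaloneExprContext(); |
| bool isNull; |
| Datum result; |
| |
| result = ExecEvalExprSwitchContext(exprstate, econtext, &isNull, NULL); |
| |
| /* ... use "result" while the per-tuple memory still exists ... */ |
| (void) result; |
| |
| /* Fires shutdown callbacks, then frees the per-tuple memory and node. */ |
| FreeExprContext(econtext); |
| } |
| #endif |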
| |
| /* ---------------- |
| * FreeExprContext |
| * |
| * Free an expression context, including calling any remaining |
| * shutdown callbacks. |
| * |
| * Since we free the temporary context used for expression evaluation, |
| * any previously computed pass-by-reference expression result will go away! |
| * |
| * Note we make no assumption about the caller's memory context. |
| * ---------------- |
| */ |
| void |
| FreeExprContext(ExprContext *econtext) |
| { |
| EState *estate; |
| |
| /* Call any registered callbacks */ |
| ShutdownExprContext(econtext); |
| /* And clean up the memory used */ |
| MemoryContextDelete(econtext->ecxt_per_tuple_memory); |
| /* Unlink self from owning EState, if any */ |
| estate = econtext->ecxt_estate; |
| if (estate) |
| estate->es_exprcontexts = list_delete_ptr(estate->es_exprcontexts, |
| econtext); |
| /* And delete the ExprContext node */ |
| pfree(econtext); |
| } |
| |
| /* |
| * ReScanExprContext |
| * |
| * Reset an expression context in preparation for a rescan of its |
| * plan node. This requires calling any registered shutdown callbacks, |
| * since any partially complete set-returning-functions must be canceled. |
| * |
| * Note we make no assumption about the caller's memory context. |
| */ |
| void |
| ReScanExprContext(ExprContext *econtext) |
| { |
| /* Call any registered callbacks */ |
| ShutdownExprContext(econtext); |
| /* And clean up the memory used */ |
| MemoryContextReset(econtext->ecxt_per_tuple_memory); |
| } |
| |
| /* |
| * Build a per-output-tuple ExprContext for an EState. |
| * |
| * This is normally invoked via GetPerTupleExprContext() macro, |
| * not directly. |
| */ |
| ExprContext * |
| MakePerTupleExprContext(EState *estate) |
| { |
| if (estate->es_per_tuple_exprcontext == NULL) |
| estate->es_per_tuple_exprcontext = CreateExprContext(estate); |
| |
| return estate->es_per_tuple_exprcontext; |
| } |
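| |
| /* |
| * Illustrative sketch (not compiled): the canonical use of the per-tuple |
| * ExprContext. Resetting it between tuples is what keeps per-tuple |
| * allocations from accumulating; the qual and the tuple source here are |
| * hypothetical. |
| */ |
| #ifdef NOT_USED |
| static void |
| per_tuple_qual_example(EState *estate, List *qual, TupleTableSlot *slot) |
| { |
| ExprContext *econtext = GetPerTupleExprContext(estate); |
| |
| while (!TupIsNull(slot)) |
| { |
| /* Free whatever the previous tuple's evaluation allocated. */ |
| ResetPerTupleExprContext(estate); |
| |
| econtext->ecxt_scantuple = slot; |
| if (ExecQual(qual, econtext, false)) |
| { |
| /* ... process the qualifying tuple ... */ |
| } |
| |
| /* ... advance "slot" to the next tuple ... */ |
| } |
| } |
| #endif |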
| |
| |
| /* ---------------------------------------------------------------- |
| * miscellaneous node-init support functions |
| * |
| * Note: all of these are expected to be called with CurrentMemoryContext |
| * equal to the per-query memory context. |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * ExecAssignExprContext |
| * |
| * This initializes the ps_ExprContext field. It is only necessary |
| * to do this for nodes which use ExecQual or ExecProject |
| * because those routines require an econtext. Other nodes that |
| * don't have to evaluate expressions don't need to do this. |
| * ---------------- |
| */ |
| void |
| ExecAssignExprContext(EState *estate, PlanState *planstate) |
| { |
| planstate->ps_ExprContext = CreateExprContext(estate); |
| } |
| |
| /* ---------------- |
| * ExecAssignResultType |
| * ---------------- |
| */ |
| void |
| ExecAssignResultType(PlanState *planstate, TupleDesc tupDesc) |
| { |
| TupleTableSlot *slot = planstate->ps_ResultTupleSlot; |
| |
| ExecSetSlotDescriptor(slot, tupDesc); |
| } |
| |
| /* ---------------- |
| * ExecAssignResultTypeFromTL |
| * ---------------- |
| */ |
| void |
| ExecAssignResultTypeFromTL(PlanState *planstate) |
| { |
| bool hasoid; |
| TupleDesc tupDesc; |
| |
| if (ExecContextForcesOids(planstate, &hasoid)) |
| { |
| /* context forces OID choice; hasoid is now set correctly */ |
| } |
| else |
| { |
| /* given free choice, don't leave space for OIDs in result tuples */ |
| hasoid = false; |
| } |
| |
| /* |
| * ExecTypeFromTL needs the parse-time representation of the tlist, not a |
| * list of ExprStates. This is good because some plan nodes don't bother |
| * to set up planstate->targetlist ... |
| */ |
| tupDesc = ExecTypeFromTL(planstate->plan->targetlist, hasoid); |
| ExecAssignResultType(planstate, tupDesc); |
| } |
| |
| /* ---------------- |
| * ExecGetResultType |
| * ---------------- |
| */ |
| TupleDesc |
| ExecGetResultType(PlanState *planstate) |
| { |
| TupleTableSlot *slot = planstate->ps_ResultTupleSlot; |
| |
| return slot->tts_tupleDescriptor; |
| } |
| |
| /* ---------------- |
| * ExecBuildProjectionInfo |
| * |
| * Build a ProjectionInfo node for evaluating the given tlist in the given |
| * econtext, and storing the result into the tuple slot. (Caller must have |
| * ensured that tuple slot has a descriptor matching the tlist!) Note that |
| * the given tlist should be a list of ExprState nodes, not Expr nodes. |
| * |
| * inputDesc can be NULL, but if it is not, we check to see whether simple |
| * Vars in the tlist match the descriptor. It is important to provide |
| * inputDesc for relation-scan plan nodes, as a cross check that the relation |
| * hasn't been changed since the plan was made. At higher levels of a plan, |
| * there is no need to recheck. |
| * ---------------- |
| */ |
| ProjectionInfo * |
| ExecBuildProjectionInfo(List *targetList, |
| ExprContext *econtext, |
| TupleTableSlot *slot, |
| TupleDesc inputDesc) |
| { |
| ProjectionInfo *projInfo = makeNode(ProjectionInfo); |
| int len; |
| bool isVarList; |
| ListCell *tl; |
| |
| len = ExecTargetListLength(targetList); |
| |
| projInfo->pi_targetlist = targetList; |
| projInfo->pi_exprContext = econtext; |
| projInfo->pi_slot = slot; |
| |
| /* |
| * Determine whether the target list consists entirely of simple Var |
| * references (ie, references to non-system attributes) that match the |
| * input. If so, we can use the simpler ExecVariableList instead of |
| * ExecTargetList. (Note: if there is a type mismatch then ExecEvalVar |
| * will probably throw an error at runtime, but we leave that to it.) |
| */ |
| isVarList = true; |
| foreach(tl, targetList) |
| { |
| GenericExprState *gstate = (GenericExprState *) lfirst(tl); |
| Var *variable = (Var *) gstate->arg->expr; |
| Form_pg_attribute attr; |
| |
| if (variable == NULL || |
| !IsA(variable, Var) || |
| variable->varattno <= 0) |
| { |
| isVarList = false; |
| break; |
| } |
| if (!inputDesc) |
| continue; /* can't check type, assume OK */ |
| if (variable->varattno > inputDesc->natts) |
| { |
| isVarList = false; |
| break; |
| } |
| attr = inputDesc->attrs[variable->varattno - 1]; |
| if (attr->attisdropped || variable->vartype != attr->atttypid) |
| { |
| isVarList = false; |
| break; |
| } |
| } |
| projInfo->pi_isVarList = isVarList; |
| |
| if (isVarList) |
| { |
| int *varSlotOffsets; |
| int *varNumbers; |
| AttrNumber lastInnerVar = 0; |
| AttrNumber lastOuterVar = 0; |
| AttrNumber lastScanVar = 0; |
| |
| projInfo->pi_itemIsDone = NULL; /* not needed */ |
| projInfo->pi_varSlotOffsets = varSlotOffsets = (int *) |
| palloc0(len * sizeof(int)); |
| projInfo->pi_varNumbers = varNumbers = (int *) |
| palloc0(len * sizeof(int)); |
| |
| /* |
| * Set up the data needed by ExecVariableList. The slots in which the |
| * variables can be found at runtime are denoted by the offsets of |
| * their slot pointers within the econtext. This rather grotty |
| * representation is needed because the caller may not have given us |
| * the real econtext yet (see hacks in nodeSubplan.c). |
| */ |
| foreach(tl, targetList) |
| { |
| GenericExprState *gstate = (GenericExprState *) lfirst(tl); |
| Var *variable = (Var *) gstate->arg->expr; |
| AttrNumber attnum = variable->varattno; |
| TargetEntry *tle = (TargetEntry *) gstate->xprstate.expr; |
| AttrNumber resind = tle->resno - 1; |
| |
| Assert(resind >= 0 && resind < len); |
| varNumbers[resind] = attnum; |
| |
| switch (variable->varno) |
| { |
| case INNER: |
| varSlotOffsets[resind] = offsetof(ExprContext, |
| ecxt_innertuple); |
| lastInnerVar = Max(lastInnerVar, attnum); |
| break; |
| |
| case OUTER: |
| varSlotOffsets[resind] = offsetof(ExprContext, |
| ecxt_outertuple); |
| lastOuterVar = Max(lastOuterVar, attnum); |
| break; |
| |
| default: |
| varSlotOffsets[resind] = offsetof(ExprContext, |
| ecxt_scantuple); |
| lastScanVar = Max(lastScanVar, attnum); |
| break; |
| } |
| } |
| projInfo->pi_lastInnerVar = lastInnerVar; |
| projInfo->pi_lastOuterVar = lastOuterVar; |
| projInfo->pi_lastScanVar = lastScanVar; |
| } |
| else |
| { |
| projInfo->pi_itemIsDone = (ExprDoneCond *) |
| palloc0(len * sizeof(ExprDoneCond)); |
| projInfo->pi_varSlotOffsets = NULL; |
| projInfo->pi_varNumbers = NULL; |
| } |
| |
| return projInfo; |
| } |
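| |
| /* |
| * Illustrative sketch (not compiled): driving a ProjectionInfo built by |
| * ExecBuildProjectionInfo. ExecProject stores its result in the slot |
| * supplied at build time; the input slot comes from a hypothetical scan. |
| */ |
| #ifdef NOT_USED |
| static TupleTableSlot * |
| project_example(ProjectionInfo *projInfo, TupleTableSlot *scanslot) |
| { |
| ExprDoneCond isDone; |
| |
| /* Point the projection's econtext at the input tuple, then project. */ |
| projInfo->pi_exprContext->ecxt_scantuple = scanslot; |
| return ExecProject(projInfo, &isDone); |
| } |
| #endif |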
| |
| /* ---------------- |
| * ExecAssignProjectionInfo |
| * |
| * forms the projection information from the node's targetlist |
| * |
| * Notes for inputDesc are same as for ExecBuildProjectionInfo: supply it |
| * for a relation-scan node, can pass NULL for upper-level nodes |
| * ---------------- |
| */ |
| void |
| ExecAssignProjectionInfo(PlanState *planstate, |
| TupleDesc inputDesc) |
| { |
| ProjectionInfo* pi = planstate->ps_ProjInfo; |
| if (NULL != pi) |
| { |
| if (NULL != pi->pi_varNumbers) |
| { |
| pfree(pi->pi_varNumbers); |
| } |
| if (NULL != pi->pi_varSlotOffsets) |
| { |
| pfree(pi->pi_varSlotOffsets); |
| } |
| if (NULL != pi->pi_itemIsDone) |
| { |
| pfree(pi->pi_itemIsDone); |
| } |
| pfree(pi); |
| } |
| |
| planstate->ps_ProjInfo = |
| ExecBuildProjectionInfo(planstate->targetlist, |
| planstate->ps_ExprContext, |
| planstate->ps_ResultTupleSlot, |
| inputDesc); |
| } |
| |
| /* |
| * Constructs a new target list that maps to a tuple descriptor. |
| */ |
| List * |
| GetPartitionTargetlist(TupleDesc partDescr, List *targetlist) |
| { |
| Assert(NIL != targetlist); |
| Assert(partDescr); |
| |
| List *partitionTargetlist = NIL; |
| |
| AttrMap *attrmap = NULL; |
| |
| TupleDesc targetDescr = ExecTypeFromTL(targetlist, false); |
| |
| map_part_attrs_from_targetdesc(targetDescr, partDescr, &attrmap); |
| |
| ListCell *entry = NULL; |
| int pos = 1; |
| foreach(entry, targetlist) |
| { |
| TargetEntry *te = (TargetEntry *) lfirst(entry); |
| |
| /* Obtain corresponding attribute number in the part (this will be the resno). */ |
| int partAtt = (int)attrMap(attrmap, pos); |
| |
| /* A system attribute should be added to the target list with its original |
| * attribute number. |
| */ |
| if (te->resorigcol < 0) |
| { |
| /* te->resorigcol should be equivalent to ((Var *)te->expr)->varattno. |
| * te->resorigcol is used for simplicity. |
| */ |
| Assert(((Var *)te->expr)->varattno == te->resorigcol); |
| |
| /* Validate the range for system-defined attributes. */ |
| Assert(te->resorigcol > FirstLowInvalidHeapAttributeNumber && |
| te->resorigcol <= SelfItemPointerAttributeNumber); |
| |
| partAtt = te->resorigcol; |
| } |
| |
| TargetEntry *newTe = flatCopyTargetEntry(te); |
| |
| /* Parts are not explicitly specified in the range table. Therefore, the original RTE index is kept. */ |
| Index rteIdx = ((Var *)te->expr)->varno; |
| /* Variable expression required by the Target Entry. */ |
| Var *var = makeVar(rteIdx, |
| partAtt, |
| targetDescr->attrs[pos-1]->atttypid, |
| targetDescr->attrs[pos-1]->atttypmod, |
| 0 /* varlevelsup */); |
| |
| |
| /* Modify resno in the new TargetEntry */ |
| newTe->resno = partAtt; |
| newTe->expr = (Expr *) var; |
| |
| partitionTargetlist = lappend(partitionTargetlist, newTe); |
| |
| pos++; |
| } |
| |
| Assert(attrmap); |
| pfree(attrmap); |
| Assert(partitionTargetlist); |
| |
| return partitionTargetlist; |
| } |
| |
| /* |
| * Replace the attribute numbers in each Var of a GenericExprState list |
| * with the corresponding mapped values (resno) from the target list. |
| */ |
| void |
| UpdateGenericExprState(List *teTargetlist, List *geTargetlist) |
| { |
| Assert(list_length(teTargetlist) == |
| list_length(geTargetlist)); |
| |
| ListCell *ge = NULL; |
| ListCell *te = NULL; |
| |
| forboth(te, teTargetlist, ge, geTargetlist) |
| { |
| GenericExprState *gstate = (GenericExprState *) lfirst(ge); |
| TargetEntry *tle = (TargetEntry *) lfirst(te); |
| |
| Var *variable = (Var *) gstate->arg->expr; |
| variable->varattno = tle->resno; |
| } |
| } |
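| |
| /* |
| * Illustrative sketch (not compiled): how the two helpers above work |
| * together when a scan switches to a new part. The plan's target list is |
| * remapped to the part's tuple descriptor, and the remapped resnos are |
| * then pushed back into the node's existing ExprState list. The caller |
| * shown here is hypothetical. |
| */ |
| #ifdef NOT_USED |
| static void |
| remap_part_targetlist_example(TupleDesc partDescr, PlanState *planstate) |
| { |
| List *partTargetlist = |
| GetPartitionTargetlist(partDescr, planstate->plan->targetlist); |
| |
| UpdateGenericExprState(partTargetlist, planstate->targetlist); |
| } |
| #endif |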
| |
| /* ---------------- |
| * ExecFreeExprContext |
| * |
| * A plan node's ExprContext should be freed explicitly during executor |
| * shutdown because there may be shutdown callbacks to call. (Other resources |
| * made by the above routines, such as projection info, don't need to be freed |
| * explicitly because they're just memory in the per-query memory context.) |
| * |
| * However ... there is no particular need to do it during ExecEndNode, |
| * because FreeExecutorState will free any remaining ExprContexts within |
| * the EState. Letting FreeExecutorState do it allows the ExprContexts to |
| * be freed in reverse order of creation, rather than order of creation as |
| * will happen if we delete them here, which saves O(N^2) work in the list |
| * cleanup inside FreeExprContext. |
| * ---------------- |
| */ |
| void |
| ExecFreeExprContext(PlanState *planstate) |
| { |
| /* |
| * Per above discussion, don't actually delete the ExprContext. We do |
| * unlink it from the plan node, though. |
| */ |
| planstate->ps_ExprContext = NULL; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * the following scan type support functions are for |
| * those nodes which are stubborn and return tuples in |
| * their Scan tuple slot instead of their Result tuple |
| * slot... luckily for us, these nodes do not do projections |
| * so we don't have to worry about getting the ProjectionInfo |
| * right for them... -cim 6/3/91 |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * ExecGetScanType |
| * ---------------- |
| */ |
| TupleDesc |
| ExecGetScanType(ScanState *scanstate) |
| { |
| TupleTableSlot *slot = scanstate->ss_ScanTupleSlot; |
| |
| return slot->tts_tupleDescriptor; |
| } |
| |
| /* ---------------- |
| * ExecAssignScanType |
| * ---------------- |
| */ |
| void |
| ExecAssignScanType(ScanState *scanstate, TupleDesc tupDesc) |
| { |
| TupleTableSlot *slot = scanstate->ss_ScanTupleSlot; |
| |
| ExecSetSlotDescriptor(slot, tupDesc); |
| } |
| |
| /* ---------------- |
| * ExecAssignScanTypeFromOuterPlan |
| * ---------------- |
| */ |
| void |
| ExecAssignScanTypeFromOuterPlan(ScanState *scanstate) |
| { |
| PlanState *outerPlan; |
| TupleDesc tupDesc; |
| |
| outerPlan = outerPlanState(scanstate); |
| tupDesc = ExecGetResultType(outerPlan); |
| |
| ExecAssignScanType(scanstate, tupDesc); |
| } |
| |
| |
| /* ---------------------------------------------------------------- |
| * Scan node support |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------------------------------------------------------- |
| * ExecRelationIsTargetRelation |
| * |
| * Detect whether a relation (identified by rangetable index) |
| * is one of the target relations of the query. |
| * ---------------------------------------------------------------- |
| */ |
| bool |
| ExecRelationIsTargetRelation(EState *estate, Index scanrelid) |
| { |
| ResultRelInfo *resultRelInfos; |
| int i; |
| |
| resultRelInfos = estate->es_result_relations; |
| for (i = 0; i < estate->es_num_result_relations; i++) |
| { |
| if (resultRelInfos[i].ri_RangeTableIndex == scanrelid) |
| return true; |
| } |
| return false; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecOpenScanRelation |
| * |
| * Open the heap relation to be scanned by a base-level scan plan node. |
| * This should be called during the node's ExecInit routine. |
| * |
| * By default, this acquires AccessShareLock on the relation. However, |
| * if the relation was already locked by InitPlan, we don't need to acquire |
| * any additional lock. This saves trips to the shared lock manager. |
| * ---------------------------------------------------------------- |
| */ |
| Relation |
| ExecOpenScanRelation(EState *estate, Index scanrelid) |
| { |
| Oid reloid; |
| LOCKMODE lockmode; |
| ResultRelInfo *resultRelInfos; |
| int i; |
| |
| /* |
| * First determine the lock type we need. Scan to see if target relation |
| * is either a result relation or a FOR UPDATE/FOR SHARE relation. |
| */ |
| lockmode = AccessShareLock; |
| resultRelInfos = estate->es_result_relations; |
| for (i = 0; i < estate->es_num_result_relations; i++) |
| { |
| if (resultRelInfos[i].ri_RangeTableIndex == scanrelid) |
| { |
| lockmode = NoLock; |
| break; |
| } |
| } |
| |
| if (lockmode == AccessShareLock) |
| { |
| ListCell *l; |
| |
| foreach(l, estate->es_rowMarks) |
| { |
| ExecRowMark *erm = lfirst(l); |
| |
| if (erm->rti == scanrelid) |
| { |
| lockmode = NoLock; |
| break; |
| } |
| } |
| } |
| |
| /* OK, open the relation and acquire lock as needed */ |
| reloid = getrelid(scanrelid, estate->es_range_table); |
| |
| Assert(reloid != InvalidOid); |
| |
| return heap_open(reloid, lockmode); |
| } |
| |
| /* |
| * same as above, but for external table scans |
| */ |
| Relation |
| ExecOpenScanExternalRelation(EState *estate, Index scanrelid) |
| { |
| RangeTblEntry *rtentry; |
| Oid reloid; |
| |
| rtentry = rt_fetch(scanrelid, estate->es_range_table); |
| reloid = rtentry->relid; |
| |
| return relation_open(reloid, NoLock); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecCloseScanRelation |
| * |
| * Close the heap relation scanned by a base-level scan plan node. |
| * This should be called during the node's ExecEnd routine. |
| * |
| * Currently, we do not release the lock acquired by ExecOpenScanRelation. |
| * This lock should be held till end of transaction. (There is a faction |
| * that considers this too much locking, however.) |
| * |
| * If we did want to release the lock, we'd have to repeat the logic in |
| * ExecOpenScanRelation in order to figure out what to release. |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecCloseScanRelation(Relation scanrel) |
| { |
| heap_close(scanrel, NoLock); |
| } |
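| |
| /* |
| * Illustrative sketch (not compiled): how a scan node's ExecInit and |
| * ExecEnd routines pair the two calls above; the scanning work in between |
| * is hypothetical. |
| */ |
| #ifdef NOT_USED |
| static void |
| scan_relation_lifecycle_example(EState *estate, Index scanrelid) |
| { |
| /* ExecInit...: open, taking AccessShareLock only if InitPlan didn't */ |
| Relation rel = ExecOpenScanRelation(estate, scanrelid); |
| |
| /* ... scan "rel" for the life of the node ... */ |
| |
| /* ExecEnd...: close, keeping any lock till end of transaction */ |
| ExecCloseScanRelation(rel); |
| } |
| #endif |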
| |
| |
| /* ---------------------------------------------------------------- |
| * ExecInsertIndexTuples support |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------------------------------------------------------- |
| * ExecOpenIndices |
| * |
| * Find the indices associated with a result relation, open them, |
| * and save information about them in the result ResultRelInfo. |
| * |
| * At entry, caller has already opened and locked |
| * resultRelInfo->ri_RelationDesc. |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecOpenIndices(ResultRelInfo *resultRelInfo) |
| { |
| Relation resultRelation = resultRelInfo->ri_RelationDesc; |
| List *indexoidlist; |
| ListCell *l; |
| int len, |
| i; |
| RelationPtr relationDescs; |
| IndexInfo **indexInfoArray; |
| |
| resultRelInfo->ri_NumIndices = 0; |
| |
| /* fast path if no indexes */ |
| if (!RelationGetForm(resultRelation)->relhasindex) |
| return; |
| |
| /* |
| * Get cached list of index OIDs |
| */ |
| indexoidlist = RelationGetIndexList(resultRelation); |
| len = list_length(indexoidlist); |
| if (len == 0) |
| return; |
| |
| /* |
| * allocate space for result arrays |
| */ |
| relationDescs = (RelationPtr) palloc(len * sizeof(Relation)); |
| indexInfoArray = (IndexInfo **) palloc(len * sizeof(IndexInfo *)); |
| |
| resultRelInfo->ri_NumIndices = len; |
| resultRelInfo->ri_IndexRelationDescs = relationDescs; |
| resultRelInfo->ri_IndexRelationInfo = indexInfoArray; |
| |
| /* |
| * For each index, open the index relation and save pg_index info. We |
| * acquire RowExclusiveLock, signifying we will update the index. |
| */ |
| i = 0; |
| foreach(l, indexoidlist) |
| { |
| Oid indexOid = lfirst_oid(l); |
| Relation indexDesc; |
| IndexInfo *ii; |
| |
| indexDesc = index_open(indexOid, RowExclusiveLock); |
| |
| /* extract index key information from the index's pg_index info */ |
| ii = BuildIndexInfo(indexDesc); |
| |
| relationDescs[i] = indexDesc; |
| indexInfoArray[i] = ii; |
| i++; |
| } |
| |
| list_free(indexoidlist); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecCloseIndices |
| * |
| * Close the index relations stored in resultRelInfo |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecCloseIndices(ResultRelInfo *resultRelInfo) |
| { |
| int i; |
| int numIndices; |
| RelationPtr indexDescs; |
| |
| numIndices = resultRelInfo->ri_NumIndices; |
| indexDescs = resultRelInfo->ri_IndexRelationDescs; |
| |
| for (i = 0; i < numIndices; i++) |
| { |
| if (indexDescs[i] == NULL) |
| continue; /* shouldn't happen? */ |
| |
| /* Drop lock acquired by ExecOpenIndices */ |
| index_close(indexDescs[i], RowExclusiveLock); |
| } |
| |
| /* |
| * XXX should free indexInfo array here too? Currently we assume that |
| * such stuff will be cleaned up automatically in FreeExecutorState. |
| */ |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecInsertIndexTuples |
| * |
| * This routine takes care of inserting index tuples |
| * into all the relations indexing the result relation |
| * when a heap tuple is inserted into the result relation. |
| * Much of this code should be moved into the genam |
| * stuff as it only exists here because the genam stuff |
| * doesn't provide the functionality needed by the |
| * executor.. -cim 9/27/89 |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecInsertIndexTuples(TupleTableSlot *slot, |
| ItemPointer tupleid, |
| EState *estate, |
| bool is_vacuum) |
| { |
| ResultRelInfo *resultRelInfo; |
| int i; |
| int numIndices; |
| RelationPtr relationDescs; |
| Relation heapRelation; |
| IndexInfo **indexInfoArray; |
| ExprContext *econtext; |
| Datum values[INDEX_MAX_KEYS]; |
| bool isnull[INDEX_MAX_KEYS]; |
| |
| /* |
| * Get information from the result relation info structure. |
| */ |
| resultRelInfo = estate->es_result_relation_info; |
| numIndices = resultRelInfo->ri_NumIndices; |
| relationDescs = resultRelInfo->ri_IndexRelationDescs; |
| indexInfoArray = resultRelInfo->ri_IndexRelationInfo; |
| heapRelation = resultRelInfo->ri_RelationDesc; |
| |
| /* |
| * We will use the EState's per-tuple context for evaluating predicates |
| * and index expressions (creating it if it's not already there). |
| */ |
| econtext = GetPerTupleExprContext(estate); |
| |
| /* Arrange for econtext's scan tuple to be the tuple under test */ |
| econtext->ecxt_scantuple = slot; |
| |
| /* |
| * for each index, form and insert the index tuple |
| */ |
| for (i = 0; i < numIndices; i++) |
| { |
| IndexInfo *indexInfo; |
| |
| if (relationDescs[i] == NULL) |
| continue; |
| |
| indexInfo = indexInfoArray[i]; |
| |
| /* Check for partial index */ |
| if (indexInfo->ii_Predicate != NIL) |
| { |
| List *predicate; |
| |
| /* |
| * If predicate state not set up yet, create it (in the estate's |
| * per-query context) |
| */ |
| predicate = indexInfo->ii_PredicateState; |
| if (predicate == NIL) |
| { |
| predicate = (List *) |
| ExecPrepareExpr((Expr *) indexInfo->ii_Predicate, |
| estate); |
| indexInfo->ii_PredicateState = predicate; |
| } |
| |
| /* Skip this index-update if the predicate isn't satisfied */ |
| if (!ExecQual(predicate, econtext, false)) |
| continue; |
| } |
| |
| /* |
| * FormIndexDatum fills in its values and isnull parameters with the |
| * appropriate values for the column(s) of the index. |
| */ |
| FormIndexDatum(indexInfo, |
| slot, |
| estate, |
| values, |
| isnull); |
| |
| /* |
| * The index AM does the rest. Note we suppress unique-index checks |
| * if we are being called from VACUUM, since VACUUM may need to move |
| * dead tuples that have the same keys as live ones. |
| */ |
| index_insert(relationDescs[i], /* index relation */ |
| values, /* array of index Datums */ |
| isnull, /* null flags */ |
| tupleid, /* tid of heap tuple */ |
| heapRelation, |
| relationDescs[i]->rd_index->indisunique && !is_vacuum); |
| |
| /* |
| * keep track of index inserts for debugging |
| */ |
| IncrIndexInserted(); |
| } |
| } |
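| |
| /* |
| * Illustrative sketch (not compiled): the open/insert/close cycle as an |
| * INSERT-like caller would drive it. The heap insertion that produces |
| * "tupleid" is hypothetical. |
| */ |
| #ifdef NOT_USED |
| static void |
| index_maintenance_example(EState *estate, TupleTableSlot *slot, |
| ItemPointer tupleid) |
| { |
| ResultRelInfo *resultRelInfo = estate->es_result_relation_info; |
| |
| /* Normally done once, from InitPlan */ |
| ExecOpenIndices(resultRelInfo); |
| |
| /* For each tuple placed in the heap, maintain all of its indexes */ |
| if (resultRelInfo->ri_NumIndices > 0) |
| ExecInsertIndexTuples(slot, tupleid, estate, false); |
| |
| /* Normally done once, from EndPlan */ |
| ExecCloseIndices(resultRelInfo); |
| } |
| #endif |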
| |
| /* |
| * UpdateChangedParamSet |
| * Add changed parameters to a plan node's chgParam set |
| */ |
| void |
| UpdateChangedParamSet(PlanState *node, Bitmapset *newchg) |
| { |
| Bitmapset *parmset; |
| |
| /* |
| * The plan node only depends on params listed in its allParam set. Don't |
| * include anything else into its chgParam set. |
| */ |
| parmset = bms_intersect(node->plan->allParam, newchg); |
| |
| /* |
| * Keep node->chgParam == NULL if there's not actually any members; this |
| * allows the simplest possible tests in executor node files. |
| */ |
| if (!bms_is_empty(parmset)) |
| node->chgParam = bms_join(node->chgParam, parmset); |
| else |
| bms_free(parmset); |
| } |
| |
| /* |
| * Register a shutdown callback in an ExprContext. |
| * |
| * Shutdown callbacks will be called (in reverse order of registration) |
| * when the ExprContext is deleted or rescanned. This provides a hook |
| * for functions called in the context to do any cleanup needed --- it's |
| * particularly useful for functions returning sets. Note that the |
| * callback will *not* be called in the event that execution is aborted |
| * by an error. |
| */ |
| void |
| RegisterExprContextCallback(ExprContext *econtext, |
| ExprContextCallbackFunction function, |
| Datum arg) |
| { |
| ExprContext_CB *ecxt_callback; |
| |
| /* Save the info in appropriate memory context */ |
| ecxt_callback = (ExprContext_CB *) |
| MemoryContextAlloc(econtext->ecxt_per_query_memory, |
| sizeof(ExprContext_CB)); |
| |
| ecxt_callback->function = function; |
| ecxt_callback->arg = arg; |
| |
| /* link to front of list for appropriate execution order */ |
| ecxt_callback->next = econtext->ecxt_callbacks; |
| econtext->ecxt_callbacks = ecxt_callback; |
| } |
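| |
| /* |
| * Illustrative sketch (not compiled): how a set-returning function might |
| * arrange to release a non-memory resource even if it is never run to |
| * completion. The callback body and the resource are hypothetical. |
| */ |
| #ifdef NOT_USED |
| static void |
| srf_cleanup_callback(Datum arg) |
| { |
| /* e.g. close the file or scan that "arg" points to */ |
| } |
| |
| static void |
| register_cleanup_example(ExprContext *econtext, void *resource) |
| { |
| RegisterExprContextCallback(econtext, |
| srf_cleanup_callback, |
| PointerGetDatum(resource)); |
| } |
| #endif |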
| |
| /* |
| * Deregister a shutdown callback in an ExprContext. |
| * |
| * Any list entries matching the function and arg will be removed. |
| * This can be used if it's no longer necessary to call the callback. |
| */ |
| void |
| UnregisterExprContextCallback(ExprContext *econtext, |
| ExprContextCallbackFunction function, |
| Datum arg) |
| { |
| ExprContext_CB **prev_callback; |
| ExprContext_CB *ecxt_callback; |
| |
| prev_callback = &econtext->ecxt_callbacks; |
| |
| while ((ecxt_callback = *prev_callback) != NULL) |
| { |
| if (ecxt_callback->function == function && ecxt_callback->arg == arg) |
| { |
| *prev_callback = ecxt_callback->next; |
| pfree(ecxt_callback); |
| } |
| else |
| prev_callback = &ecxt_callback->next; |
| } |
| } |
| |
| /* |
| * Call all the shutdown callbacks registered in an ExprContext. |
| * |
| * The callback list is emptied (important in case this is only a rescan |
| * reset, and not deletion of the ExprContext). |
| */ |
| static void |
| ShutdownExprContext(ExprContext *econtext) |
| { |
| ExprContext_CB *ecxt_callback; |
| MemoryContext oldcontext; |
| |
| /* Fast path in normal case where there's nothing to do. */ |
| if (econtext->ecxt_callbacks == NULL) |
| return; |
| |
| /* |
| * Call the callbacks in econtext's per-tuple context. This ensures that |
| * any memory they might leak will get cleaned up. |
| */ |
| oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); |
| |
| /* |
| * Call each callback function in reverse registration order. |
| */ |
| while ((ecxt_callback = econtext->ecxt_callbacks) != NULL) |
| { |
| econtext->ecxt_callbacks = ecxt_callback->next; |
| (*ecxt_callback->function) (ecxt_callback->arg); |
| pfree(ecxt_callback); |
| } |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| |
| /* --------------------------------------------------------------- |
| * Share Input utilities |
| * --------------------------------------------------------------- |
| */ |
| ShareNodeEntry * |
| ExecGetShareNodeEntry(EState* estate, int shareidx, bool fCreate) |
| { |
| Assert(shareidx >= 0); |
| Assert(estate->es_sharenode != NULL); |
| |
| if (!fCreate) |
| { |
| if (shareidx >= list_length(*estate->es_sharenode)) |
| return NULL; |
| } |
| else |
| { |
| while (list_length(*estate->es_sharenode) <= shareidx) |
| { |
| ShareNodeEntry *n = makeNode(ShareNodeEntry); |
| n->sharePlan = NULL; |
| n->shareState = NULL; |
| |
| *estate->es_sharenode = lappend(*estate->es_sharenode, n); |
| } |
| } |
| |
| return (ShareNodeEntry *) list_nth(*estate->es_sharenode, shareidx); |
| } |
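| |
| /* |
| * Illustrative sketch (not compiled): a share-input producer publishes its |
| * plan and state under a share id, and a consumer looks them up with |
| * fCreate = false. The producer variables and the casts to Node * are |
| * assumptions made for illustration. |
| */ |
| #ifdef NOT_USED |
| static void |
| share_node_example(EState *estate, int share_id, |
| Plan *producerPlan, PlanState *producerState) |
| { |
| /* Producer: create the entry on first use */ |
| ShareNodeEntry *entry = ExecGetShareNodeEntry(estate, share_id, true); |
| |
| entry->sharePlan = (Node *) producerPlan; |
| entry->shareState = (Node *) producerState; |
| |
| /* Consumer: the entry must already exist, so don't create */ |
| entry = ExecGetShareNodeEntry(estate, share_id, false); |
| Assert(entry != NULL && entry->shareState != NULL); |
| } |
| #endif |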
| |
| /* ---------------------------------------------------------------- |
| * CDB Slice Table utilities |
| * ---------------------------------------------------------------- |
| */ |
| |
| static bool |
| is1GangSlice(Slice *slice, int numSegmentsInCluster) |
| { |
| return slice->gangSize == 1 && numSegmentsInCluster != 1; |
| } |
| |
| /* Attach a slice table to the given Estate structure. It should |
| * consist of blank slices, one for the root plan, one for each |
| * Motion node (which roots a slice with a send node), and one for |
| * each subplan (which acts as an initplan root node). |
| */ |
| void |
| InitSliceTable(EState *estate, int nMotions, int nSubplans) |
| { |
| SliceTable *table; |
| Slice *slice; |
| int i, |
| n; |
| MemoryContext oldcontext; |
| |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| table = makeNode(SliceTable); |
| table->nMotions = nMotions; |
| table->nInitPlans = nSubplans; |
| table->slices = NIL; |
| table->doInstrument = false; |
| |
| /* Each slice table has a unique ID. */ |
| table->ic_instance_id = ++gp_interconnect_id; |
| |
| n = 1 + nMotions + nSubplans; |
| for (i = 0; i < n; i++) |
| { |
| slice = makeNode(Slice); |
| |
| slice->sliceIndex = i; |
| slice->rootIndex = (i > 0 && i <= nMotions) ? -1 : i; |
| slice->gangType = GANGTYPE_UNALLOCATED; |
| slice->gangSize = 0; |
| slice->numGangMembersToBeActive = 0; |
| slice->directDispatch.isDirectDispatch = false; |
| slice->directDispatch.contentIds = NIL; |
| slice->primary_gang_id = 0; |
| slice->parentIndex = -1; |
| slice->is_writer = false; |
| slice->children = NIL; |
| slice->primaryProcesses = NIL; |
| |
| table->slices = lappend(table->slices, slice); |
| } |
| |
| estate->es_sliceTable = table; |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
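| |
| /* |
| * Illustrative sketch (not compiled): InitPlan-style use of InitSliceTable, |
| * with the motion and init-plan counts taken from the planned statement. |
| * The PlannedStmt field names used here are assumptions. |
| */ |
| #ifdef NOT_USED |
| static void |
| init_slice_table_example(QueryDesc *queryDesc) |
| { |
| InitSliceTable(queryDesc->estate, |
| queryDesc->plannedstmt->nMotionNodes, |
| queryDesc->plannedstmt->nInitPlans); |
| } |
| #endif |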
| |
| /* |
| * A forgiving slice table indexer that returns the indexed Slice* or NULL |
| */ |
| Slice * |
| getCurrentSlice(EState *estate, int sliceIndex) |
| { |
| SliceTable *sliceTable = estate->es_sliceTable; |
| |
| if (sliceTable && |
| sliceIndex >= 0 && |
| sliceIndex < list_length(sliceTable->slices)) |
| return (Slice *)list_nth(sliceTable->slices, sliceIndex); |
| |
| return NULL; |
| } |
| |
| /* Should the slice run on the QD? |
| * |
| * N.B. Not the same as !sliceRunsOnQE(slice), when slice is NULL. |
| */ |
| bool |
| sliceRunsOnQD(Slice * slice) |
| { |
| return (slice != NULL && slice->gangType == GANGTYPE_UNALLOCATED); |
| } |
| |
| |
| /* Should the slice run on a QE? |
| * |
| * N.B. Not the same as !sliceRunsOnQD(slice), when slice is NULL. |
| */ |
| bool |
| sliceRunsOnQE(Slice * slice) |
| { |
| return (slice != NULL && slice->gangType != GANGTYPE_UNALLOCATED); |
| } |
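| |
| /* |
| * Illustrative sketch (not compiled): the usual NULL-safe way to combine |
| * getCurrentSlice with the two predicates above. currentSliceIdInPlan is |
| * the EState field initialized in InternalCreateExecutorState. |
| */ |
| #ifdef NOT_USED |
| static bool |
| current_slice_on_qd_example(EState *estate) |
| { |
| Slice *slice = getCurrentSlice(estate, estate->currentSliceIdInPlan); |
| |
| /* For a NULL slice, both predicates return false. */ |
| return sliceRunsOnQD(slice); |
| } |
| #endif |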
| |
| /** |
| * Calculate the number of sending processes that should be in a slice. |
| */ |
| int |
| sliceCalculateNumSendingProcesses(Slice *slice, int numSegmentsInCluster) |
| { |
| switch(slice->gangType) |
| { |
| case GANGTYPE_UNALLOCATED: |
| return 0; /* does not send */ |
| |
| case GANGTYPE_ENTRYDB_READER: |
| return 1; /* on master */ |
| |
| case GANGTYPE_PRIMARY_WRITER: |
| return 0; /* writers don't send */ |
| |
| case GANGTYPE_PRIMARY_READER: |
| if (is1GangSlice(slice, numSegmentsInCluster)) |
| return 1; |
| else if (slice->directDispatch.isDirectDispatch) |
| return list_length(slice->directDispatch.contentIds); |
| else |
| return numSegmentsInCluster; |
| |
| default: |
| Insist(false); |
| return -1; |
| } |
| } |
| |
| /* Assign gang descriptions to the root slices of the slice forest. |
| * |
| * The root slices of initPlan slice trees will always run on the QD, |
| * which, for the time being, we represent as |
| * |
| * (gangType, gangSize) = <GANGTYPE_UNALLOCATED, 0>. |
| * |
| * The root slice of the main plan will run on the QD in case it's a |
| * SELECT, but will run on QE(s) in case it's an INSERT, UPDATE, or |
| * DELETE. Because we restrict UPDATE and DELETE to have no motions |
| * (i.e., one slice) and because INSERT must always route tuples, |
| * the assignment for these will be primary and mirror writer gangs, |
| * which we represent as |
| * |
| * (gangType, gangSize) = <GANGTYPE_PRIMARY_WRITER, N> |
| */ |
| void |
| InitRootSlices(QueryDesc *queryDesc, int numSegmentsInCluster) |
| { |
| EState *estate = queryDesc->estate; |
| SliceTable *sliceTable = estate->es_sliceTable; |
| ListCell *cell; |
| Slice *slice; |
| int i; |
| |
| foreach(cell, sliceTable->slices) |
| { |
| slice = (Slice *) lfirst(cell); |
| i = slice->sliceIndex; |
| if (i == 0) |
| { |
| /* Main plan root slice */ |
| switch (queryDesc->operation) |
| { |
| case CMD_SELECT: |
| Assert(slice->gangType == GANGTYPE_UNALLOCATED && slice->gangSize == 0); |
| if (queryDesc->plannedstmt->intoClause != NULL) |
| { |
| slice->gangType = GANGTYPE_PRIMARY_WRITER; |
| slice->gangSize = numSegmentsInCluster; |
| slice->numGangMembersToBeActive = sliceCalculateNumSendingProcesses(slice, numSegmentsInCluster); |
| } |
| break; |
| |
| case CMD_INSERT: |
| case CMD_UPDATE: |
| case CMD_DELETE: |
| { |
| /* if updating a master-only table: do not dispatch to segments */ |
| List *resultRelations = queryDesc->plannedstmt->resultRelations; |
| Assert(list_length(resultRelations) > 0); |
| int idx = list_nth_int(resultRelations, 0); |
| Assert (idx > 0); |
| Oid reloid = getrelid(idx, queryDesc->plannedstmt->rtable); |
| if (!isMasterOnly(reloid)) |
| { |
| slice->gangType = GANGTYPE_PRIMARY_WRITER; |
| slice->gangSize = numSegmentsInCluster; |
| slice->numGangMembersToBeActive = sliceCalculateNumSendingProcesses(slice, numSegmentsInCluster); |
| } |
| /* else: result relation is master-only, so top slice should run on the QD and not be dispatched */ |
| break; |
| } |
| default: |
| Assert(FALSE); |
| } |
| } |
| if (i <= sliceTable->nMotions) |
| { |
| /* Non-root slice */ |
| continue; |
| } |
| else |
| { |
| /* InitPlan root slice */ |
| Assert(slice->gangType == GANGTYPE_UNALLOCATED && slice->gangSize == 0); |
| } |
| } |
| } |
| |
| /* |
| * Context for AssignGangs() and helper functions. |
| */ |
| typedef struct SliceReq |
| { |
| int numNgangs; |
| int num1gangs_primary_reader; |
| int num1gangs_entrydb_reader; |
| int nxtNgang; |
| int nxt1gang_primary_reader; |
| int nxt1gang_entrydb_reader; |
| Gang **vecNgangs; |
| Gang **vec1gangs_primary_reader; |
| Gang **vec1gangs_entrydb_reader; |
| bool writer; |
| |
| } SliceReq; |
| |
| /* Forward declarations */ |
| static void InitSliceReq(SliceReq * req); |
| static void AccumSliceReq(SliceReq * inv, SliceReq * req); |
| static void InventorySliceTree(Slice ** sliceMap, int sliceIndex, SliceReq * req, int numSegmentsInCluster); |
| static void AssociateSlicesToProcesses(Slice ** sliceMap, int sliceIndex, SliceReq * req, int numSegmentsInCluster); |
| |
| |
| /* Function AssignGangs runs on the QD and finishes construction of the |
| * global slice table for a plan by assigning gangs allocated by the |
| * executor factory to the slices of the slice table. |
| * |
| * On entry, the slice table (at queryDesc->estate->es_sliceTable) has |
| * the correct structure (established by InitSliceTable) and has correct |
| * gang types (established by function InitRootSlices). |
| * |
| * Gang assignment involves taking an inventory of the requirements of |
| * each slice tree in the slice table, asking the executor factory to |
| * allocate a minimal set of gangs that can satisfy any of the slice trees, |
| * and associating the allocated gangs with slices in the slice table. |
| * |
| * The argument utility_segment_index is the segment index to use for |
| * 1-gangs that run on QEs. |
| * |
| * TODO Currently (July 2005) this argument is always supplied as 0, but |
| * there are no cases of the planner specifying a fixed Motion to a |
| * QE, so we don't know whether that case works. |
| * |
| * On successful exit, the CDBProcess lists (primaryProcesses, mirrorProcesses) |
| * and the Gang pointers (primaryGang, mirrorGang) are set correctly in each |
| * slice in the slice table. |
| */ |
| void |
| AssignGangs(QueryDesc *queryDesc, int utility_segment_index) |
| { |
| EState *estate = queryDesc->estate; |
| SliceTable *sliceTable = estate->es_sliceTable; |
| ListCell *cell; |
| Slice *slice; |
| int i, |
| nslices; |
| Slice **sliceMap; |
| SliceReq req, |
| inv; |
| |
| /* Make a map so we can access slices quickly by index. */ |
| nslices = list_length(sliceTable->slices); |
| sliceMap = (Slice **) palloc(nslices * sizeof(Slice *)); |
| i = 0; |
| foreach(cell, sliceTable->slices) |
| { |
| slice = (Slice *) lfirst(cell); |
| Assert(i == slice->sliceIndex); |
| sliceMap[i] = slice; |
| i++; |
| } |
| |
| /* Initialize gang requirement inventory */ |
| InitSliceReq(&inv); |
| |
| /* Capture main slice tree requirement. */ |
| InventorySliceTree(sliceMap, 0, &inv, queryDesc->planner_segments); |
| |
| /* Capture initPlan slice tree requirements. */ |
| for (i = sliceTable->nMotions + 1; i < nslices; i++) |
| { |
| InitSliceReq(&req); |
| InventorySliceTree(sliceMap, i, &req, queryDesc->planner_segments); |
| AccumSliceReq(&inv, &req); |
| } |
| |
| /* |
| * Get the gangs we'll use. |
| * |
| * As a general rule, the first gang is a writer and the rest are readers. |
| * If this happens to be an extended-query protocol, all gangs are readers. |
| */ |
| if (inv.numNgangs > 0) |
| { |
| inv.vecNgangs = (Gang **) palloc(sizeof(Gang *) * inv.numNgangs); |
| for (i = 0; i < inv.numNgangs; i++) |
| { |
| if (i == 0 && !queryDesc->extended_query) |
| { |
| /* placeholder for allocateWriterGang() */ |
| inv.vecNgangs[i] = (void *) 1; |
| |
| Assert(inv.vecNgangs[i] != NULL); |
| } |
| else |
| { |
| /* |
| * placeholder for allocateGang(GANGTYPE_PRIMARY_READER, |
| * getgpsegmentCount(), GpAliveSegmentsInfo.singleton_segindex, |
| * queryDesc->portal_name) |
| */ |
| inv.vecNgangs[i] = NULL; |
| } |
| } |
| } |
| if (inv.num1gangs_primary_reader > 0) |
| { |
| inv.vec1gangs_primary_reader = (Gang **) palloc(sizeof(Gang *) * inv.num1gangs_primary_reader); |
| for (i = 0; i < inv.num1gangs_primary_reader; i++) |
| { |
| /* |
| * placeholder for allocateGang(GANGTYPE_PRIMARY_READER, 1, |
| * utility_segment_index, queryDesc->portal_name) |
| */ |
| inv.vec1gangs_primary_reader[i] = NULL; |
| } |
| } |
| if (inv.num1gangs_entrydb_reader > 0) |
| { |
| inv.vec1gangs_entrydb_reader = (Gang **) palloc(sizeof(Gang *) * inv.num1gangs_entrydb_reader); |
| for (i = 0; i < inv.num1gangs_entrydb_reader; i++) |
| { |
| /* |
| * placeholder for allocateGang(GANGTYPE_ENTRYDB_READER, 1, -1, |
| * queryDesc->portal_name) |
| */ |
| inv.vec1gangs_entrydb_reader[i] = NULL; |
| } |
| } |
| |
| /* Use the gangs to construct the CdbProcess lists in slices. */ |
| |
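| /* |
| * The nxt* fields are cursors into the gang vectors. They are reset |
| * before each slice tree is walked, since every tree draws from the |
| * same shared pool starting at the first gang. |
| */ |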
| inv.nxtNgang = 0; |
| inv.nxt1gang_primary_reader = 0; |
| inv.nxt1gang_entrydb_reader = 0; |
| AssociateSlicesToProcesses(sliceMap, 0, &inv, queryDesc->planner_segments); /* Main tree. */ |
| |
| for (i = sliceTable->nMotions + 1; i < nslices; i++) |
| { |
| inv.nxtNgang = 0; |
| inv.nxt1gang_primary_reader = 0; |
| inv.nxt1gang_entrydb_reader = 0; |
| AssociateSlicesToProcesses(sliceMap, i, &inv, queryDesc->planner_segments); /* An initPlan */ |
| } |
| |
| /* Clean up */ |
| pfree(sliceMap); |
| if (inv.vecNgangs != NULL) |
| pfree(inv.vecNgangs); |
| if (inv.vec1gangs_primary_reader != NULL) |
| pfree(inv.vec1gangs_primary_reader); |
| if (inv.vec1gangs_entrydb_reader != NULL) |
| pfree(inv.vec1gangs_entrydb_reader); |
| |
| } |
| |
| void |
| InitSliceReq(SliceReq *req) |
| { |
| req->numNgangs = 0; |
| req->num1gangs_primary_reader = 0; |
| req->num1gangs_entrydb_reader = 0; |
| req->writer = FALSE; |
| req->vecNgangs = NULL; |
| req->vec1gangs_primary_reader = NULL; |
| req->vec1gangs_entrydb_reader = NULL; |
| } |
| |
| void |
| AccumSliceReq(SliceReq *inv, SliceReq *req) |
| { |
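| /* |
| * Take the maximum rather than the sum: the same pool of gangs is |
| * reused by each slice tree in turn, so the inventory need only cover |
| * the most demanding tree. |
| */ |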
| inv->numNgangs = Max(inv->numNgangs, req->numNgangs); |
| inv->num1gangs_primary_reader = Max(inv->num1gangs_primary_reader, req->num1gangs_primary_reader); |
| inv->num1gangs_entrydb_reader = Max(inv->num1gangs_entrydb_reader, req->num1gangs_entrydb_reader); |
| inv->writer = (inv->writer || req->writer); |
| } |
| |
| |
| /* |
| * Helper for AssignGangs takes a simple inventory of the gangs required |
| * by a slice tree. Recursive. Closely coupled with AssignGangs. Not |
| * generally useful. |
| */ |
| void |
| InventorySliceTree(Slice **sliceMap, int sliceIndex, SliceReq *req, |
| int numSegmentsInCluster) |
| { |
| ListCell *cell; |
| int childIndex; |
| Slice *slice = sliceMap[sliceIndex]; |
| |
| switch (slice->gangType) |
| { |
| case GANGTYPE_UNALLOCATED: |
| /* Roots that run on the QD don't need a gang. */ |
| break; |
| |
| case GANGTYPE_ENTRYDB_READER: |
| Assert(slice->gangSize == 1); |
| req->num1gangs_entrydb_reader++; |
| break; |
| |
| case GANGTYPE_PRIMARY_WRITER: |
| req->writer = TRUE; |
| /* fall through */ |
| case GANGTYPE_PRIMARY_READER: |
| if (is1GangSlice(slice, numSegmentsInCluster)) |
| { |
| req->num1gangs_primary_reader++; |
| } |
| else |
| { |
| Assert(slice->gangSize == numSegmentsInCluster); |
| req->numNgangs++; |
| } |
| break; |
| } |
| |
| foreach(cell, slice->children) |
| { |
| childIndex = lfirst_int(cell); |
| InventorySliceTree(sliceMap, childIndex, req, numSegmentsInCluster); |
| } |
| } |
| |
| /* |
| * Helper for AssignGangs uses the gangs in the inventory to fill in the |
| * CdbProcess lists in the slice tree. Recursive. Closely coupled with |
| * AssignGangs. Not generally useful. |
| */ |
| void |
| AssociateSlicesToProcesses(Slice **sliceMap, int sliceIndex, SliceReq *req, int numSegmentsInCluster) |
| { |
| ListCell *cell; |
| int childIndex; |
| Slice *slice = sliceMap[sliceIndex]; |
| |
| switch (slice->gangType) |
| { |
| case GANGTYPE_UNALLOCATED: |
| /* Roots that run on the QD don't need a gang. */ |
| |
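| /* They still need a process list describing the QD itself. */ |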
| slice->primaryProcesses = getCdbProcessesForQD(true); |
| break; |
| |
| case GANGTYPE_ENTRYDB_READER: |
| req->nxt1gang_entrydb_reader++; |
| break; |
| |
| case GANGTYPE_PRIMARY_WRITER: |
| slice->is_writer = req->vecNgangs[req->nxtNgang] == (void *)1; |
| req->nxtNgang++; |
| break; |
| |
| case GANGTYPE_PRIMARY_READER: |
| if (is1GangSlice(slice, numSegmentsInCluster)) |
| { |
| req->nxt1gang_primary_reader++; |
| } |
| else |
| { |
| slice->is_writer = req->vecNgangs[req->nxtNgang] == (void *)1; |
| req->nxtNgang++; |
| } |
| break; |
| } |
| |
| foreach(cell, slice->children) |
| { |
| childIndex = lfirst_int(cell); |
| AssociateSlicesToProcesses(sliceMap, childIndex, req, numSegmentsInCluster); |
| } |
| } |
| |
| /* |
| * Choose the execution identity (who does this executor serve?). |
| * There are three types, corresponding to the GpExecIdentity values |
| * returned below: |
| * |
| * 1. No-Op (ignore) -- this occurs when the specified direction is |
| * NoMovementScanDirection or when Gp_role is GP_ROLE_DISPATCH |
| * and the current slice belongs to a QE. |
| * |
| * 2. Executor serves a Root Slice -- this occurs when Gp_role is |
| * GP_ROLE_UTILITY or the current slice is a root. It corresponds |
| * to the "normal" path through the executor in that we enter the plan |
| * at the top and count on the motion nodes at the fringe of the top |
| * slice to return without ever calling nodes below them. |
| * |
| * 3. Executor serves a Non-Root Slice on a QE -- this occurs when |
| * Gp_role is GP_ROLE_EXECUTE and the current slice is not a root |
| * slice. It corresponds to a QE running a slice with a motion node on |
| * top. The call, thus, returns no tuples (since they all go out |
| * on the interconnect to the receiver version of the motion node), |
| * but it does execute the indicated slice down to any fringe |
| * motion nodes (as in case 2). |
| */ |
| GpExecIdentity |
| getGpExecIdentity(QueryDesc *queryDesc, |
| ScanDirection direction, |
| EState *estate) |
| { |
| Slice *currentSlice; |
| |
| currentSlice = getCurrentSlice(estate, LocallyExecutingSliceIndex(estate)); |
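| /* |
| * If we are a QE, or the slice runs on the QD, record the slice being |
| * executed in the global currentSliceId. |
| */ |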
| if (currentSlice) |
| { |
| if (Gp_role == GP_ROLE_EXECUTE || |
| sliceRunsOnQD(currentSlice)) |
| currentSliceId = currentSlice->sliceIndex; |
| } |
| |
| /* select the strategy */ |
| if (direction == NoMovementScanDirection) |
| { |
| return GP_IGNORE; |
| } |
| else if (Gp_role == GP_ROLE_DISPATCH && sliceRunsOnQE(currentSlice)) |
| { |
| return GP_IGNORE; |
| } |
| else if (Gp_role == GP_ROLE_EXECUTE && LocallyExecutingSliceIndex(estate) != RootSliceIndex(estate)) |
| { |
| return GP_NON_ROOT_ON_QE; |
| } |
| else |
| { |
| return GP_ROOT_SLICE; |
| } |
| } |
| |
| /* |
| * End the gp-specific part of the executor. |
| * |
| * Here we collect the dispatch results, if any, and tear down the |
| * interconnect if it is set up. |
| */ |
| void mppExecutorFinishup(QueryDesc *queryDesc) |
| { |
| EState *estate; |
| Slice *currentSlice; |
| |
| /* caller must have switched into per-query memory context already */ |
| estate = queryDesc->estate; |
| |
| currentSlice = getCurrentSlice(estate, LocallyExecutingSliceIndex(estate)); |
| |
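| /* |
| * Two dispatcher code paths exist: the older dispatcher state in |
| * estate->dispatch_data and the newer main dispatcher state in |
| * estate->mainDispatchData. At most one of the branches below applies. |
| */ |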
| /* |
| * If QD, wait for QEs to finish and check their results. |
| */ |
| if (estate->dispatch_data && dispatch_get_results(estate->dispatch_data)) |
| { |
| CdbDispatchResults *pr = dispatch_get_results(estate->dispatch_data); |
| /* |
| * If we are finishing a query before all the tuples of the query |
| * plan were fetched we must call ExecSquelchNode before checking |
| * the dispatch results in order to tell the nodes below we no longer |
| * need any more tuples. |
| */ |
| if (!estate->es_got_eos) |
| { |
| ExecSquelchNode(queryDesc->planstate); |
| } |
| |
| /* |
| * Wait for completion of all QEs. |
| */ |
| dispatch_wait(estate->dispatch_data, false); |
| |
| /* If top slice was delegated to QEs, get num of rows processed. */ |
| if (sliceRunsOnQE(currentSlice)) |
| { |
| estate->es_processed += |
| cdbdisp_sumCmdTuples(pr, LocallyExecutingSliceIndex(estate)); |
| estate->es_lastoid = |
| cdbdisp_maxLastOid(pr, LocallyExecutingSliceIndex(estate)); |
| } |
| |
| cdbdisp_handleModifiedCatalogOnSegments(pr, UpdateCatalogModifiedOnSegments); |
| |
| /* sum up rejected rows if any (single row error handling only) */ |
| cdbdisp_sumRejectedRows(pr); |
| |
| /* |
| * Check and free the results of all gangs. If any QE had an |
| * error, report it and exit to our error handler via PG_THROW. |
| * NB: This call doesn't wait, because we already waited above. |
| */ |
| dispatch_cleanup(estate->dispatch_data); |
| estate->dispatch_data = NULL; |
| } |
| else if (estate->mainDispatchData && mainDispatchGetResults(estate->mainDispatchData)) |
| { |
| CdbDispatchResults *pr = mainDispatchGetResults(estate->mainDispatchData); |
| /* |
| * If we are finishing a query before all the tuples of the query |
| * plan were fetched we must call ExecSquelchNode before checking |
| * the dispatch results in order to tell the nodes below we no longer |
| * need any more tuples. |
| */ |
| if (!estate->es_got_eos) |
| { |
| ExecSquelchNode(queryDesc->planstate); |
| } |
| /* |
| * Wait for completion of all QEs. |
| */ |
| mainDispatchWait(estate->mainDispatchData, false); |
| |
| /* If top slice was delegated to QEs, get num of rows processed. */ |
| if (sliceRunsOnQE(currentSlice)) |
| { |
| estate->es_processed += |
| cdbdisp_sumCmdTuples(pr, LocallyExecutingSliceIndex(estate)); |
| estate->es_lastoid = |
| cdbdisp_maxLastOid(pr, LocallyExecutingSliceIndex(estate)); |
| } |
| |
| if (estate->es_plannedstmt->commandType == CMD_UPDATE || |
| estate->es_plannedstmt->commandType == CMD_DELETE) |
| { |
| cdbdisp_handleModifiedCatalogOnSegmentsForUD( |
| pr, &estate->es_plannedstmt->relFileNodeInfo, |
| UpdateCatalogModifiedOnSegmentsForUD, |
| UpdateCatalogIndexForUD); |
| if (!mainDispatchHasError(estate->mainDispatchData) && |
| list_length(estate->es_plannedstmt->relFileNodeInfo) != 0) |
| elog(ERROR, "failed to update catalog due to code bug"); |
| } |
| else |
| { |
| cdbdisp_handleModifiedCatalogOnSegments( |
| pr, UpdateCatalogModifiedOnSegments); |
| } |
| |
| /* sum up rejected rows if any (single row error handling only) */ |
| cdbdisp_sumRejectedRows(pr); |
| |
| /* |
| * Check and free the results of all gangs. If any QE had an |
| * error, report it and exit to our error handler via PG_THROW. |
| * NB: This call doesn't wait, because we already waited above. |
| */ |
| mainDispatchCleanUp(&estate->mainDispatchData); |
| } |
| |
| /* Teardown the Interconnect */ |
| if (estate->es_interconnect_is_setup) |
| { |
| /* |
| * MPP-3413: If we got here during cancellation of a cursor, |
| * we need to set the "forceEos" argument correctly -- |
| * otherwise we potentially hang (cursors cancel on the QEs, |
| * mark the estate to "cancelUnfinished" and then try to do a |
| * normal interconnect teardown). |
| */ |
| TeardownInterconnect(estate->interconnect_context, estate->motionlayer_context, estate->cancelUnfinished); |
| estate->es_interconnect_is_setup = false; |
| } |
| else if (queryDesc->newPlan) |
| { |
| teardownNewInterconnect(); |
| } |
| } |
| |
| /* |
| * Cleanup the gp-specific parts of the query executor. |
| * |
| * Will normally be called after an error from within a CATCH block. |
| */ |
| void mppExecutorCleanup(QueryDesc *queryDesc) |
| { |
| EState *estate; |
| |
| /* caller must have switched into per-query memory context already */ |
| estate = queryDesc->estate; |
| |
| /* |
| * If this query is being canceled, record that, provided gpperfmon |
| * is enabled. |
| */ |
| if (gp_enable_gpperfmon && |
| Gp_role == GP_ROLE_DISPATCH && |
| queryDesc->gpmon_pkt && |
| QueryCancelCleanup) |
| { |
| gpmon_qlog_query_canceling(queryDesc->gpmon_pkt); |
| |
| if (gp_cancel_query_print_log) |
| { |
| elog(LOG, "canceling query (%d, %d)", |
| queryDesc->gpmon_pkt->u.qlog.key.ssid, |
| queryDesc->gpmon_pkt->u.qlog.key.ccnt); |
| } |
| } |
| |
| /* |
| * Delay the cancellation for the configured time, if any. |
| */ |
| if (Gp_role == GP_ROLE_DISPATCH && |
| QueryCancelCleanup && |
| gp_cancel_query_delay_time > 0) |
| { |
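| /* gp_cancel_query_delay_time is in milliseconds; pg_usleep takes microseconds. */ |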
| pg_usleep(gp_cancel_query_delay_time * 1000); |
| } |
| |
| /* |
| * Request any commands still executing on qExecs to stop. |
| * Wait for them to finish and clean up the dispatching structures. |
| * Replace current error info with QE error info if more interesting. |
| */ |
| if (estate->dispatch_data) |
| { |
| /* |
| * If we are finishing a query before all the tuples of the query |
| * plan were fetched we must call ExecSquelchNode before checking |
| * the dispatch results in order to tell the nodes below we no longer |
| * need any more tuples. |
| */ |
| if (estate->es_interconnect_is_setup && !estate->es_got_eos) |
| ExecSquelchNode(queryDesc->planstate); |
| |
| dispatch_catch_error(estate->dispatch_data); |
| estate->dispatch_data = NULL; |
| } |
| else if (estate->mainDispatchData) |
| { |
| if (estate->es_interconnect_is_setup && !estate->es_got_eos) |
| ExecSquelchNode(queryDesc->planstate); |
| mainDispatchCatchError(&estate->mainDispatchData); |
| } |
| |
| /* Clean up the interconnect. */ |
| if (estate->es_interconnect_is_setup) |
| { |
| TeardownInterconnect(estate->interconnect_context, estate->motionlayer_context, true /* force EOS */); |
| estate->es_interconnect_is_setup = false; |
| } |
| else if (queryDesc->newPlan) |
| { |
| teardownNewInterconnect(); |
| } |
| |
| /** |
| * Perfmon related stuff. |
| */ |
| if (gp_enable_gpperfmon |
| && Gp_role == GP_ROLE_DISPATCH |
| && queryDesc->gpmon_pkt) |
| { |
| gpmon_qlog_query_error(queryDesc->gpmon_pkt); |
| pfree(queryDesc->gpmon_pkt); |
| queryDesc->gpmon_pkt = NULL; |
| } |
| |
| /* Workfile manager per-query resource accounting */ |
| WorkfileQueryspace_ReleaseEntry(); |
| |
| ReportOOMConsumption(); |
| |
| /** |
| * Since there was an error, clean up the function scan stack. |
| */ |
| SPI_InitMemoryReservation(); |
| } |
| |
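| /* |
| * Stub for plan-node types that can no longer appear in executable plans; |
| * it simply reports that the operator is defunct. |
| */ |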
| void |
| initGpmonPktForDefunctOperators(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate) |
| { |
| Assert(IsA(planNode, SeqScan) || |
| IsA(planNode, AppendOnlyScan) || |
| IsA(planNode, ParquetScan)); |
| insist_log(false, "SeqScan/AppendOnlyScan/ParquetScan are defunct"); |
| } |
| |
| /* |
| * The function pointers to init the gpmon packet for each plan node. |
| * The order of the function pointers is the same as the order defined in |
| * NodeTag (nodes.h). |
| */ |
| void (*initGpmonPktFuncs[])(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate) = |
| { |
| &initGpmonPktForResult, /* T_Result */ |
| &initGpmonPktForAppend, /* T_Append */ |
| &initGpmonPktForSequence, /* T_Sequence */ |
| &initGpmonPktForBitmapAnd, /* T_BitmapAnd */ |
| &initGpmonPktForBitmapOr, /* T_BitmapOr */ |
| &initGpmonPktForDefunctOperators, /* T_SeqScan */ |
| &initGpmonPktForExternalScan, /* T_ExternalScan */ |
| &initGpmonPktForDefunctOperators, /* T_AppendOnlyScan */ |
| &initGpmonPktForTableScan, /* T_TableScan */ |
| &initGpmonPktForDynamicTableScan, /* reserved for T_DynamicTableScan */ |
| &initGpmonPktForIndexScan, /* T_IndexScan */ |
| &initGpmonPktForDynamicIndexScan, /* T_DynamicIndexScan */ |
| &initGpmonPktForBitmapIndexScan, /* T_BitmapIndexScan */ |
| &initGpmonPktForBitmapHeapScan, /* T_BitmapHeapScan */ |
| &initGpmonPktForBitmapTableScan, /* T_BitmapTableScan */ |
| &initGpmonPktForTidScan, /* T_TidScan */ |
| &initGpmonPktForSubqueryScan, /* T_SubqueryScan */ |
| &initGpmonPktForFunctionScan, /* T_FunctionScan */ |
| &initGpmonPktForValuesScan, /* T_ValuesScan */ |
| &initGpmonPktForNestLoop, /* T_NestLoop */ |
| &initGpmonPktForMergeJoin, /* T_MergeJoin */ |
| &initGpmonPktForHashJoin, /* T_HashJoin */ |
| &initGpmonPktForMaterial, /* T_Material */ |
| &initGpmonPktForSort, /* T_Sort */ |
| &initGpmonPktForAgg, /* T_Agg */ |
| &initGpmonPktForUnique, /* T_Unique */ |
| &initGpmonPktForHash, /* T_Hash */ |
| &initGpmonPktForSetOp, /* T_SetOp */ |
| &initGpmonPktForLimit, /* T_Limit */ |
| &initGpmonPktForMotion, /* T_Motion */ |
| &initGpmonPktForShareInputScan, /* T_ShareInputScan */ |
| &initGpmonPktForWindow, /* T_Window */ |
| &initGpmonPktForRepeat /* T_Repeat */ |
| /* T_Plan_End */ |
| }; |
| |
| /* |
| * Define a compile-time assert: if a new executor node is added without |
| * updating the initGpmonPktFuncs array above, the array type below gets a |
| * negative size and compilation fails, prompting the proper change. |
| */ |
| typedef char assertion_failed_initGpmonPktFuncs \ |
| [((T_Plan_End - T_Plan_Start) == \ |
| (sizeof(initGpmonPktFuncs) / sizeof(&initGpmonPktForResult))) - 1]; |
| |
| |
| /* |
| * sendInitGpmonPkts -- Send an init gpmon packet for the given node and |
| * for those of its child nodes that run on the same slice. |
| * |
| * This function is only used by the Append executor node, since Append does |
| * not call ExecInitNode() for each of its child nodes at initialization |
| * time. However, gpmon requires each node to be initialized in order to |
| * show the whole plan tree. |
| */ |
| void |
| sendInitGpmonPkts(Plan *node, EState *estate) |
| { |
| gpmon_packet_t gpmon_pkt; |
| |
| if (node == NULL) |
| return; |
| |
| switch (nodeTag(node)) |
| { |
| case T_Append: |
| { |
| int first_plan, last_plan; |
| Append *appendnode = (Append *)node; |
| |
| initGpmonPktForAppend(node, &gpmon_pkt, estate); |
| |
| if (appendnode->isTarget && estate->es_evTuple != NULL) |
| { |
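| /* |
| * When the Append is an update target and an EvalPlanQual tuple is |
| * being tested (es_evTuple set), only the subplan for the current |
| * result relation applies; pointer arithmetic yields its index. |
| */ |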
| first_plan = estate->es_result_relation_info - estate->es_result_relations; |
| Assert(first_plan >= 0 && first_plan < list_length(appendnode->appendplans)); |
| last_plan = first_plan; |
| } |
| else |
| { |
| first_plan = 0; |
| last_plan = list_length(appendnode->appendplans) - 1; |
| } |
| |
| for (; first_plan <= last_plan; first_plan++) |
| { |
| Plan *initNode = (Plan *)list_nth(appendnode->appendplans, first_plan); |
| |
| sendInitGpmonPkts(initNode, estate); |
| } |
| |
| break; |
| } |
| |
| case T_Sequence: |
| { |
| Sequence *sequence = (Sequence *)node; |
| |
| ListCell *lc; |
| foreach (lc, sequence->subplans) |
| { |
| Plan *subplan = (Plan *)lfirst(lc); |
| |
| sendInitGpmonPkts(subplan, estate); |
| } |
| break; |
| } |
| |
| case T_BitmapAnd: |
| { |
| ListCell *lc; |
| |
| initGpmonPktForBitmapAnd(node, &gpmon_pkt, estate); |
| foreach (lc, ((BitmapAnd*)node)->bitmapplans) |
| { |
| sendInitGpmonPkts((Plan *)lfirst(lc), estate); |
| } |
| |
| break; |
| } |
| |
| case T_BitmapOr: |
| { |
| ListCell *lc; |
| |
| initGpmonPktForBitmapOr(node, &gpmon_pkt, estate); |
| foreach (lc, ((BitmapOr*)node)->bitmapplans) |
| { |
| sendInitGpmonPkts((Plan *)lfirst(lc), estate); |
| } |
| |
| break; |
| } |
| |
| case T_SeqScan: |
| case T_AppendOnlyScan: |
| case T_DynamicTableScan: |
| case T_ParquetScan: |
| case T_ExternalScan: |
| case T_IndexScan: |
| case T_OrcIndexScan: |
| case T_OrcIndexOnlyScan: |
| case T_BitmapIndexScan: |
| case T_TidScan: |
| case T_FunctionScan: |
| case T_ValuesScan: |
| case T_MagmaIndexScan: |
| case T_MagmaIndexOnlyScan: |
| { |
| initGpmonPktFuncs[nodeTag(node) - T_Plan_Start](node, &gpmon_pkt, estate); |
| break; |
| } |
| |
| case T_Result: |
| case T_BitmapHeapScan: |
| case T_BitmapTableScan: |
| case T_ShareInputScan: |
| case T_Material: |
| case T_Sort: |
| case T_Agg: |
| case T_Window: |
| case T_Unique: |
| case T_Hash: |
| case T_SetOp: |
| case T_Limit: |
| case T_Repeat: |
| { |
| initGpmonPktFuncs[nodeTag(node) - T_Plan_Start](node, &gpmon_pkt, estate); |
| sendInitGpmonPkts(outerPlan(node), estate); |
| |
| break; |
| } |
| |
| case T_SubqueryScan: |
| { |
| /** |
| * Recurse into subqueryscan node's subplan. |
| */ |
| sendInitGpmonPkts(((SubqueryScan *)node)->subplan, estate); |
| break; |
| } |
| |
| case T_NestLoop: |
| case T_MergeJoin: |
| case T_HashJoin: |
| { |
| initGpmonPktFuncs[nodeTag(node) - T_Plan_Start](node, &gpmon_pkt, estate); |
| sendInitGpmonPkts(outerPlan(node), estate); |
| sendInitGpmonPkts(innerPlan(node), estate); |
| |
| break; |
| } |
| |
| case T_Motion: |
| /* |
| * No need to send an init packet, since the Motion node is always |
| * initialized. And since all nodes under a Motion run on a different |
| * slice, we stop the recursion here. |
| */ |
| break; |
| |
| default: |
| elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); |
| break; |
| } |
| } |
| |
| void ResetExprContext(ExprContext *econtext) |
| { |
| MemoryContext memctxt = econtext->ecxt_per_tuple_memory; |
| /* |
| * Skip the reset unless a nontrivial amount of memory is outstanding; |
| * resetting a nearly-empty context would be wasted work. |
| */ |
| if (memctxt->allBytesAlloc - memctxt->allBytesFreed > 50000) |
| MemoryContextReset(memctxt); |
| } |
| |
| /** |
| * This method is used to determine how much memory a specific operator |
| * is supposed to use (in KB). |
| */ |
| uint64 PlanStateOperatorMemKB(const PlanState *ps) |
| { |
| Assert(ps); |
| Assert(ps->plan); |
| uint64 result = 0; |
| if (ps->plan->operatorMemKB == 0) |
| { |
| /** |
| * Some statements do not go through the resource queue, so their plans |
| * don't get decorated with operatorMemKB. Someday, we should fix |
| * resource queues. |
| */ |
| result = work_mem; |
| } |
| else |
| { |
| result = ps->plan->operatorMemKB; |
| } |
| |
| return result; |
| } |
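| |
| /* |
| * Usage sketch (caller and variable names are hypothetical): an operator |
| * sizing its workspace might call |
| * |
| * uint64 opMemKB = PlanStateOperatorMemKB(&myState->ps); |
| * |
| * which yields the plan's operatorMemKB, or work_mem when the plan was |
| * not decorated by the resource queue. |
| */ |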
| |
| /** |
| * Machinery to find a MotionState within a PlanState tree, given a motion id (which is the same as the slice index). |
| */ |
| typedef struct MotionStateFinderContext |
| { |
| int motionId; /* Input */ |
| MotionState *motionState; /* Output */ |
| } MotionStateFinderContext; |
| |
| /** |
| * Walker that finds a MotionState node within a PlanState tree. |
| */ |
| static CdbVisitOpt |
| MotionStateFinderWalker(PlanState *node, |
| void *context) |
| { |
| Assert(context); |
| MotionStateFinderContext *ctx = (MotionStateFinderContext *) context; |
| |
| if (IsA(node, MotionState)) |
| { |
| MotionState *ms = (MotionState *) node; |
| Motion *m = (Motion *) ms->ps.plan; |
| if (m->motionID == ctx->motionId) |
| { |
| Assert(ctx->motionState == NULL); |
| ctx->motionState = ms; |
| return CdbVisit_Skip; /* don't visit subtree */ |
| } |
| } |
| |
| /* Continue walking */ |
| return CdbVisit_Walk; |
| } |
| |
| /** |
| * Given a slice index, find the MotionState that corresponds to it by |
| * walking the PlanState tree. |
| */ |
| MotionState *getMotionState(struct PlanState *ps, int sliceIndex) |
| { |
| Assert(ps); |
| Assert(sliceIndex > -1); |
| |
| MotionStateFinderContext ctx; |
| ctx.motionId = sliceIndex; |
| ctx.motionState = NULL; |
| planstate_walk_node(ps, MotionStateFinderWalker, &ctx); |
| Assert(ctx.motionState != NULL); |
| return ctx.motionState; |
| } |
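| |
| /* |
| * Usage sketch (hypothetical caller): given the top-level PlanState of a |
| * query, the sending Motion for slice i can be located with |
| * |
| * MotionState *ms = getMotionState(queryDesc->planstate, i); |
| * |
| * The walk asserts that a matching Motion node exists in the tree. |
| */ |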
| |
| /** |
| * Provide the index of the locally executing slice. |
| */ |
| int LocallyExecutingSliceIndex(EState *estate) |
| { |
| Assert(estate); |
| return (!estate->es_sliceTable ? 0 : estate->es_sliceTable->localSlice); |
| } |
| |
| /** |
| * Provide the root slice index of the locally executing slice. |
| */ |
| int RootSliceIndex(EState *estate) |
| { |
| Assert(estate); |
| int result = 0; |
| |
| if (estate->es_sliceTable) |
| { |
| Slice *localSlice = list_nth(estate->es_sliceTable->slices, LocallyExecutingSliceIndex(estate)); |
| result = localSlice->rootIndex; |
| } |
| |
| return result; |
| } |