/* src/backend/executor/nodeIndexscan.c - hawq - Git at Google */

 /*-------------------------------------------------------------------------
  *
  * nodeIndexscan.c
  *	  Routines to support indexed scans of relations
  *
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
  *	  $PostgreSQL: pgsql/src/backend/executor/nodeIndexscan.c,v 1.117.2.1 2006/12/26 19:26:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 /*
  * INTERFACE ROUTINES
  *		ExecIndexScan			scans a relation using indices
  *		ExecIndexNext			using index to retrieve next tuple
  *		ExecInitIndexScan		creates and initializes state info.
  *		ExecIndexReScan			rescans the indexed relation.
  *		ExecEndIndexScan		releases all storage.
  *		ExecIndexMarkPos		marks scan position.
  *		ExecIndexRestrPos		restores scan position.
  */
 #include "postgres.h"

 #include "access/genam.h"
 #include "access/nbtree.h"
 #include "cdb/cdbvars.h"
 #include "executor/execdebug.h"
 #include "executor/nodeIndexscan.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "utils/array.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"

 /*
  * initScanDesc
  *		Open the index scan descriptor for this node on first use.
  *
  * A no-op when indexstate->iss_ScanDesc is already set.  Otherwise the
  * descriptor returned by index_beginscan() -- started on the node's heap
  * relation and index relation, with the EState's snapshot and the node's
  * scan keys -- is stored in indexstate->iss_ScanDesc.
  */
 static inline void
 initScanDesc(IndexScanState *indexstate)
 {
 	EState	   *execState;
 	Relation	heapRel;

 	/* Descriptor already open: nothing to do. */
 	if (indexstate->iss_ScanDesc != NULL)
 		return;

 	execState = indexstate->ss.ps.state;
 	heapRel = indexstate->ss.ss_currentRelation;

 	indexstate->iss_ScanDesc =
 		index_beginscan(heapRel,
 						indexstate->iss_RelationDesc,
 						execState->es_snapshot,
 						indexstate->iss_NumScanKeys,
 						indexstate->iss_ScanKeys);
 }

 /*
  * freeScanDesc
  *		End the node's index scan and forget its descriptor.
  *
  * A no-op when indexstate->iss_ScanDesc is NULL.  Otherwise the descriptor
  * is passed to index_endscan() and the field is reset to NULL, so that a
  * later initScanDesc() call opens a fresh one.
  */
 static inline void
 freeScanDesc(IndexScanState *indexstate)
 {
 	IndexScanDesc openScan = indexstate->iss_ScanDesc;

 	if (openScan == NULL)
 		return;

 	index_endscan(openScan);
 	indexstate->iss_ScanDesc = NULL;
 }


 /* ----------------------------------------------------------------
  *		IndexNext
  *
  *		Fetch the next tuple for an IndexScan node.  The tuple is returned
  *		in the node's scan tuple slot; the cleared slot is returned when
  *		there is no (further) tuple.
  *
  *		Two paths:
  *		  - PlanQual recheck: when estate->es_evTuple holds a tuple for
  *			this scan's range-table index, that tuple is tested against
  *			indexqualorig and handed back at most once.
  *		  - Normal scan: the next tuple comes from index_getnext().
  *
  *		Unless ps.delayEagerFree is set, ExecEagerFreeIndexScan() is
  *		called as soon as a path finds it has no tuple to return.
  *
  *		The gpmon rows-out counter is incremented only for tuples that
  *		are actually returned.
  * ----------------------------------------------------------------
  */
 TupleTableSlot *
 IndexNext(IndexScanState *node)
 {
 	EState	   *estate;
 	ExprContext *econtext;
 	ScanDirection direction;
 	IndexScanDesc scandesc;
 	Index		scanrelid;
 	HeapTuple	tuple;
 	TupleTableSlot *slot;

 	/*
 	 * extract necessary information from index scan node
 	 */
 	estate = node->ss.ps.state;
 	direction = estate->es_direction;

 	/* The scan descriptor is opened lazily, on first use. */
 	initScanDesc(node);

 	/* flip direction if this is an overall backward scan */
 	if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
 	{
 		if (ScanDirectionIsForward(direction))
 			direction = BackwardScanDirection;
 		else if (ScanDirectionIsBackward(direction))
 			direction = ForwardScanDirection;
 	}
 	scandesc = node->iss_ScanDesc;
 	econtext = node->ss.ps.ps_ExprContext;
 	slot = node->ss.ss_ScanTupleSlot;
 	scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;

 	/*
 	 * Check if we are evaluating PlanQual for tuple of this relation.
 	 * Additional checking is not good, but no other way for now. We could
 	 * introduce new nodes for this case and handle IndexScan --> NewNode
 	 * switching in Init/ReScan plan...
 	 */
 	if (estate->es_evTuple != NULL &&
 		estate->es_evTuple[scanrelid - 1] != NULL)
 	{
 		/* The substitute tuple was already handed out: report end of scan. */
 		if (estate->es_evTupleNull[scanrelid - 1])
 		{
 			if (!node->ss.ps.delayEagerFree)
 				ExecEagerFreeIndexScan(node);

 			return ExecClearTuple(slot);
 		}

 		ExecStoreGenericTuple(estate->es_evTuple[scanrelid - 1], slot, false);

 		/* Does the tuple meet the indexqual condition? */
 		econtext->ecxt_scantuple = slot;

 		ResetExprContext(econtext);

 		if (ExecQual(node->indexqualorig, econtext, false))
 		{
 			/* Count the row only when it is really returned. */
 			Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node));
 		}
 		else
 		{
 			if (!node->ss.ps.delayEagerFree)
 				ExecEagerFreeIndexScan(node);

 			ExecClearTuple(slot);		/* would not be returned by scan */
 		}

 		/* Flag for the next call that no more tuples */
 		estate->es_evTupleNull[scanrelid - 1] = true;

 		CheckSendPlanStateGpmonPkt(&node->ss.ps);
 		return slot;
 	}

 	/*
 	 * ok, now that we have what we need, fetch the next tuple.
 	 */
 	tuple = index_getnext(scandesc, direction);
 	if (tuple != NULL)
 	{
 		/*
 		 * Store the scanned tuple in the scan tuple slot of the scan state.
 		 * Note: we pass 'false' because tuples returned by amgetnext are
 		 * pointers onto disk pages and must not be pfree()'d.
 		 */
 		ExecStoreHeapTuple(tuple,				/* tuple to store */
 						   slot,				/* slot to store in */
 						   scandesc->xs_cbuf,	/* buffer containing tuple */
 						   false);				/* don't pfree */

 		Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node));
 		CheckSendPlanStateGpmonPkt(&node->ss.ps);
 		return slot;
 	}

 	/*
 	 * index_getnext() returned nothing, so we are at the end of the scan.
 	 */
 	if (!node->ss.ps.delayEagerFree)
 		ExecEagerFreeIndexScan(node);

 	return ExecClearTuple(slot);
 }

 /* ----------------------------------------------------------------
  *		ExecIndexScan(node)
  *
  *		Entry point for pulling one tuple from an index scan node.
  *		If the node has runtime keys that have not been marked ready,
  *		ExecReScan() is invoked first; the tuple is then obtained through
  *		ExecScan() with IndexNext as the access method.
  * ----------------------------------------------------------------
  */
 TupleTableSlot *
 ExecIndexScan(IndexScanState *node)
 {
 	bool		keysPending;

 	/* Runtime keys exist but have not been set up yet? */
 	keysPending = (node->iss_NumRuntimeKeys != 0) && !node->iss_RuntimeKeysReady;
 	if (keysPending)
 		ExecReScan((PlanState *) node, NULL);

 	return ExecScan(&node->ss, (ExecScanAccessMtd) IndexNext);
 }

 /* ----------------------------------------------------------------
  *		ExecIndexReScan(node)
  *
  *		Recompute the runtime scan keys (those whose values are only
  *		known at execution time) and restart the index scan.
  *
  *		exprCtxt, when not NULL, supplies the outer tuple that is copied
  *		into both the runtime-key context and the node's regular
  *		expression context.
  *
  *		When a PlanQual recheck tuple is installed for this relation,
  *		only es_evTupleNull is reset and the index itself is not
  *		rescanned.
  * ----------------------------------------------------------------
  */
 void
 ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
 {
 	EState	   *execState = node->ss.ps.state;
 	ExprContext *runtimeCtx = node->iss_RuntimeContext;	/* NULL if no runtime keys */
 	Index		rtIndex = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;

 	/* Make sure a scan descriptor exists before it is rescanned below. */
 	initScanDesc(node);

 	if (runtimeCtx != NULL)
 	{
 		/*
 		 * Remember the outer tuple for the runtime key calculation, and
 		 * expose it to the regular per-tuple context as well so that
 		 * indexqualorig evaluations can see it.
 		 */
 		if (exprCtxt != NULL)
 		{
 			runtimeCtx->ecxt_outertuple = exprCtxt->ecxt_outertuple;
 			node->ss.ps.ps_ExprContext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
 		}

 		/*
 		 * Resetting here avoids leaking memory per outer tuple; it relies
 		 * on *all* runtime keys being recalculated on every call.
 		 */
 		ResetExprContext(runtimeCtx);
 	}

 	/* Recompute key values that depend on data from an outer scan. */
 	if (node->iss_NumRuntimeKeys != 0)
 		ExecIndexEvalRuntimeKeys(runtimeCtx,
 								 node->iss_RuntimeKeys,
 								 node->iss_NumRuntimeKeys);
 	node->iss_RuntimeKeysReady = true;

 	/* PlanQual rescan: just re-arm the substitute tuple and stop. */
 	if (execState->es_evTuple != NULL &&
 		execState->es_evTuple[rtIndex - 1] != NULL)
 	{
 		execState->es_evTupleNull[rtIndex - 1] = false;
 		return;
 	}

 	/* Restart the index scan with the (possibly updated) keys. */
 	index_rescan(node->iss_ScanDesc, node->iss_ScanKeys);

 	Gpmon_M_Incr(GpmonPktFromIndexScanState(node), GPMON_INDEXSCAN_RESCAN);
 	CheckSendPlanStateGpmonPkt(&node->ss.ps);
 }


 /*
  * ExecIndexEvalRuntimeKeys
  *		Evaluate every runtime key expression and store the result in the
  *		scan key it belongs to.
  *
  * econtext is the context the expressions are evaluated in; runtimeKeys is
  * an array of numRuntimeKeys entries.  For each entry sk_argument receives
  * the value and SK_ISNULL in sk_flags is set or cleared to match.
  *
  * Note: a pass-by-reference result may point into the outer scan's tuple
  * rather than into econtext->ecxt_per_tuple_memory.  The value is not
  * copied; this assumes the outer tuple stays put for the whole scan.
  */
 void
 ExecIndexEvalRuntimeKeys(ExprContext *econtext,
 						 IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys)
 {
 	int			keyno = 0;

 	while (keyno < numRuntimeKeys)
 	{
 		IndexRuntimeKeyInfo *info = &runtimeKeys[keyno];
 		ScanKey		key = info->scan_key;
 		ExprState  *expr = info->key_expr;
 		bool		valueIsNull;
 		Datum		value;

 		value = ExecEvalExprSwitchContext(expr, econtext, &valueIsNull, NULL);

 		key->sk_argument = value;
 		if (valueIsNull)
 			key->sk_flags |= SK_ISNULL;
 		else
 			key->sk_flags &= ~SK_ISNULL;

 		keyno++;
 	}
 }

 /*
  * ExecIndexEvalArrayKeys
  *		Evaluate the array key expressions and prime each scan key with
  *		the first element of its array.
  *
  * Returns TRUE when every array yielded at least one element; the scan
  * keys then hold the first elements and each entry's next_elem is 1.
  * Returns FALSE as soon as an array expression is NULL or an array is
  * empty, since no match is possible then; later keys are not evaluated.
  *
  * The deconstructed arrays live in econtext->ecxt_per_tuple_memory and are
  * expected to be released when that context is reset, so nothing is
  * pfree'd here.
  */
 bool
 ExecIndexEvalArrayKeys(ExprContext *econtext,
 					   IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
 {
 	MemoryContext callerContext;
 	bool		usable = true;
 	int			keyno;

 	/* We want to keep the arrays in per-tuple memory */
 	callerContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

 	/* The loop stops at the first key that makes a match impossible. */
 	for (keyno = 0; usable && keyno < numArrayKeys; keyno++)
 	{
 		IndexArrayKeyInfo *info = &arrayKeys[keyno];
 		ScanKey		key = info->scan_key;
 		ExprState  *arrayExpr = info->array_expr;
 		Datum		arrayDatum;
 		bool		arrayIsNull;
 		ArrayType  *array;
 		int16		elemLen;
 		bool		elemByVal;
 		char		elemAlign;
 		int			nelems;
 		Datum	   *values;
 		bool	   *nulls;

 		/*
 		 * Compute and deconstruct the array expression.  (The pass-by-ref
 		 * caveat described for ExecIndexEvalRuntimeKeys applies here too.)
 		 */
 		arrayDatum = ExecEvalExpr(arrayExpr, econtext, &arrayIsNull, NULL);
 		if (arrayIsNull)
 		{
 			usable = false;
 			continue;
 		}

 		array = DatumGetArrayTypeP(arrayDatum);
 		/* We could cache this data, but not clear it's worth it */
 		get_typlenbyvalalign(ARR_ELEMTYPE(array),
 							 &elemLen, &elemByVal, &elemAlign);
 		deconstruct_array(array,
 						  ARR_ELEMTYPE(array),
 						  elemLen, elemByVal, elemAlign,
 						  &values, &nulls, &nelems);
 		if (nelems <= 0)
 		{
 			usable = false;
 			continue;
 		}

 		info->elem_values = values;
 		info->elem_nulls = nulls;
 		info->num_elems = nelems;

 		/* Start the iteration at element 0. */
 		key->sk_argument = values[0];
 		if (nulls[0])
 			key->sk_flags |= SK_ISNULL;
 		else
 			key->sk_flags &= ~SK_ISNULL;
 		info->next_elem = 1;
 	}

 	MemoryContextSwitchTo(callerContext);

 	return usable;
 }

 /*
  * ExecIndexAdvanceArrayKeys
  *		Step the array keys to their next combination of values.
  *
  * Works like an odometer: the first key is advanced; when a key has run
  * out of elements it wraps back to element 0 and the next key is advanced
  * instead.
  *
  * Returns TRUE if some key moved to a new element (the scan keys then hold
  * the next set of values), FALSE if every key wrapped around or there are
  * no array keys.
  */
 bool
 ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
 {
 	int			keyno;

 	for (keyno = 0; keyno < numArrayKeys; keyno++)
 	{
 		IndexArrayKeyInfo *info = &arrayKeys[keyno];
 		ScanKey		key = info->scan_key;
 		int			cursor = info->next_elem;
 		bool		wrapped = (cursor >= info->num_elems);

 		/* Exhausted: restart this key and carry over to the next one. */
 		if (wrapped)
 			cursor = 0;

 		key->sk_argument = info->elem_values[cursor];
 		if (info->elem_nulls[cursor])
 			key->sk_flags |= SK_ISNULL;
 		else
 			key->sk_flags &= ~SK_ISNULL;
 		info->next_elem = cursor + 1;

 		if (!wrapped)
 			return true;
 	}

 	return false;
 }


 /* ----------------------------------------------------------------
  *		ExecEndIndexScan
  *
  *		Shut down an index scan node: clear its tuple slots, end the
  *		index scan if a descriptor is still open, close the index and
  *		the heap relation, and finish the node's gpmon packet.
  * ----------------------------------------------------------------
  */
 void
 ExecEndIndexScan(IndexScanState *node)
 {
 	Relation	indexRelationDesc;
 	Relation	relation;

 	/*
 	 * extract information from the node
 	 */
 	indexRelationDesc = node->iss_RelationDesc;
 	relation = node->ss.ss_currentRelation;

 	/*
 	 * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
 	 */
 #ifdef NOT_USED
 	ExecFreeExprContext(&node->ss.ps);
 	if (node->iss_RuntimeContext)
 		FreeExprContext(node->iss_RuntimeContext);
 #endif

 	/*
 	 * clear out tuple table slots
 	 */
 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
 	ExecClearTuple(node->ss.ss_ScanTupleSlot);

 	/*
 	 * end the index scan (a no-op if the descriptor was already freed),
 	 * then close the index relation
 	 */
 	ExecEagerFreeIndexScan(node);
 	index_close(indexRelationDesc, NoLock);

 	/*
 	 * close the heap relation.
 	 */
 	ExecCloseScanRelation(relation);

 	EndPlanStateGpmonPkt(&node->ss.ps);
 }

 /* ----------------------------------------------------------------
  *		ExecIndexMarkPos
  *
  *		Marks the current scan position by calling index_markpos() on
  *		the node's scan descriptor.
  *
  *		NOTE(review): node->iss_ScanDesc is passed through unchecked and
  *		initScanDesc() is not called here, so this presumably relies on
  *		the descriptor already being open -- confirm against callers.
  * ----------------------------------------------------------------
  */
 void
 ExecIndexMarkPos(IndexScanState *node)
 {
 	index_markpos(node->iss_ScanDesc);
 }

 /* ----------------------------------------------------------------
  *		ExecIndexRestrPos
  *
  *		Restores the scan position by calling index_restrpos() on the
  *		node's scan descriptor, then bumps the gpmon
  *		GPMON_INDEXSCAN_RESTOREPOS counter for this node.
  *
  *		NOTE(review): node->iss_ScanDesc is passed through unchecked and
  *		initScanDesc() is not called here, so this presumably relies on
  *		the descriptor already being open -- confirm against callers.
  * ----------------------------------------------------------------
  */
 void
 ExecIndexRestrPos(IndexScanState *node)
 {
 	index_restrpos(node->iss_ScanDesc);
 	/* Record the restore in the node's monitoring packet. */
 	Gpmon_M_Incr(GpmonPktFromIndexScanState(node), GPMON_INDEXSCAN_RESTOREPOS);
 	CheckSendPlanStateGpmonPkt(&node->ss.ps);
 }

 /* ----------------------------------------------------------------
  *		ExecInitIndexScan
  *
  *		Build and return the IndexScanState for an IndexScan plan node:
  *		set up expression state and tuple slots, open the base relation
  *		and the index relation, build the scan keys, and create a second
  *		expression context when runtime keys are present.
  *
  *		No index scan descriptor is opened here (index_beginscan() is
  *		not called in this function).
  *
  *		eflags is consulted only for EXEC_FLAG_REWIND, EXEC_FLAG_BACKWARD
  *		and EXEC_FLAG_MARK; any of them sets ps.delayEagerFree.
  *
  *		Note: index scans have 2 sets of state information because
  *			  we have to keep track of the base relation and the
  *			  index relation.
  * ----------------------------------------------------------------
  */
 IndexScanState *
 ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 {
 	IndexScanState *indexstate = makeNode(IndexScanState);
 	PlanState  *planstate = &indexstate->ss.ps;
 	Relation	heapRel;
 	int			noEagerFreeFlags = EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK;

 	/* Link the new state node to its plan node and executor state. */
 	planstate->plan = (Plan *) node;
 	planstate->state = estate;

 	/* Standard (per-tuple) expression context for the node. */
 	ExecAssignExprContext(estate, planstate);

 	/*
 	 * Initialize child expressions.
 	 *
 	 * The indexqual expression is not initialized as a whole, only the
 	 * sub-parts that become runtime keys (done by ExecIndexBuildScanKeys
 	 * below).  indexqualorig is always initialized even though it is only
 	 * used in uncommon cases, because any SubPlans inside it must be found
 	 * now.
 	 */
 	planstate->targetlist = (List *)
 		ExecInitExpr((Expr *) node->scan.plan.targetlist,
 					 (PlanState *) indexstate);
 	planstate->qual = (List *)
 		ExecInitExpr((Expr *) node->scan.plan.qual,
 					 (PlanState *) indexstate);
 	indexstate->indexqualorig = (List *)
 		ExecInitExpr((Expr *) node->indexqualorig,
 					 (PlanState *) indexstate);

 #define INDEXSCAN_NSLOTS 2

 	/* One result slot plus one scan slot: INDEXSCAN_NSLOTS in total. */
 	ExecInitResultTupleSlot(estate, planstate);
 	ExecInitScanTupleSlot(estate, &indexstate->ss);

 	/* Open the base relation; the scan slot takes its tuple descriptor. */
 	heapRel = ExecOpenScanRelation(estate, node->scan.scanrelid);
 	indexstate->ss.ss_currentRelation = heapRel;
 	ExecAssignScanType(&indexstate->ss, RelationGetDescr(heapRel));

 	/*
 	 * Open the index relation.
 	 *
 	 * If the parent table is one of the target relations of the query, then
 	 * InitPlan already opened and write-locked the index, so no further
 	 * lock is taken.  Otherwise a normal reader's lock is needed.
 	 */
 	if (ExecRelationIsTargetRelation(estate, node->scan.scanrelid))
 		indexstate->iss_RelationDesc = index_open(node->indexid, NoLock);
 	else
 		indexstate->iss_RelationDesc = index_open(node->indexid, AccessShareLock);

 	/* Turn the index qualification into scan keys. */
 	ExecIndexBuildScanKeys((PlanState *) indexstate,
 						   indexstate->iss_RelationDesc,
 						   node->indexqual,
 						   node->indexstrategy,
 						   node->indexsubtype,
 						   &indexstate->iss_ScanKeys,
 						   &indexstate->iss_NumScanKeys,
 						   &indexstate->iss_RuntimeKeys,
 						   &indexstate->iss_NumRuntimeKeys,
 						   NULL,	/* no ArrayKeys */
 						   NULL);

 	/*
 	 * Runtime keys need their own ExprContext: the standard one is reset
 	 * for every tuple.  ExecAssignExprContext() overwrites ps_ExprContext,
 	 * so the standard context is saved first and put back afterwards.
 	 */
 	indexstate->iss_RuntimeContext = NULL;
 	if (indexstate->iss_NumRuntimeKeys != 0)
 	{
 		ExprContext *perTupleCtx = planstate->ps_ExprContext;

 		ExecAssignExprContext(estate, planstate);
 		indexstate->iss_RuntimeContext = planstate->ps_ExprContext;
 		planstate->ps_ExprContext = perTupleCtx;
 	}

 	/* Runtime keys have not been evaluated yet. */
 	indexstate->iss_RuntimeKeysReady = false;

 	/* Result tuple type and projection info. */
 	ExecAssignResultTypeFromTL(planstate);
 	ExecAssignScanProjectionInfo(&indexstate->ss);

 	initGpmonPktForIndexScan((Plan *) node, &planstate->gpmon_pkt, estate);

 	/*
 	 * A node asked to support rewind, backward scan or mark/restore is not
 	 * eager-free safe.
 	 */
 	planstate->delayEagerFree = ((eflags & noEagerFreeFlags) != 0);

 	return indexstate;
 }

 /*
  * ExecCountSlotsIndexScan
  *		Number of tuple table slots needed by this node and its subplans:
  *		the counts reported by ExecCountSlotsNode() for the outer and
  *		inner plans plus INDEXSCAN_NSLOTS for the node itself.
  */
 int
 ExecCountSlotsIndexScan(IndexScan *node)
 {
 	Plan	   *plan = (Plan *) node;
 	int			nslots = INDEXSCAN_NSLOTS;

 	nslots += ExecCountSlotsNode(outerPlan(plan));
 	nslots += ExecCountSlotsNode(innerPlan(plan));

 	return nslots;
 }


 /*
  * initGpmonPktForIndexScan
  *		Initialize the gpmon packet of an IndexScan plan node.
  *
  * planNode must be an IndexScan and gpmon_pkt must not be NULL (both are
  * asserted).  The name looked up with get_rel_name() for the plan's
  * indexid is passed to InitPlanNodeGpmonPkt() together with the plan's
  * row estimate, and is pfree'd afterwards when it is not NULL.
  */
 void
 initGpmonPktForIndexScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate)
 {
 	char	   *indexName;

 	Assert(planNode != NULL && gpmon_pkt != NULL && IsA(planNode, IndexScan));

 	indexName = get_rel_name(((IndexScan *) planNode)->indexid);

 	Assert(GPMON_INDEXSCAN_TOTAL <= (int) GPMON_QEXEC_M_COUNT);
 	InitPlanNodeGpmonPkt(planNode, gpmon_pkt, estate, PMNT_IndexScan,
 						 (int64) planNode->plan_rows,
 						 indexName);

 	/* The name may be NULL; only free a real allocation. */
 	if (indexName != NULL)
 		pfree(indexName);
 }

 /*
  * ExecEagerFreeIndexScan
  *		Release the node's index scan descriptor, if one is open.
  *
  * Thin wrapper around freeScanDesc(), which ends the scan and resets
  * node->iss_ScanDesc to NULL; calling it again is then a no-op.
  */
 void
 ExecEagerFreeIndexScan(IndexScanState *node)
 {
 	freeScanDesc(node);
 }
	/*-------------------------------------------------------------------------
	*
	* nodeIndexscan.c
	* Routines to support indexed scans of relations
	*
	* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
	* Portions Copyright (c) 1994, Regents of the University of California
	*
	*
	* IDENTIFICATION
	* $PostgreSQL: pgsql/src/backend/executor/nodeIndexscan.c,v 1.117.2.1 2006/12/26 19:26:56 tgl Exp $
	*
	*-------------------------------------------------------------------------
	*/
	/*
	* INTERFACE ROUTINES
	* ExecIndexScan scans a relation using indices
	* ExecIndexNext using index to retrieve next tuple
	* ExecInitIndexScan creates and initializes state info.
	* ExecIndexReScan rescans the indexed relation.
	* ExecEndIndexScan releases all storage.
	* ExecIndexMarkPos marks scan position.
	* ExecIndexRestrPos restores scan position.
	*/
	#include "postgres.h"

	#include "access/genam.h"
	#include "access/nbtree.h"
	#include "cdb/cdbvars.h"
	#include "executor/execdebug.h"
	#include "executor/nodeIndexscan.h"
	#include "nodes/nodeFuncs.h"
	#include "optimizer/clauses.h"
	#include "utils/array.h"
	#include "utils/lsyscache.h"
	#include "utils/memutils.h"

	/*
	 * initScanDesc
	 *		Open the index scan descriptor for this node on first use.
	 *
	 * A no-op when indexstate->iss_ScanDesc is already set.  Otherwise the
	 * descriptor returned by index_beginscan() -- started on the node's heap
	 * relation and index relation, with the EState's snapshot and the node's
	 * scan keys -- is stored in indexstate->iss_ScanDesc.
	 */
	static inline void
	initScanDesc(IndexScanState *indexstate)
	{
		EState	   *execState;
		Relation	heapRel;

		/* Descriptor already open: nothing to do. */
		if (indexstate->iss_ScanDesc != NULL)
			return;

		execState = indexstate->ss.ps.state;
		heapRel = indexstate->ss.ss_currentRelation;

		indexstate->iss_ScanDesc =
			index_beginscan(heapRel,
							indexstate->iss_RelationDesc,
							execState->es_snapshot,
							indexstate->iss_NumScanKeys,
							indexstate->iss_ScanKeys);
	}

	/*
	 * freeScanDesc
	 *		End the node's index scan and forget its descriptor.
	 *
	 * A no-op when indexstate->iss_ScanDesc is NULL.  Otherwise the descriptor
	 * is passed to index_endscan() and the field is reset to NULL, so that a
	 * later initScanDesc() call opens a fresh one.
	 */
	static inline void
	freeScanDesc(IndexScanState *indexstate)
	{
		IndexScanDesc openScan = indexstate->iss_ScanDesc;

		if (openScan == NULL)
			return;

		index_endscan(openScan);
		indexstate->iss_ScanDesc = NULL;
	}


	/* ----------------------------------------------------------------
	 *		IndexNext
	 *
	 *		Fetch the next tuple for an IndexScan node.  The tuple is returned
	 *		in the node's scan tuple slot; the cleared slot is returned when
	 *		there is no (further) tuple.
	 *
	 *		Two paths:
	 *		  - PlanQual recheck: when estate->es_evTuple holds a tuple for
	 *			this scan's range-table index, that tuple is tested against
	 *			indexqualorig and handed back at most once.
	 *		  - Normal scan: the next tuple comes from index_getnext().
	 *
	 *		Unless ps.delayEagerFree is set, ExecEagerFreeIndexScan() is
	 *		called as soon as a path finds it has no tuple to return.
	 *
	 *		The gpmon rows-out counter is incremented only for tuples that
	 *		are actually returned.
	 * ----------------------------------------------------------------
	 */
	TupleTableSlot *
	IndexNext(IndexScanState *node)
	{
		EState	   *estate;
		ExprContext *econtext;
		ScanDirection direction;
		IndexScanDesc scandesc;
		Index		scanrelid;
		HeapTuple	tuple;
		TupleTableSlot *slot;

		/*
		 * extract necessary information from index scan node
		 */
		estate = node->ss.ps.state;
		direction = estate->es_direction;

		/* The scan descriptor is opened lazily, on first use. */
		initScanDesc(node);

		/* flip direction if this is an overall backward scan */
		if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
		{
			if (ScanDirectionIsForward(direction))
				direction = BackwardScanDirection;
			else if (ScanDirectionIsBackward(direction))
				direction = ForwardScanDirection;
		}
		scandesc = node->iss_ScanDesc;
		econtext = node->ss.ps.ps_ExprContext;
		slot = node->ss.ss_ScanTupleSlot;
		scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;

		/*
		 * Check if we are evaluating PlanQual for tuple of this relation.
		 * Additional checking is not good, but no other way for now. We could
		 * introduce new nodes for this case and handle IndexScan --> NewNode
		 * switching in Init/ReScan plan...
		 */
		if (estate->es_evTuple != NULL &&
			estate->es_evTuple[scanrelid - 1] != NULL)
		{
			/* The substitute tuple was already handed out: report end of scan. */
			if (estate->es_evTupleNull[scanrelid - 1])
			{
				if (!node->ss.ps.delayEagerFree)
					ExecEagerFreeIndexScan(node);

				return ExecClearTuple(slot);
			}

			ExecStoreGenericTuple(estate->es_evTuple[scanrelid - 1], slot, false);

			/* Does the tuple meet the indexqual condition? */
			econtext->ecxt_scantuple = slot;

			ResetExprContext(econtext);

			if (ExecQual(node->indexqualorig, econtext, false))
			{
				/* Count the row only when it is really returned. */
				Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node));
			}
			else
			{
				if (!node->ss.ps.delayEagerFree)
					ExecEagerFreeIndexScan(node);

				ExecClearTuple(slot);		/* would not be returned by scan */
			}

			/* Flag for the next call that no more tuples */
			estate->es_evTupleNull[scanrelid - 1] = true;

			CheckSendPlanStateGpmonPkt(&node->ss.ps);
			return slot;
		}

		/*
		 * ok, now that we have what we need, fetch the next tuple.
		 */
		tuple = index_getnext(scandesc, direction);
		if (tuple != NULL)
		{
			/*
			 * Store the scanned tuple in the scan tuple slot of the scan state.
			 * Note: we pass 'false' because tuples returned by amgetnext are
			 * pointers onto disk pages and must not be pfree()'d.
			 */
			ExecStoreHeapTuple(tuple,				/* tuple to store */
							   slot,				/* slot to store in */
							   scandesc->xs_cbuf,	/* buffer containing tuple */
							   false);				/* don't pfree */

			Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node));
			CheckSendPlanStateGpmonPkt(&node->ss.ps);
			return slot;
		}

		/*
		 * index_getnext() returned nothing, so we are at the end of the scan.
		 */
		if (!node->ss.ps.delayEagerFree)
			ExecEagerFreeIndexScan(node);

		return ExecClearTuple(slot);
	}

	/* ----------------------------------------------------------------
	 *		ExecIndexScan(node)
	 *
	 *		Entry point for pulling one tuple from an index scan node.
	 *		If the node has runtime keys that have not been marked ready,
	 *		ExecReScan() is invoked first; the tuple is then obtained through
	 *		ExecScan() with IndexNext as the access method.
	 * ----------------------------------------------------------------
	 */
	TupleTableSlot *
	ExecIndexScan(IndexScanState *node)
	{
		bool		keysPending;

		/* Runtime keys exist but have not been set up yet? */
		keysPending = (node->iss_NumRuntimeKeys != 0) && !node->iss_RuntimeKeysReady;
		if (keysPending)
			ExecReScan((PlanState *) node, NULL);

		return ExecScan(&node->ss, (ExecScanAccessMtd) IndexNext);
	}

	/* ----------------------------------------------------------------
	 *		ExecIndexReScan(node)
	 *
	 *		Recompute the runtime scan keys (those whose values are only
	 *		known at execution time) and restart the index scan.
	 *
	 *		exprCtxt, when not NULL, supplies the outer tuple that is copied
	 *		into both the runtime-key context and the node's regular
	 *		expression context.
	 *
	 *		When a PlanQual recheck tuple is installed for this relation,
	 *		only es_evTupleNull is reset and the index itself is not
	 *		rescanned.
	 * ----------------------------------------------------------------
	 */
	void
	ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
	{
		EState	   *estate;
		ExprContext *econtext;
		Index		scanrelid;

		/* Make sure a scan descriptor exists before it is rescanned below. */
		initScanDesc(node);

		estate = node->ss.ps.state;
		econtext = node->iss_RuntimeContext;		/* context for runtime keys */
		scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;

		if (econtext)
		{
			/*
			 * If we are being passed an outer tuple, save it for runtime key
			 * calc.  We also need to link it into the "regular" per-tuple
			 * econtext, so it can be used during indexqualorig evaluations.
			 */
			if (exprCtxt != NULL)
			{
				ExprContext *stdecontext;

				econtext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
				stdecontext = node->ss.ps.ps_ExprContext;
				stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
			}

			/*
			 * Reset the runtime-key context so we don't leak memory as each
			 * outer tuple is scanned.  Note this assumes that we will
			 * recalculate *all* runtime keys on each call.
			 */
			ResetExprContext(econtext);
		}

		/*
		 * If we are doing runtime key calculations (ie, the index keys depend
		 * on data from an outer scan), compute the new key values
		 */
		if (node->iss_NumRuntimeKeys != 0)
			ExecIndexEvalRuntimeKeys(econtext,
									 node->iss_RuntimeKeys,
									 node->iss_NumRuntimeKeys);
		node->iss_RuntimeKeysReady = true;

		/* If this is re-scanning of PlanQual ... */
		if (estate->es_evTuple != NULL &&
			estate->es_evTuple[scanrelid - 1] != NULL)
		{
			estate->es_evTupleNull[scanrelid - 1] = false;
			return;
		}

		/* reset index scan */
		index_rescan(node->iss_ScanDesc, node->iss_ScanKeys);

		Gpmon_M_Incr(GpmonPktFromIndexScanState(node), GPMON_INDEXSCAN_RESCAN);
		CheckSendPlanStateGpmonPkt(&node->ss.ps);
	}


	/*
	 * ExecIndexEvalRuntimeKeys
	 *		Evaluate every runtime key expression and store the result in the
	 *		scan key it belongs to.
	 *
	 * econtext is the context the expressions are evaluated in; runtimeKeys is
	 * an array of numRuntimeKeys entries.  For each entry sk_argument receives
	 * the value and SK_ISNULL in sk_flags is set or cleared to match.
	 *
	 * Note: a pass-by-reference result may point into the outer scan's tuple
	 * rather than into econtext->ecxt_per_tuple_memory.  The value is not
	 * copied; this assumes the outer tuple stays put for the whole scan.
	 */
	void
	ExecIndexEvalRuntimeKeys(ExprContext *econtext,
							 IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys)
	{
		int			j;

		for (j = 0; j < numRuntimeKeys; j++)
		{
			ScanKey		scan_key = runtimeKeys[j].scan_key;
			ExprState  *key_expr = runtimeKeys[j].key_expr;
			Datum		scanvalue;
			bool		isNull;

			/* Evaluate against the current outer tuple. */
			scanvalue = ExecEvalExprSwitchContext(key_expr,
												  econtext,
												  &isNull,
												  NULL);
			scan_key->sk_argument = scanvalue;
			if (isNull)
				scan_key->sk_flags |= SK_ISNULL;
			else
				scan_key->sk_flags &= ~SK_ISNULL;
		}
	}

	/*
	 * ExecIndexEvalArrayKeys
	 *		Evaluate the array key expressions and prime each scan key with
	 *		the first element of its array.
	 *
	 * Returns TRUE when every array yielded at least one element; the scan
	 * keys then hold the first elements and each entry's next_elem is 1.
	 * Returns FALSE as soon as an array expression is NULL or an array is
	 * empty, since no match is possible then; later keys are not evaluated.
	 *
	 * The deconstructed arrays live in econtext->ecxt_per_tuple_memory and are
	 * expected to be released when that context is reset, so nothing is
	 * pfree'd here.
	 */
	bool
	ExecIndexEvalArrayKeys(ExprContext *econtext,
						   IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
	{
		bool		result = true;
		int			j;
		MemoryContext oldContext;

		/* We want to keep the arrays in per-tuple memory */
		oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

		for (j = 0; j < numArrayKeys; j++)
		{
			ScanKey		scan_key = arrayKeys[j].scan_key;
			ExprState  *array_expr = arrayKeys[j].array_expr;
			Datum		arraydatum;
			bool		isNull;
			ArrayType  *arrayval;
			int16		elmlen;
			bool		elmbyval;
			char		elmalign;
			int			num_elems;
			Datum	   *elem_values;
			bool	   *elem_nulls;

			/*
			 * Compute and deconstruct the array expression.  (The pass-by-ref
			 * caveat described for ExecIndexEvalRuntimeKeys applies here too.)
			 */
			arraydatum = ExecEvalExpr(array_expr,
									  econtext,
									  &isNull,
									  NULL);
			if (isNull)
			{
				result = false;
				break;				/* no point in evaluating more */
			}
			arrayval = DatumGetArrayTypeP(arraydatum);
			/* We could cache this data, but not clear it's worth it */
			get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
								 &elmlen, &elmbyval, &elmalign);
			deconstruct_array(arrayval,
							  ARR_ELEMTYPE(arrayval),
							  elmlen, elmbyval, elmalign,
							  &elem_values, &elem_nulls, &num_elems);
			if (num_elems <= 0)
			{
				result = false;
				break;				/* no point in evaluating more */
			}

			arrayKeys[j].elem_values = elem_values;
			arrayKeys[j].elem_nulls = elem_nulls;
			arrayKeys[j].num_elems = num_elems;

			/* Start the iteration at element 0. */
			scan_key->sk_argument = elem_values[0];
			if (elem_nulls[0])
				scan_key->sk_flags |= SK_ISNULL;
			else
				scan_key->sk_flags &= ~SK_ISNULL;
			arrayKeys[j].next_elem = 1;
		}

		MemoryContextSwitchTo(oldContext);

		return result;
	}

	/*
	 * ExecIndexAdvanceArrayKeys
	 *		Step the array keys to their next combination of values.
	 *
	 * Works like an odometer: the first key is advanced; when a key has run
	 * out of elements it wraps back to element 0 and the next key is advanced
	 * instead.
	 *
	 * Returns TRUE if some key moved to a new element (the scan keys then hold
	 * the next set of values), FALSE if every key wrapped around or there are
	 * no array keys.
	 */
	bool
	ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
	{
		bool		found = false;
		int			j;

		for (j = 0; j < numArrayKeys; j++)
		{
			ScanKey		scan_key = arrayKeys[j].scan_key;
			int			next_elem = arrayKeys[j].next_elem;
			int			num_elems = arrayKeys[j].num_elems;
			Datum	   *elem_values = arrayKeys[j].elem_values;
			bool	   *elem_nulls = arrayKeys[j].elem_nulls;

			if (next_elem >= num_elems)
			{
				next_elem = 0;
				found = false;		/* need to advance next array key */
			}
			else
				found = true;
			scan_key->sk_argument = elem_values[next_elem];
			if (elem_nulls[next_elem])
				scan_key->sk_flags |= SK_ISNULL;
			else
				scan_key->sk_flags &= ~SK_ISNULL;
			arrayKeys[j].next_elem = next_elem + 1;
			if (found)
				break;
		}

		return found;
	}


	/* ----------------------------------------------------------------
	 *		ExecEndIndexScan
	 *
	 *		Shut down an index scan node: clear its tuple slots, end the
	 *		index scan if a descriptor is still open, close the index and
	 *		the heap relation, and finish the node's gpmon packet.
	 * ----------------------------------------------------------------
	 */
	void
	ExecEndIndexScan(IndexScanState *node)
	{
		Relation	indexRelationDesc;
		Relation	relation;

		/*
		 * extract information from the node
		 */
		indexRelationDesc = node->iss_RelationDesc;
		relation = node->ss.ss_currentRelation;

		/*
		 * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
		 */
	#ifdef NOT_USED
		ExecFreeExprContext(&node->ss.ps);
		if (node->iss_RuntimeContext)
			FreeExprContext(node->iss_RuntimeContext);
	#endif

		/*
		 * clear out tuple table slots
		 */
		ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
		ExecClearTuple(node->ss.ss_ScanTupleSlot);

		/*
		 * end the index scan (a no-op if the descriptor was already freed),
		 * then close the index relation
		 */
		ExecEagerFreeIndexScan(node);
		index_close(indexRelationDesc, NoLock);

		/*
		 * close the heap relation.
		 */
		ExecCloseScanRelation(relation);

		EndPlanStateGpmonPkt(&node->ss.ps);
	}

	/* ----------------------------------------------------------------
	 *		ExecIndexMarkPos
	 *
	 *		Marks the current scan position by handing the node's index
	 *		scan descriptor to index_markpos.
	 * ----------------------------------------------------------------
	 */
	void
	ExecIndexMarkPos(IndexScanState *node)
	{
		IndexScanDesc scandesc = node->iss_ScanDesc;

		index_markpos(scandesc);
	}

	/* ----------------------------------------------------------------
	 *		ExecIndexRestrPos
	 *
	 *		Restores the scan position by handing the node's index scan
	 *		descriptor to index_restrpos, then bumps the
	 *		GPMON_INDEXSCAN_RESTOREPOS counter in the node's gpmon packet
	 *		and calls CheckSendPlanStateGpmonPkt on the plan state.
	 * ----------------------------------------------------------------
	 */
	void
	ExecIndexRestrPos(IndexScanState *node)
	{
		IndexScanDesc scandesc = node->iss_ScanDesc;
		PlanState  *planstate = &node->ss.ps;

		index_restrpos(scandesc);

		/* gpmon bookkeeping for the restore */
		Gpmon_M_Incr(GpmonPktFromIndexScanState(node), GPMON_INDEXSCAN_RESTOREPOS);
		CheckSendPlanStateGpmonPkt(planstate);
	}

	/* ----------------------------------------------------------------
	 *		ExecInitIndexScan
	 *
	 *		Initializes the index scan's state information, creates
	 *		scan keys, and opens the base and index relations.
	 *
	 *		node	- the IndexScan plan node to build executor state for
	 *		estate	- the executor state the new node is attached to
	 *		eflags	- EXEC_FLAG_* bits; here they are only inspected for
	 *				  REWIND/BACKWARD/MARK to decide delayEagerFree
	 *
	 *		Returns the newly created IndexScanState.
	 *
	 *		Note: index scans have 2 sets of state information because
	 *		we have to keep track of the base relation and the
	 *		index relation.
	 *
	 *		Note: this routine does not begin the index scan itself;
	 *		iss_ScanDesc is not assigned here.
	 * ----------------------------------------------------------------
	 */
	IndexScanState *
	ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
	{
		IndexScanState *indexstate;
		Relation	currentRelation;
		bool		relistarget;

		/*
		 * create state structure
		 */
		indexstate = makeNode(IndexScanState);
		indexstate->ss.ps.plan = (Plan *) node;
		indexstate->ss.ps.state = estate;

		/*
		 * Miscellaneous initialization
		 *
		 * create expression context for node
		 */
		ExecAssignExprContext(estate, &indexstate->ss.ps);

		/* indexstate->ss.ps.ps_TupFromTlist = false; */

		/*
		 * initialize child expressions
		 *
		 * Note: we don't initialize all of the indexqual expression, only the
		 * sub-parts corresponding to runtime keys (see below).  The indexqualorig
		 * expression is always initialized even though it will only be used in
		 * some uncommon cases --- would be nice to improve that.  (Problem is
		 * that any SubPlans present in the expression must be found now...)
		 */
		indexstate->ss.ps.targetlist = (List *)
			ExecInitExpr((Expr *) node->scan.plan.targetlist,
						 (PlanState *) indexstate);
		indexstate->ss.ps.qual = (List *)
			ExecInitExpr((Expr *) node->scan.plan.qual,
						 (PlanState *) indexstate);
		indexstate->indexqualorig = (List *)
			ExecInitExpr((Expr *) node->indexqualorig,
						 (PlanState *) indexstate);

		/* number of tuple table slots set up below; also used by ExecCountSlotsIndexScan */
	#define INDEXSCAN_NSLOTS 2

		/*
		 * tuple table initialization
		 */
		ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
		ExecInitScanTupleSlot(estate, &indexstate->ss);

		/*
		 * open the base relation and acquire appropriate lock on it.
		 */
		currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);

		indexstate->ss.ss_currentRelation = currentRelation;

		/*
		 * get the scan type from the relation descriptor.
		 */
		ExecAssignScanType(&indexstate->ss, RelationGetDescr(currentRelation));

		/*
		 * Open the index relation.
		 *
		 * If the parent table is one of the target relations of the query, then
		 * InitPlan already opened and write-locked the index, so we can avoid
		 * taking another lock here.  Otherwise we need a normal reader's lock.
		 */
		relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
		indexstate->iss_RelationDesc = index_open(node->indexid,
										 relistarget ? NoLock : AccessShareLock);

		/*
		 * build the index scan keys from the index qualification
		 */
		ExecIndexBuildScanKeys((PlanState *) indexstate,
							   indexstate->iss_RelationDesc,
							   node->indexqual,
							   node->indexstrategy,
							   node->indexsubtype,
							   &indexstate->iss_ScanKeys,
							   &indexstate->iss_NumScanKeys,
							   &indexstate->iss_RuntimeKeys,
							   &indexstate->iss_NumRuntimeKeys,
							   NULL,	/* no ArrayKeys */
							   NULL);

		/*
		 * If we have runtime keys, we need an ExprContext to evaluate them. The
		 * node's standard context won't do because we want to reset that context
		 * for every tuple.  So, build another context just like the other one...
		 * -tgl 7/11/00
		 */
		if (indexstate->iss_NumRuntimeKeys != 0)
		{
			/*
			 * ExecAssignExprContext overwrites ps_ExprContext, so save the
			 * standard context, capture the fresh one as the runtime context,
			 * and then put the standard one back.
			 */
			ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;

			ExecAssignExprContext(estate, &indexstate->ss.ps);
			indexstate->iss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
			indexstate->ss.ps.ps_ExprContext = stdecontext;
		}
		else
		{
			indexstate->iss_RuntimeContext = NULL;
		}

		/*
		 * Initialize index-specific scan state
		 */
		indexstate->iss_RuntimeKeysReady = false;

		/*
		 * Initialize result tuple type and projection info.
		 */
		ExecAssignResultTypeFromTL(&indexstate->ss.ps);
		ExecAssignScanProjectionInfo(&indexstate->ss);

		initGpmonPktForIndexScan((Plan *) node, &indexstate->ss.ps.gpmon_pkt, estate);

		/*
		 * If eflag contains EXEC_FLAG_REWIND or EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK,
		 * then this node is not eager free safe.
		 */
		indexstate->ss.ps.delayEagerFree =
			((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0);

		/*
		 * all done.
		 */
		return indexstate;
	}

	/*
	 * ExecCountSlotsIndexScan
	 *		Returns the slot count for this plan node: the counts reported by
	 *		ExecCountSlotsNode for the outer and inner plans, plus
	 *		INDEXSCAN_NSLOTS for the node itself.
	 */
	int
	ExecCountSlotsIndexScan(IndexScan *node)
	{
		Plan	   *plan = (Plan *) node;
		int			outerSlots = ExecCountSlotsNode(outerPlan(plan));
		int			innerSlots = ExecCountSlotsNode(innerPlan(plan));

		return outerSlots + innerSlots + INDEXSCAN_NSLOTS;
	}


	/*
	 * initGpmonPktForIndexScan
	 *		Set up the gpmon packet for an IndexScan plan node.
	 *
	 * planNode  - the plan node; must be non-NULL and an IndexScan (asserted)
	 * gpmon_pkt - the packet to initialize; must be non-NULL (asserted)
	 * estate	 - executor state, passed through to InitPlanNodeGpmonPkt
	 *
	 * The name reported with the packet is whatever get_rel_name returns for
	 * the plan's indexid; the plan's plan_rows, cast to int64, is passed along
	 * with it.
	 */
	void
	initGpmonPktForIndexScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate)
	{
		char	   *relname;

		Assert(planNode != NULL && gpmon_pkt != NULL && IsA(planNode, IndexScan));
		Assert(GPMON_INDEXSCAN_TOTAL <= (int) GPMON_QEXEC_M_COUNT);

		relname = get_rel_name(((IndexScan *) planNode)->indexid);

		InitPlanNodeGpmonPkt(planNode, gpmon_pkt, estate, PMNT_IndexScan,
							 (int64) planNode->plan_rows,
							 relname);

		/* the lookup result may be NULL; only free it when there is one */
		if (relname)
			pfree(relname);
	}

	/*
	 * ExecEagerFreeIndexScan
	 *		Free the node's index scan descriptor by delegating to
	 *		freeScanDesc.  ExecEndIndexScan calls this before it closes the
	 *		index relation.
	 */
	void
	ExecEagerFreeIndexScan(IndexScanState *node)
	{
	freeScanDesc(node);
	}