blob: d91bcc75cc10326c494c6091c79a01f47ed904f0 [file] [log] [blame]
/*-------------------------------------------------------------------------
*
* nodeIndexscan.c
* Routines to support indexed scans of relations
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeIndexscan.c,v 1.117.2.1 2006/12/26 19:26:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
* ExecIndexScan scans a relation using indices
* IndexNext uses the index to retrieve the next tuple
* ExecInitIndexScan creates and initializes state info.
* ExecIndexReScan rescans the indexed relation.
* ExecEndIndexScan releases all storage.
* ExecIndexMarkPos marks scan position.
* ExecIndexRestrPos restores scan position.
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/nbtree.h"
#include "cdb/cdbvars.h"
#include "executor/execdebug.h"
#include "executor/nodeIndexscan.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
/*
 * initScanDesc
 *		Lazily open the index scan descriptor for this node.
 *
 * Does nothing when a descriptor is already present.  Otherwise a scan is
 * begun over the node's current relation and index relation, using the
 * executor state's snapshot and the node's scan keys.
 */
static inline void
initScanDesc(IndexScanState *indexstate)
{
	Relation	heapRel;
	EState	   *execState;

	/* A descriptor already exists: nothing to do. */
	if (indexstate->iss_ScanDesc != NULL)
		return;

	heapRel = indexstate->ss.ss_currentRelation;
	execState = indexstate->ss.ps.state;

	indexstate->iss_ScanDesc = index_beginscan(heapRel,
											   indexstate->iss_RelationDesc,
											   execState->es_snapshot,
											   indexstate->iss_NumScanKeys,
											   indexstate->iss_ScanKeys);
}
/*
 * freeScanDesc
 *		End the index scan and forget its descriptor, if one is open.
 *
 * The descriptor pointer is reset to NULL afterwards, so calling this
 * again (or calling initScanDesc later) is safe.
 */
static inline void
freeScanDesc(IndexScanState *indexstate)
{
	IndexScanDesc openScan = indexstate->iss_ScanDesc;

	if (openScan == NULL)
		return;

	index_endscan(openScan);
	indexstate->iss_ScanDesc = NULL;
}
/* ----------------------------------------------------------------
 *		IndexNext
 *
 *		Retrieve a tuple from the IndexScan node's currentRelation
 *		using the index specified in the IndexScanState information.
 *
 *		Returns the node's scan tuple slot.  The slot holds the next
 *		tuple when one is available and is cleared when there is nothing
 *		(more) to return.  The rows-out monitoring counter is advanced
 *		only when a tuple is actually handed back to the caller.
 *
 *		Unless the node was initialized with delayEagerFree set, the
 *		scan descriptor is released as soon as an empty slot is about to
 *		be returned.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
IndexNext(IndexScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	ScanDirection direction;
	IndexScanDesc scandesc;
	Index		scanrelid;
	HeapTuple	tuple;
	TupleTableSlot *slot;

	/*
	 * extract necessary information from index scan node
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;

	/* The descriptor may not exist yet, or may have been eagerly freed. */
	initScanDesc(node);

	/* flip direction if this is an overall backward scan */
	if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
	{
		if (ScanDirectionIsForward(direction))
			direction = BackwardScanDirection;
		else if (ScanDirectionIsBackward(direction))
			direction = ForwardScanDirection;
	}

	scandesc = node->iss_ScanDesc;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;

	/*
	 * Check if we are evaluating PlanQual for tuple of this relation.
	 * Additional checking is not good, but no other way for now. We could
	 * introduce new nodes for this case and handle IndexScan --> NewNode
	 * switching in Init/ReScan plan...
	 */
	if (estate->es_evTuple != NULL &&
		estate->es_evTuple[scanrelid - 1] != NULL)
	{
		/* The single PlanQual tuple was already handed out (or rejected). */
		if (estate->es_evTupleNull[scanrelid - 1])
		{
			if (!node->ss.ps.delayEagerFree)
			{
				ExecEagerFreeIndexScan(node);
			}

			return ExecClearTuple(slot);
		}

		ExecStoreGenericTuple(estate->es_evTuple[scanrelid - 1], slot, false);

		/* Does the tuple meet the indexqual condition? */
		econtext->ecxt_scantuple = slot;

		ResetExprContext(econtext);

		if (!ExecQual(node->indexqualorig, econtext, false))
		{
			if (!node->ss.ps.delayEagerFree)
			{
				ExecEagerFreeIndexScan(node);
			}

			ExecClearTuple(slot);		/* would not be returned by scan */

			/* Flag for the next call that no more tuples */
			estate->es_evTupleNull[scanrelid - 1] = true;

			/*
			 * No tuple is being returned, so the rows-out counter is left
			 * alone, matching the other empty-slot returns in this
			 * function.
			 */
			CheckSendPlanStateGpmonPkt(&node->ss.ps);
			return slot;
		}

		/* Flag for the next call that no more tuples */
		estate->es_evTupleNull[scanrelid - 1] = true;

		Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node));
		CheckSendPlanStateGpmonPkt(&node->ss.ps);
		return slot;
	}

	/*
	 * ok, now that we have what we need, fetch the next tuple.
	 */
	if ((tuple = index_getnext(scandesc, direction)) != NULL)
	{
		/*
		 * Store the scanned tuple in the scan tuple slot of the scan state.
		 * Note: we pass 'false' because tuples returned by amgetnext are
		 * pointers onto disk pages and must not be pfree()'d.
		 */
		ExecStoreHeapTuple(tuple,	/* tuple to store */
						   slot,	/* slot to store in */
						   scandesc->xs_cbuf,	/* buffer containing tuple */
						   false);	/* don't pfree */

		Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node));
		CheckSendPlanStateGpmonPkt(&node->ss.ps);
		return slot;
	}

	if (!node->ss.ps.delayEagerFree)
	{
		ExecEagerFreeIndexScan(node);
	}

	/*
	 * if we get here it means the index scan failed so we are at the end of
	 * the scan..
	 */
	return ExecClearTuple(slot);
}
/* ----------------------------------------------------------------
 *		ExecIndexScan(node)
 *
 *		Entry point for fetching the next tuple from an index scan
 *		node.  Runtime keys, if the node has any, are computed on the
 *		first call by forcing a rescan; the actual fetching is then
 *		delegated to ExecScan with IndexNext as the access method.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecIndexScan(IndexScanState *node)
{
	/* Runtime keys exist but have not been evaluated yet: set them up. */
	if (node->iss_NumRuntimeKeys != 0)
	{
		if (!node->iss_RuntimeKeysReady)
			ExecReScan((PlanState *) node, NULL);
	}

	return ExecScan(&node->ss, (ExecScanAccessMtd) IndexNext);
}
/* ----------------------------------------------------------------
 *		ExecIndexReScan(node)
 *
 *		Recomputes the scan keys whose values are only known at run
 *		time and then restarts the index scan.
 *
 *		exprCtxt, when not NULL, supplies the outer tuple that the
 *		runtime key expressions are evaluated against.
 *
 *		When the node is being rescanned as part of PlanQual
 *		evaluation, only the "tuple already returned" flag is reset and
 *		the index itself is not rescanned.
 * ----------------------------------------------------------------
 */
void
ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
{
	EState	   *execState;
	ExprContext *keyContext;
	Index		relIdx;

	/* Make sure there is a scan descriptor to rescan. */
	initScanDesc(node);

	execState = node->ss.ps.state;
	keyContext = node->iss_RuntimeContext;	/* context for runtime keys */
	relIdx = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid - 1;

	if (keyContext != NULL)
	{
		/*
		 * An outer tuple passed in by the caller is remembered for the
		 * runtime key computation, and is also linked into the node's
		 * regular per-tuple context so indexqualorig evaluation can see it.
		 */
		if (exprCtxt != NULL)
		{
			keyContext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
			node->ss.ps.ps_ExprContext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
		}

		/*
		 * Reset the runtime-key context so memory is not leaked as each
		 * outer tuple is scanned.  This assumes *all* runtime keys are
		 * recalculated on every call.
		 */
		ResetExprContext(keyContext);
	}

	/* Keys that depend on outer-scan data get fresh values now. */
	if (node->iss_NumRuntimeKeys != 0)
		ExecIndexEvalRuntimeKeys(keyContext,
								 node->iss_RuntimeKeys,
								 node->iss_NumRuntimeKeys);
	node->iss_RuntimeKeysReady = true;

	/* Rescanning for PlanQual: just allow the saved tuple to be returned. */
	if (execState->es_evTuple != NULL &&
		execState->es_evTuple[relIdx] != NULL)
	{
		execState->es_evTupleNull[relIdx] = false;
		return;
	}

	/* Restart the index scan with the (possibly updated) keys. */
	index_rescan(node->iss_ScanDesc, node->iss_ScanKeys);

	Gpmon_M_Incr(GpmonPktFromIndexScanState(node), GPMON_INDEXSCAN_RESCAN);
	CheckSendPlanStateGpmonPkt(&node->ss.ps);
}
/*
 * ExecIndexEvalRuntimeKeys
 *		Evaluate every runtime key expression and store the results in
 *		the corresponding scan keys.
 *
 * econtext is the expression context the key expressions are evaluated in.
 * For each of the numRuntimeKeys entries of runtimeKeys, the scan key's
 * argument is overwritten and its SK_ISNULL flag is set or cleared to match
 * the evaluation result.
 */
void
ExecIndexEvalRuntimeKeys(ExprContext *econtext,
						 IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys)
{
	int			keyno;

	for (keyno = 0; keyno < numRuntimeKeys; keyno++)
	{
		IndexRuntimeKeyInfo *info = &runtimeKeys[keyno];
		ScanKey		key = info->scan_key;
		bool		valueIsNull;

		/*
		 * Evaluate the key expression against the current outer tuple.
		 *
		 * Note: the result may be a pass-by-reference value living in the
		 * outer scan's tuple rather than in
		 * econtext->ecxt_per_tuple_memory.  It is assumed that the outer
		 * tuple stays put for the duration of our scan, so no copy is made.
		 */
		key->sk_argument = ExecEvalExprSwitchContext(info->key_expr,
													 econtext,
													 &valueIsNull,
													 NULL);

		if (valueIsNull)
			key->sk_flags |= SK_ISNULL;
		else
			key->sk_flags &= ~SK_ISNULL;
	}
}
/*
 * ExecIndexEvalArrayKeys
 *		Evaluate the array key expressions and prepare to iterate over
 *		their elements.
 *
 * Returns TRUE when every array produced at least one element, in which
 * case each scan key has been loaded with the first element of its array.
 * Returns FALSE as soon as a null or empty array is met, since no match is
 * possible then; later keys are not evaluated in that case.
 *
 * The deconstructed arrays are allocated in econtext's per-tuple memory.
 */
bool
ExecIndexEvalArrayKeys(ExprContext *econtext,
					   IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
{
	bool		allUsable = true;
	int			keyno = 0;
	MemoryContext callerContext;

	/* We want to keep the arrays in per-tuple memory */
	callerContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

	while (allUsable && keyno < numArrayKeys)
	{
		IndexArrayKeyInfo *info = &arrayKeys[keyno++];
		ScanKey		key = info->scan_key;
		Datum		arraydatum;
		bool		isNull;
		ArrayType  *arrayval;
		int16		elmlen;
		bool		elmbyval;
		char		elmalign;
		int			num_elems;
		Datum	   *elem_values;
		bool	   *elem_nulls;

		/*
		 * Compute the array expression.  (The note about pass-by-reference
		 * results in ExecIndexEvalRuntimeKeys() applies here too.)
		 */
		arraydatum = ExecEvalExpr(info->array_expr,
								  econtext,
								  &isNull,
								  NULL);
		if (isNull)
		{
			/* null array: no point in evaluating more */
			allUsable = false;
			continue;
		}

		arrayval = DatumGetArrayTypeP(arraydatum);

		/* We could cache this data, but not clear it's worth it */
		get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
							 &elmlen, &elmbyval, &elmalign);
		deconstruct_array(arrayval,
						  ARR_ELEMTYPE(arrayval),
						  elmlen, elmbyval, elmalign,
						  &elem_values, &elem_nulls, &num_elems);
		if (num_elems <= 0)
		{
			/* empty array: no point in evaluating more */
			allUsable = false;
			continue;
		}

		/*
		 * Any previous array data is expected to be released when the
		 * per-tuple context is reset, hence nothing is pfree'd here.
		 */
		info->elem_values = elem_values;
		info->elem_nulls = elem_nulls;
		info->num_elems = num_elems;

		/* Load the first element; iteration continues from element 1. */
		key->sk_argument = elem_values[0];
		if (elem_nulls[0])
			key->sk_flags |= SK_ISNULL;
		else
			key->sk_flags &= ~SK_ISNULL;
		info->next_elem = 1;
	}

	MemoryContextSwitchTo(callerContext);

	return allUsable;
}
/*
 * ExecIndexAdvanceArrayKeys
 *		Step to the next combination of array key values, if any.
 *
 * Returns TRUE when another combination exists, with the scan keys loaded
 * accordingly, and FALSE once all combinations have been visited.
 *
 * Keys are advanced in array order.  A key that has run out of elements
 * wraps back to its first element and the next key is advanced instead;
 * the first key that can move forward ends the search.
 */
bool
ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
{
	int			keyno;

	for (keyno = 0; keyno < numArrayKeys; keyno++)
	{
		IndexArrayKeyInfo *info = &arrayKeys[keyno];
		ScanKey		key = info->scan_key;
		int			pos = info->next_elem;
		bool		wrapped = (pos >= info->num_elems);

		/* Out of elements: restart this key and carry into the next one. */
		if (wrapped)
			pos = 0;

		key->sk_argument = info->elem_values[pos];
		if (info->elem_nulls[pos])
			key->sk_flags |= SK_ISNULL;
		else
			key->sk_flags &= ~SK_ISNULL;
		info->next_elem = pos + 1;

		if (!wrapped)
			return true;
	}

	/* Every key wrapped around (or there were no keys at all). */
	return false;
}
/* ----------------------------------------------------------------
 *		ExecEndIndexScan
 *
 *		Shuts the index scan node down: clears its tuple slots, ends
 *		the index scan, and closes the index and scanned relations.
 * ----------------------------------------------------------------
 */
void
ExecEndIndexScan(IndexScanState *node)
{
	Relation	indexRelationDesc;
	Relation	relation;

	/*
	 * extract information from the node
	 */
	indexRelationDesc = node->iss_RelationDesc;
	relation = node->ss.ss_currentRelation;

	/*
	 * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
	 */
#ifdef NOT_USED
	ExecFreeExprContext(&node->ss.ps);
	if (node->iss_RuntimeContext)
		FreeExprContext(node->iss_RuntimeContext);
#endif

	/*
	 * clear out tuple table slots
	 */
	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
	ExecClearTuple(node->ss.ss_ScanTupleSlot);

	/*
	 * end the index scan (a no-op if the descriptor was already freed),
	 * then close the index relation
	 */
	ExecEagerFreeIndexScan(node);
	index_close(indexRelationDesc, NoLock);

	/*
	 * close the heap relation.
	 */
	ExecCloseScanRelation(relation);

	EndPlanStateGpmonPkt(&node->ss.ps);
}
/* ----------------------------------------------------------------
 *		ExecIndexMarkPos
 *
 *		Records the current position of the node's index scan by
 *		handing its scan descriptor to index_markpos.
 * ----------------------------------------------------------------
 */
void
ExecIndexMarkPos(IndexScanState *node)
{
	IndexScanDesc scan = node->iss_ScanDesc;

	index_markpos(scan);
}
/* ----------------------------------------------------------------
 *		ExecIndexRestrPos
 *
 *		Returns the node's index scan to the previously marked
 *		position and bumps the restore-position monitoring counter.
 * ----------------------------------------------------------------
 */
void
ExecIndexRestrPos(IndexScanState *node)
{
	IndexScanDesc scan = node->iss_ScanDesc;

	index_restrpos(scan);

	Gpmon_M_Incr(GpmonPktFromIndexScanState(node), GPMON_INDEXSCAN_RESTOREPOS);
	CheckSendPlanStateGpmonPkt(&node->ss.ps);
}
/* Tuple table slots used by an index scan node: result slot + scan slot. */
#define INDEXSCAN_NSLOTS 2

/* ----------------------------------------------------------------
 *		ExecInitIndexScan
 *
 *		Builds the run-time state for an index scan plan node: sets up
 *		expression state and tuple slots, opens the base relation and
 *		the index relation, and constructs the scan keys.
 *
 *		Note: an index scan carries two sets of relation state, one
 *		for the base relation and one for the index relation.
 *
 *		The scan descriptor itself is not created here; it is opened
 *		on demand by the routines that need it.
 * ----------------------------------------------------------------
 */
IndexScanState *
ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
{
	IndexScanState *indexstate;
	PlanState  *planstate;
	Relation	heapRel;

	/*
	 * Create the state node and link it to its plan and executor state.
	 */
	indexstate = makeNode(IndexScanState);
	planstate = &indexstate->ss.ps;
	planstate->plan = (Plan *) node;
	planstate->state = estate;

	/*
	 * Standard (per-tuple) expression context for the node.
	 */
	ExecAssignExprContext(estate, planstate);

	/*
	 * Initialize child expressions.
	 *
	 * The indexqual expression is not initialized as a whole; only the
	 * sub-parts that become runtime keys are (see the scan key construction
	 * below).  indexqualorig is always initialized even though it is only
	 * needed in uncommon cases, because any SubPlans inside it have to be
	 * found now.
	 */
	planstate->targetlist = (List *)
		ExecInitExpr((Expr *) node->scan.plan.targetlist, planstate);
	planstate->qual = (List *)
		ExecInitExpr((Expr *) node->scan.plan.qual, planstate);
	indexstate->indexqualorig = (List *)
		ExecInitExpr((Expr *) node->indexqualorig, planstate);

	/*
	 * Tuple table slots (INDEXSCAN_NSLOTS of them).
	 */
	ExecInitResultTupleSlot(estate, planstate);
	ExecInitScanTupleSlot(estate, &indexstate->ss);

	/*
	 * Open the base relation, acquiring the appropriate lock, and take the
	 * scan tuple type from its descriptor.
	 */
	heapRel = ExecOpenScanRelation(estate, node->scan.scanrelid);
	indexstate->ss.ss_currentRelation = heapRel;
	ExecAssignScanType(&indexstate->ss, RelationGetDescr(heapRel));

	/*
	 * Open the index relation.
	 *
	 * If the parent table is one of the query's target relations, InitPlan
	 * has already opened and write-locked the index, so no further lock is
	 * taken here.  Otherwise an ordinary reader's lock is needed.
	 */
	if (ExecRelationIsTargetRelation(estate, node->scan.scanrelid))
		indexstate->iss_RelationDesc = index_open(node->indexid, NoLock);
	else
		indexstate->iss_RelationDesc = index_open(node->indexid, AccessShareLock);

	/*
	 * Build the index scan keys from the index qualification.
	 */
	ExecIndexBuildScanKeys(planstate,
						   indexstate->iss_RelationDesc,
						   node->indexqual,
						   node->indexstrategy,
						   node->indexsubtype,
						   &indexstate->iss_ScanKeys,
						   &indexstate->iss_NumScanKeys,
						   &indexstate->iss_RuntimeKeys,
						   &indexstate->iss_NumRuntimeKeys,
						   NULL,	/* no ArrayKeys */
						   NULL);

	/*
	 * Runtime keys need an ExprContext of their own: the standard context
	 * is reset for every tuple, which is too often for key values.  A
	 * second context is created through the usual routine and then swapped
	 * out, leaving the standard one installed in the plan state.
	 */
	indexstate->iss_RuntimeContext = NULL;
	if (indexstate->iss_NumRuntimeKeys != 0)
	{
		ExprContext *perTupleContext = planstate->ps_ExprContext;

		ExecAssignExprContext(estate, planstate);
		indexstate->iss_RuntimeContext = planstate->ps_ExprContext;
		planstate->ps_ExprContext = perTupleContext;
	}

	/*
	 * Runtime keys have not been evaluated yet.
	 */
	indexstate->iss_RuntimeKeysReady = false;

	/*
	 * Result tuple type and projection info.
	 */
	ExecAssignResultTypeFromTL(planstate);
	ExecAssignScanProjectionInfo(&indexstate->ss);

	initGpmonPktForIndexScan((Plan *) node, &planstate->gpmon_pkt, estate);

	/*
	 * A node asked to support rewind, backward scan or mark/restore is not
	 * eager-free safe, so its scan descriptor must not be released early.
	 */
	planstate->delayEagerFree =
		((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0);

	return indexstate;
}
/*
 * ExecCountSlotsIndexScan
 *		Number of tuple table slots needed by this node and its outer and
 *		inner subplans: the subplans' counts plus INDEXSCAN_NSLOTS.
 */
int
ExecCountSlotsIndexScan(IndexScan *node)
{
	Plan	   *plan = (Plan *) node;
	int			outerSlots = ExecCountSlotsNode(outerPlan(plan));
	int			innerSlots = ExecCountSlotsNode(innerPlan(plan));

	return outerSlots + innerSlots + INDEXSCAN_NSLOTS;
}
/*
 * initGpmonPktForIndexScan
 *		Initialize the monitoring packet of an IndexScan plan node.
 *
 * The packet is set up with node type PMNT_IndexScan, the plan's estimated
 * row count, and the name looked up for the plan's index OID.  The name
 * string is freed again before returning.
 */
void
initGpmonPktForIndexScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate)
{
	char	   *indexName;

	Assert(planNode != NULL && gpmon_pkt != NULL && IsA(planNode, IndexScan));

	indexName = get_rel_name(((IndexScan *) planNode)->indexid);

	Assert(GPMON_INDEXSCAN_TOTAL <= (int) GPMON_QEXEC_M_COUNT);
	InitPlanNodeGpmonPkt(planNode, gpmon_pkt, estate, PMNT_IndexScan,
						 (int64) planNode->plan_rows,
						 indexName);

	/* The name lookup may have returned NULL; free only a real string. */
	if (indexName != NULL)
		pfree(indexName);
}
/*
 * ExecEagerFreeIndexScan
 *		Release the node's index scan descriptor ahead of node shutdown.
 *
 * Does nothing when no descriptor is open, so repeated calls are harmless.
 */
void
ExecEagerFreeIndexScan(IndexScanState *node)
{
	/* All the work, including the NULL check, lives in the helper. */
	freeScanDesc(node);
}