| /*------------------------------------------------------------------------- |
| * |
| * nodeIndexscan.c |
| * Routines to support indexed scans of relations |
| * |
| * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/executor/nodeIndexscan.c,v 1.117.2.1 2006/12/26 19:26:56 tgl Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| /* |
| * INTERFACE ROUTINES |
| * ExecIndexScan scans a relation using indices |
| * IndexNext retrieves the next tuple using the index |
| * ExecInitIndexScan creates and initializes state info. |
| * ExecIndexReScan rescans the indexed relation. |
| * ExecEndIndexScan releases all storage. |
| * ExecIndexMarkPos marks scan position. |
| * ExecIndexRestrPos restores scan position. |
| */ |
| #include "postgres.h" |
| |
| #include "access/genam.h" |
| #include "access/nbtree.h" |
| #include "cdb/cdbvars.h" |
| #include "executor/execdebug.h" |
| #include "executor/nodeIndexscan.h" |
| #include "nodes/nodeFuncs.h" |
| #include "optimizer/clauses.h" |
| #include "utils/array.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| |
| /* |
| * Initialize the index scan descriptor if it is not initialized. |
| */ |
| static inline void |
| initScanDesc(IndexScanState *indexstate) |
| { |
| Relation currentRelation = indexstate->ss.ss_currentRelation; |
| EState *estate = indexstate->ss.ps.state; |
| |
| if (indexstate->iss_ScanDesc == NULL) |
| { |
| /* |
| * Initialize scan descriptor. |
| */ |
| indexstate->iss_ScanDesc = index_beginscan(currentRelation, |
| indexstate->iss_RelationDesc, |
| estate->es_snapshot, |
| indexstate->iss_NumScanKeys, |
| indexstate->iss_ScanKeys); |
| } |
| } |
| |
| /* |
| * Free the index scan descriptor. |
| */ |
| static inline void |
| freeScanDesc(IndexScanState *indexstate) |
| { |
| if (indexstate->iss_ScanDesc != NULL) |
| { |
| index_endscan(indexstate->iss_ScanDesc); |
| indexstate->iss_ScanDesc = NULL; |
| } |
| } |
| |
| |
| /* ---------------------------------------------------------------- |
| * IndexNext |
| * |
| * Retrieve a tuple from the IndexScan node's currentRelation |
| * using the index specified in the IndexScanState information. |
| * ---------------------------------------------------------------- |
| */ |
| TupleTableSlot * |
| IndexNext(IndexScanState *node) |
| { |
| EState *estate; |
| ExprContext *econtext; |
| ScanDirection direction; |
| IndexScanDesc scandesc; |
| Index scanrelid; |
| HeapTuple tuple; |
| TupleTableSlot *slot; |
| |
| /* |
| * extract necessary information from index scan node |
| */ |
| estate = node->ss.ps.state; |
| direction = estate->es_direction; |
| |
| initScanDesc(node); |
| |
| /* flip direction if this is an overall backward scan */ |
| if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir)) |
| { |
| if (ScanDirectionIsForward(direction)) |
| direction = BackwardScanDirection; |
| else if (ScanDirectionIsBackward(direction)) |
| direction = ForwardScanDirection; |
| } |
| scandesc = node->iss_ScanDesc; |
| econtext = node->ss.ps.ps_ExprContext; |
| slot = node->ss.ss_ScanTupleSlot; |
| scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid; |
| |
| /* |
| * Check if we are evaluating PlanQual for tuple of this relation. |
| * Additional checking is not good, but no other way for now. We could |
| * introduce new nodes for this case and handle IndexScan --> NewNode |
| * switching in Init/ReScan plan... |
| */ |
| if (estate->es_evTuple != NULL && |
| estate->es_evTuple[scanrelid - 1] != NULL) |
| { |
| if (estate->es_evTupleNull[scanrelid - 1]) |
| { |
| if (!node->ss.ps.delayEagerFree) |
| { |
| ExecEagerFreeIndexScan(node); |
| } |
| |
| return ExecClearTuple(slot); |
| } |
| |
| ExecStoreGenericTuple(estate->es_evTuple[scanrelid - 1], slot, false); |
| |
| /* Does the tuple meet the indexqual condition? */ |
| econtext->ecxt_scantuple = slot; |
| |
| ResetExprContext(econtext); |
| |
| if (!ExecQual(node->indexqualorig, econtext, false)) |
| { |
| if (!node->ss.ps.delayEagerFree) |
| { |
| ExecEagerFreeIndexScan(node); |
| } |
| |
| ExecClearTuple(slot); /* would not be returned by scan */ |
| } |
| |
| /* Flag for the next call that no more tuples */ |
| estate->es_evTupleNull[scanrelid - 1] = true; |
| |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node)); |
| CheckSendPlanStateGpmonPkt(&node->ss.ps); |
| return slot; |
| } |
| |
| /* |
| * ok, now that we have what we need, fetch the next tuple. |
| */ |
| if ((tuple = index_getnext(scandesc, direction)) != NULL) |
| { |
| /* |
| * Store the scanned tuple in the scan tuple slot of the scan state. |
| * Note: we pass 'false' because tuples returned by amgetnext are |
| * pointers onto disk pages and must not be pfree()'d. |
| */ |
| ExecStoreHeapTuple(tuple, /* tuple to store */ |
| slot, /* slot to store in */ |
| scandesc->xs_cbuf, /* buffer containing tuple */ |
| false); /* don't pfree */ |
| |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromIndexScanState(node)); |
| CheckSendPlanStateGpmonPkt(&node->ss.ps); |
| return slot; |
| } |
| |
| if (!node->ss.ps.delayEagerFree) |
| { |
| ExecEagerFreeIndexScan(node); |
| } |
| |
| /* |
| * if we get here it means the index scan failed so we are at the end of |
| * the scan.. |
| */ |
| return ExecClearTuple(slot); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecIndexScan(node) |
| * ---------------------------------------------------------------- |
| */ |
| TupleTableSlot * |
| ExecIndexScan(IndexScanState *node) |
| { |
| /* |
| * If we have runtime keys and they've not already been set up, do it now. |
| */ |
| if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady) |
| ExecReScan((PlanState *) node, NULL); |
| |
| /* |
| * use IndexNext as access method |
| */ |
| return ExecScan(&node->ss, (ExecScanAccessMtd) IndexNext); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecIndexReScan(node) |
| * |
| * Recalculates the value of the scan keys whose value depends on |
| * information known at runtime and rescans the indexed relation. |
| * Updating the scan key was formerly done separately in |
| * ExecUpdateIndexScanKeys. Integrating it into ReScan makes |
| * rescans of indices and relations/general streams more uniform. |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt) |
| { |
| EState *estate; |
| ExprContext *econtext; |
| Index scanrelid; |
| |
| initScanDesc(node); |
| |
| estate = node->ss.ps.state; |
| econtext = node->iss_RuntimeContext; /* context for runtime keys */ |
| scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid; |
| |
| /*node->ss.ps.ps_TupFromTlist = false;*/ |
| |
| if (econtext) |
| { |
| /* |
| * If we are being passed an outer tuple, save it for runtime key |
| * calc. We also need to link it into the "regular" per-tuple |
| * econtext, so it can be used during indexqualorig evaluations. |
| */ |
| if (exprCtxt != NULL) |
| { |
| ExprContext *stdecontext; |
| |
| econtext->ecxt_outertuple = exprCtxt->ecxt_outertuple; |
| stdecontext = node->ss.ps.ps_ExprContext; |
| stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple; |
| } |
| |
| /* |
| * Reset the runtime-key context so we don't leak memory as each outer |
| * tuple is scanned. Note this assumes that we will recalculate *all* |
| * runtime keys on each call. |
| */ |
| ResetExprContext(econtext); |
| } |
| |
| /* |
| * If we are doing runtime key calculations (ie, the index keys depend on |
| * data from an outer scan), compute the new key values |
| */ |
| if (node->iss_NumRuntimeKeys != 0) |
| ExecIndexEvalRuntimeKeys(econtext, |
| node->iss_RuntimeKeys, |
| node->iss_NumRuntimeKeys); |
| node->iss_RuntimeKeysReady = true; |
| |
| /* If this is re-scanning of PlanQual ... */ |
| if (estate->es_evTuple != NULL && |
| estate->es_evTuple[scanrelid - 1] != NULL) |
| { |
| estate->es_evTupleNull[scanrelid - 1] = false; |
| return; |
| } |
| |
| /* reset index scan */ |
| index_rescan(node->iss_ScanDesc, node->iss_ScanKeys); |
| |
| Gpmon_M_Incr(GpmonPktFromIndexScanState(node), GPMON_INDEXSCAN_RESCAN); |
| CheckSendPlanStateGpmonPkt(&node->ss.ps); |
| } |
| |
| |
| /* |
| * ExecIndexEvalRuntimeKeys |
| * Evaluate any runtime key values, and update the scankeys. |
| */ |
| void |
| ExecIndexEvalRuntimeKeys(ExprContext *econtext, |
| IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys) |
| { |
| int j; |
| |
| for (j = 0; j < numRuntimeKeys; j++) |
| { |
| ScanKey scan_key = runtimeKeys[j].scan_key; |
| ExprState *key_expr = runtimeKeys[j].key_expr; |
| Datum scanvalue; |
| bool isNull; |
| |
| /* |
| * For each run-time key, extract the run-time expression and evaluate |
| * it with respect to the current outer tuple. We then stick the |
| * result into the proper scan key. |
| * |
| * Note: the result of the eval could be a pass-by-ref value that's |
| * stored in the outer scan's tuple, not in |
| * econtext->ecxt_per_tuple_memory. We assume that the outer tuple |
| * will stay put throughout our scan. If this is wrong, we could copy |
| * the result into our context explicitly, but I think that's not |
| * necessary... |
| */ |
| scanvalue = ExecEvalExprSwitchContext(key_expr, |
| econtext, |
| &isNull, |
| NULL); |
| scan_key->sk_argument = scanvalue; |
| if (isNull) |
| scan_key->sk_flags |= SK_ISNULL; |
| else |
| scan_key->sk_flags &= ~SK_ISNULL; |
| } |
| } |
| |
| /* |
| * ExecIndexEvalArrayKeys |
| * Evaluate any array key values, and set up to iterate through arrays. |
| * |
| * Returns TRUE if there are array elements to consider; FALSE means there |
| * is at least one null or empty array, so no match is possible. On TRUE |
| * result, the scankeys are initialized with the first elements of the arrays. |
| */ |
| bool |
| ExecIndexEvalArrayKeys(ExprContext *econtext, |
| IndexArrayKeyInfo *arrayKeys, int numArrayKeys) |
| { |
| bool result = true; |
| int j; |
| MemoryContext oldContext; |
| |
| /* We want to keep the arrays in per-tuple memory */ |
| oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); |
| |
| for (j = 0; j < numArrayKeys; j++) |
| { |
| ScanKey scan_key = arrayKeys[j].scan_key; |
| ExprState *array_expr = arrayKeys[j].array_expr; |
| Datum arraydatum; |
| bool isNull; |
| ArrayType *arrayval; |
| int16 elmlen; |
| bool elmbyval; |
| char elmalign; |
| int num_elems; |
| Datum *elem_values; |
| bool *elem_nulls; |
| |
| /* |
| * Compute and deconstruct the array expression. (Notes in |
| * ExecIndexEvalRuntimeKeys() apply here too.) |
| */ |
| arraydatum = ExecEvalExpr(array_expr, |
| econtext, |
| &isNull, |
| NULL); |
| if (isNull) |
| { |
| result = false; |
| break; /* no point in evaluating more */ |
| } |
| arrayval = DatumGetArrayTypeP(arraydatum); |
| /* We could cache this data, but not clear it's worth it */ |
| get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), |
| &elmlen, &elmbyval, &elmalign); |
| deconstruct_array(arrayval, |
| ARR_ELEMTYPE(arrayval), |
| elmlen, elmbyval, elmalign, |
| &elem_values, &elem_nulls, &num_elems); |
| if (num_elems <= 0) |
| { |
| result = false; |
| break; /* no point in evaluating more */ |
| } |
| |
| /* |
| * Note: we expect the previous array data, if any, to be |
| * automatically freed by resetting the per-tuple context; hence no |
| * pfree's here. |
| */ |
| arrayKeys[j].elem_values = elem_values; |
| arrayKeys[j].elem_nulls = elem_nulls; |
| arrayKeys[j].num_elems = num_elems; |
| scan_key->sk_argument = elem_values[0]; |
| if (elem_nulls[0]) |
| scan_key->sk_flags |= SK_ISNULL; |
| else |
| scan_key->sk_flags &= ~SK_ISNULL; |
| arrayKeys[j].next_elem = 1; |
| } |
| |
| MemoryContextSwitchTo(oldContext); |
| |
| return result; |
| } |
| |
| /* |
| * ExecIndexAdvanceArrayKeys |
| * Advance to the next set of array key values, if any. |
| * |
| * Returns TRUE if there is another set of values to consider, FALSE if not. |
| * On TRUE result, the scankeys are initialized with the next set of values. |
| */ |
| bool |
| ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo *arrayKeys, int numArrayKeys) |
| { |
| bool found = false; |
| int j; |
| |
| for (j = 0; j < numArrayKeys; j++) |
| { |
| ScanKey scan_key = arrayKeys[j].scan_key; |
| int next_elem = arrayKeys[j].next_elem; |
| int num_elems = arrayKeys[j].num_elems; |
| Datum *elem_values = arrayKeys[j].elem_values; |
| bool *elem_nulls = arrayKeys[j].elem_nulls; |
| |
| if (next_elem >= num_elems) |
| { |
| next_elem = 0; |
| found = false; /* need to advance next array key */ |
| } |
| else |
| found = true; |
| scan_key->sk_argument = elem_values[next_elem]; |
| if (elem_nulls[next_elem]) |
| scan_key->sk_flags |= SK_ISNULL; |
| else |
| scan_key->sk_flags &= ~SK_ISNULL; |
| arrayKeys[j].next_elem = next_elem + 1; |
| if (found) |
| break; |
| } |
| |
| return found; |
| } |
| |
| |
| /* ---------------------------------------------------------------- |
| * ExecEndIndexScan |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecEndIndexScan(IndexScanState *node) |
| { |
| Relation indexRelationDesc; |
| IndexScanDesc indexScanDesc; |
| Relation relation; |
| |
| /* |
| * extract information from the node |
| */ |
| indexRelationDesc = node->iss_RelationDesc; |
| indexScanDesc = node->iss_ScanDesc; |
| relation = node->ss.ss_currentRelation; |
| |
| /* |
| * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext |
| */ |
| #ifdef NOT_USED |
| ExecFreeExprContext(&node->ss.ps); |
| if (node->iss_RuntimeContext) |
| FreeExprContext(node->iss_RuntimeContext); |
| #endif |
| |
| /* |
| * clear out tuple table slots |
| */ |
| ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); |
| ExecClearTuple(node->ss.ss_ScanTupleSlot); |
| |
| /* |
| * close the index relation |
| */ |
| ExecEagerFreeIndexScan(node); |
| index_close(indexRelationDesc, NoLock); |
| |
| /* |
| * close the heap relation. |
| */ |
| ExecCloseScanRelation(relation); |
| |
| EndPlanStateGpmonPkt(&node->ss.ps); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecIndexMarkPos |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecIndexMarkPos(IndexScanState *node) |
| { |
| index_markpos(node->iss_ScanDesc); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecIndexRestrPos |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecIndexRestrPos(IndexScanState *node) |
| { |
| index_restrpos(node->iss_ScanDesc); |
| Gpmon_M_Incr(GpmonPktFromIndexScanState(node), GPMON_INDEXSCAN_RESTOREPOS); |
| CheckSendPlanStateGpmonPkt(&node->ss.ps); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecInitIndexScan |
| * |
| * Initializes the index scan's state information, creates |
| * scan keys, and opens the base and index relations. |
| * |
| * Note: index scans have 2 sets of state information because |
| * we have to keep track of the base relation and the |
| * index relation. |
| * ---------------------------------------------------------------- |
| */ |
| IndexScanState * |
| ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) |
| { |
| IndexScanState *indexstate; |
| Relation currentRelation; |
| bool relistarget; |
| |
| /* |
| * create state structure |
| */ |
| indexstate = makeNode(IndexScanState); |
| indexstate->ss.ps.plan = (Plan *) node; |
| indexstate->ss.ps.state = estate; |
| |
| /* |
| * Miscellaneous initialization |
| * |
| * create expression context for node |
| */ |
| ExecAssignExprContext(estate, &indexstate->ss.ps); |
| |
| /*indexstate->ss.ps.ps_TupFromTlist = false;*/ |
| |
| /* |
| * initialize child expressions |
| * |
| * Note: we don't initialize all of the indexqual expression, only the |
| * sub-parts corresponding to runtime keys (see below). The indexqualorig |
| * expression is always initialized even though it will only be used in |
| * some uncommon cases --- would be nice to improve that. (Problem is |
| * that any SubPlans present in the expression must be found now...) |
| */ |
| indexstate->ss.ps.targetlist = (List *) |
| ExecInitExpr((Expr *) node->scan.plan.targetlist, |
| (PlanState *) indexstate); |
| indexstate->ss.ps.qual = (List *) |
| ExecInitExpr((Expr *) node->scan.plan.qual, |
| (PlanState *) indexstate); |
| indexstate->indexqualorig = (List *) |
| ExecInitExpr((Expr *) node->indexqualorig, |
| (PlanState *) indexstate); |
| |
| #define INDEXSCAN_NSLOTS 2 |
| |
| /* |
| * tuple table initialization |
| */ |
| ExecInitResultTupleSlot(estate, &indexstate->ss.ps); |
| ExecInitScanTupleSlot(estate, &indexstate->ss); |
| |
| /* |
| * open the base relation and acquire appropriate lock on it. |
| */ |
| currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid); |
| |
| indexstate->ss.ss_currentRelation = currentRelation; |
| |
| /* |
| * get the scan type from the relation descriptor. |
| */ |
| ExecAssignScanType(&indexstate->ss, RelationGetDescr(currentRelation)); |
| |
| /* |
| * Open the index relation. |
| * |
| * If the parent table is one of the target relations of the query, then |
| * InitPlan already opened and write-locked the index, so we can avoid |
| * taking another lock here. Otherwise we need a normal reader's lock. |
| */ |
| relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); |
| indexstate->iss_RelationDesc = index_open(node->indexid, |
| relistarget ? NoLock : AccessShareLock); |
| |
| /* |
| * build the index scan keys from the index qualification |
| */ |
| ExecIndexBuildScanKeys((PlanState *) indexstate, |
| indexstate->iss_RelationDesc, |
| node->indexqual, |
| node->indexstrategy, |
| node->indexsubtype, |
| &indexstate->iss_ScanKeys, |
| &indexstate->iss_NumScanKeys, |
| &indexstate->iss_RuntimeKeys, |
| &indexstate->iss_NumRuntimeKeys, |
| NULL, /* no ArrayKeys */ |
| NULL); |
| |
| /* |
| * If we have runtime keys, we need an ExprContext to evaluate them. The |
| * node's standard context won't do because we want to reset that context |
| * for every tuple. So, build another context just like the other one... |
| * -tgl 7/11/00 |
| */ |
| if (indexstate->iss_NumRuntimeKeys != 0) |
| { |
| ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext; |
| |
| ExecAssignExprContext(estate, &indexstate->ss.ps); |
| indexstate->iss_RuntimeContext = indexstate->ss.ps.ps_ExprContext; |
| indexstate->ss.ps.ps_ExprContext = stdecontext; |
| } |
| else |
| { |
| indexstate->iss_RuntimeContext = NULL; |
| } |
| |
| /* |
| * Initialize index-specific scan state |
| */ |
| indexstate->iss_RuntimeKeysReady = false; |
| |
| /* |
| * Initialize result tuple type and projection info. |
| */ |
| ExecAssignResultTypeFromTL(&indexstate->ss.ps); |
| ExecAssignScanProjectionInfo(&indexstate->ss); |
| |
| initGpmonPktForIndexScan((Plan *)node, &indexstate->ss.ps.gpmon_pkt, estate); |
| |
| /* |
| * If eflag contains EXEC_FLAG_REWIND or EXEC_FLAG_BACKWARD or EXEC_FLAG_MARK, |
| * then this node is not eager free safe. |
| */ |
| indexstate->ss.ps.delayEagerFree = |
| ((eflags & (EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) != 0); |
| |
| /* |
| * all done. |
| */ |
| return indexstate; |
| } |
| |
| int |
| ExecCountSlotsIndexScan(IndexScan *node) |
| { |
| return ExecCountSlotsNode(outerPlan((Plan *) node)) + |
| ExecCountSlotsNode(innerPlan((Plan *) node)) + INDEXSCAN_NSLOTS; |
| } |
| |
| |
| void |
| initGpmonPktForIndexScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate) |
| { |
| Assert(planNode != NULL && gpmon_pkt != NULL && IsA(planNode, IndexScan)); |
| |
| { |
| char *relname = get_rel_name(((IndexScan *)planNode)->indexid); |
| |
| Assert(GPMON_INDEXSCAN_TOTAL <= (int) GPMON_QEXEC_M_COUNT); |
| InitPlanNodeGpmonPkt(planNode, gpmon_pkt, estate, PMNT_IndexScan, |
| (int64)planNode->plan_rows, |
| relname); |
| if (relname) |
| pfree(relname); |
| } |
| } |
| |
| void |
| ExecEagerFreeIndexScan(IndexScanState *node) |
| { |
| freeScanDesc(node); |
| } |