| /*------------------------------------------------------------------------- |
| * |
| * nodeTidscan.c |
| * Routines to support direct tid scans of relations |
| * |
| * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/executor/nodeTidscan.c,v 1.51.2.1 2006/12/26 19:26:56 tgl Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| /* |
| * INTERFACE ROUTINES |
| * |
| * ExecTidScan scans a relation using tids |
| * ExecInitTidScan creates and initializes state info. |
| * ExecTidReScan rescans the tid relation. |
| * ExecEndTidScan releases all storage. |
| * ExecTidMarkPos marks scan position. |
| * ExecTidRestrPos restores scan position. |
| */ |
| #include "postgres.h" |
| |
| #include "access/heapam.h" |
| #include "catalog/pg_type.h" |
| #include "cdb/cdbvars.h" |
| #include "executor/execdebug.h" |
| #include "executor/nodeTidscan.h" |
| #include "optimizer/clauses.h" |
| #include "utils/array.h" |
| #include "parser/parsetree.h" |
| |
| |
| #define IsCTIDVar(node) \ |
| ((node) != NULL && \ |
| IsA((node), Var) && \ |
| ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \ |
| ((Var *) (node))->varlevelsup == 0) |
| |
| static void TidListCreate(TidScanState *tidstate); |
| static int itemptr_comparator(const void *a, const void *b); |
| static TupleTableSlot *TidNext(TidScanState *node); |
| |
| |
| /* |
| * Compute the list of TIDs to be visited, by evaluating the expressions |
| * for them. |
| * |
| * (The result is actually an array, not a list.) |
| */ |
| static void |
| TidListCreate(TidScanState *tidstate) |
| { |
| List *evalList = tidstate->tss_tidquals; |
| ExprContext *econtext = tidstate->ss.ps.ps_ExprContext; |
| ItemPointerData *tidList; |
| int numAllocTids; |
| int numTids; |
| ListCell *l; |
| |
| /* |
| * We initialize the array with enough slots for the case that all quals |
| * are simple OpExprs, or just one CurrentOfExpr. If there's any ScalarArrayOpExprs, |
| * we may have to enlarge the array. |
| */ |
| numAllocTids = list_length(evalList); |
| tidList = (ItemPointerData *) |
| palloc(numAllocTids * sizeof(ItemPointerData)); |
| numTids = 0; |
| |
| foreach(l, evalList) |
| { |
| ExprState *exstate = (ExprState *) lfirst(l); |
| Expr *expr = exstate->expr; |
| ItemPointer itemptr; |
| bool isNull; |
| |
| if (is_opclause(expr)) |
| { |
| FuncExprState *fexstate = (FuncExprState *) exstate; |
| Node *arg1; |
| Node *arg2; |
| |
| arg1 = get_leftop(expr); |
| arg2 = get_rightop(expr); |
| if (IsCTIDVar(arg1)) |
| exstate = (ExprState *) lsecond(fexstate->args); |
| else if (IsCTIDVar(arg2)) |
| exstate = (ExprState *) linitial(fexstate->args); |
| else |
| elog(ERROR, "could not identify CTID variable"); |
| |
| itemptr = (ItemPointer) |
| DatumGetPointer(ExecEvalExprSwitchContext(exstate, |
| econtext, |
| &isNull, |
| NULL)); |
| if (!isNull && ItemPointerIsValid(itemptr)) |
| { |
| if (numTids >= numAllocTids) |
| { |
| numAllocTids *= 2; |
| tidList = (ItemPointerData *) |
| repalloc(tidList, |
| numAllocTids * sizeof(ItemPointerData)); |
| } |
| tidList[numTids++] = *itemptr; |
| } |
| } |
| else if (expr && IsA(expr, ScalarArrayOpExpr)) |
| { |
| ScalarArrayOpExprState *saexstate = (ScalarArrayOpExprState *) exstate; |
| Datum arraydatum; |
| ArrayType *itemarray; |
| Datum *ipdatums; |
| bool *ipnulls; |
| int ndatums; |
| int i; |
| |
| exstate = (ExprState *) lsecond(saexstate->fxprstate.args); |
| arraydatum = ExecEvalExprSwitchContext(exstate, |
| econtext, |
| &isNull, |
| NULL); |
| if (isNull) |
| continue; |
| itemarray = DatumGetArrayTypeP(arraydatum); |
| deconstruct_array(itemarray, |
| TIDOID, SizeOfIptrData, false, 's', |
| &ipdatums, &ipnulls, &ndatums); |
| if (numTids + ndatums > numAllocTids) |
| { |
| numAllocTids = numTids + ndatums; |
| tidList = (ItemPointerData *) |
| repalloc(tidList, |
| numAllocTids * sizeof(ItemPointerData)); |
| } |
| for (i = 0; i < ndatums; i++) |
| { |
| if (!ipnulls[i]) |
| { |
| itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]); |
| if (ItemPointerIsValid(itemptr)) |
| tidList[numTids++] = *itemptr; |
| } |
| } |
| pfree(ipdatums); |
| pfree(ipnulls); |
| } |
| else if (expr && IsA(expr, CurrentOfExpr)) |
| { |
| /* |
| * CURRENT OF must be the only expr. This allows us to avoid |
| * a repalloc of the tidList. |
| */ |
| Insist(list_length(evalList) == 1); |
| CurrentOfExpr *cexpr = (CurrentOfExpr *) expr; |
| tidList[numTids++] = cexpr->ctid; |
| } |
| else |
| elog(ERROR, "could not identify CTID expression"); |
| } |
| |
| /* |
| * Sort the array of TIDs into order, and eliminate duplicates. |
| * Eliminating duplicates is necessary since we want OR semantics across |
| * the list. Sorting makes it easier to detect duplicates, and as a bonus |
| * ensures that we will visit the heap in the most efficient way. |
| */ |
| if (numTids > 1) |
| { |
| int lastTid; |
| int i; |
| |
| qsort((void *) tidList, numTids, sizeof(ItemPointerData), |
| itemptr_comparator); |
| lastTid = 0; |
| for (i = 1; i < numTids; i++) |
| { |
| if (!ItemPointerEquals(&tidList[lastTid], &tidList[i])) |
| tidList[++lastTid] = tidList[i]; |
| } |
| numTids = lastTid + 1; |
| } |
| |
| tidstate->tss_TidList = tidList; |
| tidstate->tss_NumTids = numTids; |
| tidstate->tss_TidPtr = -1; |
| } |
| |
| /* |
| * qsort comparator for ItemPointerData items |
| */ |
| static int |
| itemptr_comparator(const void *a, const void *b) |
| { |
| const ItemPointerData *ipa = (const ItemPointerData *) a; |
| const ItemPointerData *ipb = (const ItemPointerData *) b; |
| BlockNumber ba = ItemPointerGetBlockNumber(ipa); |
| BlockNumber bb = ItemPointerGetBlockNumber(ipb); |
| OffsetNumber oa = ItemPointerGetOffsetNumber(ipa); |
| OffsetNumber ob = ItemPointerGetOffsetNumber(ipb); |
| |
| if (ba < bb) |
| return -1; |
| if (ba > bb) |
| return 1; |
| if (oa < ob) |
| return -1; |
| if (oa > ob) |
| return 1; |
| return 0; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * TidNext |
| * |
| * Retrieve a tuple from the TidScan node's currentRelation |
| * using the tids in the TidScanState information. |
| * |
| * ---------------------------------------------------------------- |
| */ |
| static TupleTableSlot * |
| TidNext(TidScanState *node) |
| { |
| EState *estate; |
| ScanDirection direction; |
| Snapshot snapshot; |
| Relation heapRelation; |
| HeapTuple tuple; |
| TupleTableSlot *slot; |
| Index scanrelid; |
| Buffer buffer = InvalidBuffer; |
| ItemPointerData *tidList; |
| int numTids; |
| bool bBackward; |
| |
| /* |
| * extract necessary information from tid scan node |
| */ |
| estate = node->ss.ps.state; |
| direction = estate->es_direction; |
| snapshot = estate->es_snapshot; |
| heapRelation = node->ss.ss_currentRelation; |
| slot = node->ss.ss_ScanTupleSlot; |
| scanrelid = ((TidScan *) node->ss.ps.plan)->scan.scanrelid; |
| |
| /* |
| * Check if we are evaluating PlanQual for tuple of this relation. |
| * Additional checking is not good, but no other way for now. We could |
| * introduce new nodes for this case and handle TidScan --> NewNode |
| * switching in Init/ReScan plan... |
| */ |
| if (estate->es_evTuple != NULL && |
| estate->es_evTuple[scanrelid - 1] != NULL) |
| { |
| if (estate->es_evTupleNull[scanrelid - 1]) |
| return ExecClearTuple(slot); |
| |
| /* |
| * XXX shouldn't we check here to make sure tuple matches TID list? In |
| * runtime-key case this is not certain, is it? |
| */ |
| |
| ExecStoreGenericTuple(estate->es_evTuple[scanrelid - 1], slot, false); |
| |
| /* Flag for the next call that no more tuples */ |
| estate->es_evTupleNull[scanrelid - 1] = true; |
| if (!TupIsNull(slot)) |
| { |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromTidScanState(node)); |
| CheckSendPlanStateGpmonPkt(&node->ss.ps); |
| } |
| return slot; |
| } |
| |
| /* |
| * First time through, compute the list of TIDs to be visited |
| */ |
| if (node->tss_TidList == NULL) |
| TidListCreate(node); |
| |
| tidList = node->tss_TidList; |
| numTids = node->tss_NumTids; |
| |
| tuple = &(node->tss_htup); |
| |
| /* |
| * Initialize or advance scan position, depending on direction. |
| */ |
| bBackward = ScanDirectionIsBackward(direction); |
| if (bBackward) |
| { |
| if (node->tss_TidPtr < 0) |
| { |
| /* initialize for backward scan */ |
| node->tss_TidPtr = numTids - 1; |
| } |
| else |
| node->tss_TidPtr--; |
| } |
| else |
| { |
| if (node->tss_TidPtr < 0) |
| { |
| /* initialize for forward scan */ |
| node->tss_TidPtr = 0; |
| } |
| else |
| node->tss_TidPtr++; |
| } |
| |
| while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids) |
| { |
| tuple->t_self = tidList[node->tss_TidPtr]; |
| if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL)) |
| { |
| /* |
| * store the scanned tuple in the scan tuple slot of the scan |
| * state. Eventually we will only do this and not return a tuple. |
| * Note: we pass 'false' because tuples returned by amgetnext are |
| * pointers onto disk pages and were not created with palloc() and |
| * so should not be pfree()'d. |
| */ |
| ExecStoreHeapTuple(tuple, /* tuple to store */ |
| slot, /* slot to store in */ |
| buffer, /* buffer associated with tuple */ |
| false); /* don't pfree */ |
| |
| /* |
| * At this point we have an extra pin on the buffer, because |
| * ExecStoreTuple incremented the pin count. Drop our local pin. |
| */ |
| ReleaseBuffer(buffer); |
| |
| return slot; |
| } |
| /* Bad TID or failed snapshot qual; try next */ |
| if (bBackward) |
| node->tss_TidPtr--; |
| else |
| node->tss_TidPtr++; |
| } |
| |
| /* |
| * if we get here it means the tid scan failed so we are at the end of the |
| * scan.. |
| */ |
| return ExecClearTuple(slot); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecTidScan(node) |
| * |
| * Scans the relation using tids and returns |
| * the next qualifying tuple in the direction specified. |
| * It calls ExecScan() and passes it the access methods which returns |
| * the next tuple using the tids. |
| * |
| * Conditions: |
| * -- the "cursor" maintained by the AMI is positioned at the tuple |
| * returned previously. |
| * |
| * Initial States: |
| * -- the relation indicated is opened for scanning so that the |
| * "cursor" is positioned before the first qualifying tuple. |
| * -- tidPtr is -1. |
| * ---------------------------------------------------------------- |
| */ |
| TupleTableSlot * |
| ExecTidScan(TidScanState *node) |
| { |
| /* |
| * use TidNext as access method |
| */ |
| return ExecScan(&node->ss, (ExecScanAccessMtd) TidNext); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecTidReScan(node) |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecTidReScan(TidScanState *node, ExprContext *exprCtxt) |
| { |
| EState *estate; |
| Index scanrelid; |
| |
| estate = node->ss.ps.state; |
| scanrelid = ((TidScan *) node->ss.ps.plan)->scan.scanrelid; |
| |
| /*node->ss.ps.ps_TupFromTlist = false;*/ |
| |
| /* If we are being passed an outer tuple, save it for runtime key calc */ |
| if (exprCtxt != NULL) |
| node->ss.ps.ps_ExprContext->ecxt_outertuple = |
| exprCtxt->ecxt_outertuple; |
| |
| /* If this is re-scanning of PlanQual ... */ |
| if (estate->es_evTuple != NULL && |
| estate->es_evTuple[scanrelid - 1] != NULL) |
| { |
| estate->es_evTupleNull[scanrelid - 1] = false; |
| return; |
| } |
| |
| if (node->tss_TidList) |
| pfree(node->tss_TidList); |
| node->tss_TidList = NULL; |
| node->tss_NumTids = 0; |
| node->tss_TidPtr = -1; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecEndTidScan |
| * |
| * Releases any storage allocated through C routines. |
| * Returns nothing. |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecEndTidScan(TidScanState *node) |
| { |
| /* |
| * Free the exprcontext |
| */ |
| ExecFreeExprContext(&node->ss.ps); |
| |
| /* |
| * clear out tuple table slots |
| */ |
| ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); |
| ExecClearTuple(node->ss.ss_ScanTupleSlot); |
| |
| /* |
| * close the heap relation. |
| */ |
| ExecCloseScanRelation(node->ss.ss_currentRelation); |
| |
| EndPlanStateGpmonPkt(&node->ss.ps); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecTidMarkPos |
| * |
| * Marks scan position by marking the current tid. |
| * Returns nothing. |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecTidMarkPos(TidScanState *node) |
| { |
| node->tss_MarkTidPtr = node->tss_TidPtr; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecTidRestrPos |
| * |
| * Restores scan position by restoring the current tid. |
| * Returns nothing. |
| * |
| * XXX Assumes previously marked scan position belongs to current tid |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecTidRestrPos(TidScanState *node) |
| { |
| node->tss_TidPtr = node->tss_MarkTidPtr; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecInitTidScan |
| * |
| * Initializes the tid scan's state information, creates |
| * scan keys, and opens the base and tid relations. |
| * |
| * Parameters: |
| * node: TidNode node produced by the planner. |
| * estate: the execution state initialized in InitPlan. |
| * ---------------------------------------------------------------- |
| */ |
| TidScanState * |
| ExecInitTidScan(TidScan *node, EState *estate, int eflags) |
| { |
| TidScanState *tidstate; |
| Relation currentRelation; |
| |
| /* |
| * create state structure |
| */ |
| tidstate = makeNode(TidScanState); |
| tidstate->ss.ps.plan = (Plan *) node; |
| tidstate->ss.ps.state = estate; |
| |
| /* |
| * Miscellaneous initialization |
| * |
| * create expression context for node |
| */ |
| ExecAssignExprContext(estate, &tidstate->ss.ps); |
| |
| /*tidstate->ss.ps.ps_TupFromTlist = false;*/ |
| |
| /* |
| * initialize child expressions |
| */ |
| tidstate->ss.ps.targetlist = (List *) |
| ExecInitExpr((Expr *) node->scan.plan.targetlist, |
| (PlanState *) tidstate); |
| tidstate->ss.ps.qual = (List *) |
| ExecInitExpr((Expr *) node->scan.plan.qual, |
| (PlanState *) tidstate); |
| |
| tidstate->tss_tidquals = (List *) |
| ExecInitExpr((Expr *) node->tidquals, |
| (PlanState *) tidstate); |
| |
| #define TIDSCAN_NSLOTS 2 |
| |
| /* |
| * tuple table initialization |
| */ |
| ExecInitResultTupleSlot(estate, &tidstate->ss.ps); |
| ExecInitScanTupleSlot(estate, &tidstate->ss); |
| |
| /* |
| * mark tid list as not computed yet |
| */ |
| tidstate->tss_TidList = NULL; |
| tidstate->tss_NumTids = 0; |
| tidstate->tss_TidPtr = -1; |
| |
| /* |
| * open the base relation and acquire appropriate lock on it. |
| */ |
| currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid); |
| |
| tidstate->ss.ss_currentRelation = currentRelation; |
| |
| /* |
| * get the scan type from the relation descriptor. |
| */ |
| ExecAssignScanType(&tidstate->ss, RelationGetDescr(currentRelation)); |
| |
| /* |
| * Initialize result tuple type and projection info. |
| */ |
| ExecAssignResultTypeFromTL(&tidstate->ss.ps); |
| ExecAssignScanProjectionInfo(&tidstate->ss); |
| |
| initGpmonPktForTidScan((Plan *)node, &tidstate->ss.ps.gpmon_pkt, estate); |
| |
| /* |
| * all done. |
| */ |
| return tidstate; |
| } |
| |
| int |
| ExecCountSlotsTidScan(TidScan *node) |
| { |
| return ExecCountSlotsNode(outerPlan((Plan *) node)) + |
| ExecCountSlotsNode(innerPlan((Plan *) node)) + TIDSCAN_NSLOTS; |
| } |
| |
| void |
| initGpmonPktForTidScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate) |
| { |
| Assert(planNode != NULL && gpmon_pkt != NULL && IsA(planNode, TidScan)); |
| |
| { |
| RangeTblEntry *rte = rt_fetch(((TidScan *)planNode)->scan.scanrelid, |
| estate->es_range_table); |
| char schema_rel_name[SCAN_REL_NAME_BUF_SIZE] = {0}; |
| |
| Assert(GPMON_TIDSCAN_TOTAL <= (int)GPMON_QEXEC_M_COUNT); |
| InitPlanNodeGpmonPkt(planNode, gpmon_pkt, estate, PMNT_TidScan, |
| (int64)planNode->plan_rows, |
| GetScanRelNameGpmon(rte->relid, schema_rel_name)); |
| } |
| } |