blob: 608dd36893035a65ff0dd3e0dd5f0ca5dee03bb1 [file] [log] [blame]
/*-------------------------------------------------------------------------
*
* nodeTidscan.c
* Routines to support direct tid scans of relations
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeTidscan.c,v 1.51.2.1 2006/12/26 19:26:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
*
* ExecTidScan scans a relation using tids
* ExecInitTidScan creates and initializes state info.
* ExecTidReScan rescans the tid relation.
* ExecEndTidScan releases all storage.
* ExecTidMarkPos marks scan position.
* ExecTidRestrPos restores scan position.
*/
#include "postgres.h"
#include "access/heapam.h"
#include "catalog/pg_type.h"
#include "cdb/cdbvars.h"
#include "executor/execdebug.h"
#include "executor/nodeTidscan.h"
#include "optimizer/clauses.h"
#include "utils/array.h"
#include "parser/parsetree.h"
#define IsCTIDVar(node) \
((node) != NULL && \
IsA((node), Var) && \
((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
((Var *) (node))->varlevelsup == 0)
static void TidListCreate(TidScanState *tidstate);
static int itemptr_comparator(const void *a, const void *b);
static TupleTableSlot *TidNext(TidScanState *node);
/*
* Compute the list of TIDs to be visited, by evaluating the expressions
* for them.
*
* (The result is actually an array, not a list.)
*/
static void
TidListCreate(TidScanState *tidstate)
{
List *evalList = tidstate->tss_tidquals;
ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
ItemPointerData *tidList;
int numAllocTids;
int numTids;
ListCell *l;
/*
* We initialize the array with enough slots for the case that all quals
* are simple OpExprs, or just one CurrentOfExpr. If there's any ScalarArrayOpExprs,
* we may have to enlarge the array.
*/
numAllocTids = list_length(evalList);
tidList = (ItemPointerData *)
palloc(numAllocTids * sizeof(ItemPointerData));
numTids = 0;
foreach(l, evalList)
{
ExprState *exstate = (ExprState *) lfirst(l);
Expr *expr = exstate->expr;
ItemPointer itemptr;
bool isNull;
if (is_opclause(expr))
{
FuncExprState *fexstate = (FuncExprState *) exstate;
Node *arg1;
Node *arg2;
arg1 = get_leftop(expr);
arg2 = get_rightop(expr);
if (IsCTIDVar(arg1))
exstate = (ExprState *) lsecond(fexstate->args);
else if (IsCTIDVar(arg2))
exstate = (ExprState *) linitial(fexstate->args);
else
elog(ERROR, "could not identify CTID variable");
itemptr = (ItemPointer)
DatumGetPointer(ExecEvalExprSwitchContext(exstate,
econtext,
&isNull,
NULL));
if (!isNull && ItemPointerIsValid(itemptr))
{
if (numTids >= numAllocTids)
{
numAllocTids *= 2;
tidList = (ItemPointerData *)
repalloc(tidList,
numAllocTids * sizeof(ItemPointerData));
}
tidList[numTids++] = *itemptr;
}
}
else if (expr && IsA(expr, ScalarArrayOpExpr))
{
ScalarArrayOpExprState *saexstate = (ScalarArrayOpExprState *) exstate;
Datum arraydatum;
ArrayType *itemarray;
Datum *ipdatums;
bool *ipnulls;
int ndatums;
int i;
exstate = (ExprState *) lsecond(saexstate->fxprstate.args);
arraydatum = ExecEvalExprSwitchContext(exstate,
econtext,
&isNull,
NULL);
if (isNull)
continue;
itemarray = DatumGetArrayTypeP(arraydatum);
deconstruct_array(itemarray,
TIDOID, SizeOfIptrData, false, 's',
&ipdatums, &ipnulls, &ndatums);
if (numTids + ndatums > numAllocTids)
{
numAllocTids = numTids + ndatums;
tidList = (ItemPointerData *)
repalloc(tidList,
numAllocTids * sizeof(ItemPointerData));
}
for (i = 0; i < ndatums; i++)
{
if (!ipnulls[i])
{
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
if (ItemPointerIsValid(itemptr))
tidList[numTids++] = *itemptr;
}
}
pfree(ipdatums);
pfree(ipnulls);
}
else if (expr && IsA(expr, CurrentOfExpr))
{
/*
* CURRENT OF must be the only expr. This allows us to avoid
* a repalloc of the tidList.
*/
Insist(list_length(evalList) == 1);
CurrentOfExpr *cexpr = (CurrentOfExpr *) expr;
tidList[numTids++] = cexpr->ctid;
}
else
elog(ERROR, "could not identify CTID expression");
}
/*
* Sort the array of TIDs into order, and eliminate duplicates.
* Eliminating duplicates is necessary since we want OR semantics across
* the list. Sorting makes it easier to detect duplicates, and as a bonus
* ensures that we will visit the heap in the most efficient way.
*/
if (numTids > 1)
{
int lastTid;
int i;
qsort((void *) tidList, numTids, sizeof(ItemPointerData),
itemptr_comparator);
lastTid = 0;
for (i = 1; i < numTids; i++)
{
if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
tidList[++lastTid] = tidList[i];
}
numTids = lastTid + 1;
}
tidstate->tss_TidList = tidList;
tidstate->tss_NumTids = numTids;
tidstate->tss_TidPtr = -1;
}
/*
* qsort comparator for ItemPointerData items
*/
static int
itemptr_comparator(const void *a, const void *b)
{
const ItemPointerData *ipa = (const ItemPointerData *) a;
const ItemPointerData *ipb = (const ItemPointerData *) b;
BlockNumber ba = ItemPointerGetBlockNumber(ipa);
BlockNumber bb = ItemPointerGetBlockNumber(ipb);
OffsetNumber oa = ItemPointerGetOffsetNumber(ipa);
OffsetNumber ob = ItemPointerGetOffsetNumber(ipb);
if (ba < bb)
return -1;
if (ba > bb)
return 1;
if (oa < ob)
return -1;
if (oa > ob)
return 1;
return 0;
}
/* ----------------------------------------------------------------
* TidNext
*
* Retrieve a tuple from the TidScan node's currentRelation
* using the tids in the TidScanState information.
*
* ----------------------------------------------------------------
*/
static TupleTableSlot *
TidNext(TidScanState *node)
{
EState *estate;
ScanDirection direction;
Snapshot snapshot;
Relation heapRelation;
HeapTuple tuple;
TupleTableSlot *slot;
Index scanrelid;
Buffer buffer = InvalidBuffer;
ItemPointerData *tidList;
int numTids;
bool bBackward;
/*
* extract necessary information from tid scan node
*/
estate = node->ss.ps.state;
direction = estate->es_direction;
snapshot = estate->es_snapshot;
heapRelation = node->ss.ss_currentRelation;
slot = node->ss.ss_ScanTupleSlot;
scanrelid = ((TidScan *) node->ss.ps.plan)->scan.scanrelid;
/*
* Check if we are evaluating PlanQual for tuple of this relation.
* Additional checking is not good, but no other way for now. We could
* introduce new nodes for this case and handle TidScan --> NewNode
* switching in Init/ReScan plan...
*/
if (estate->es_evTuple != NULL &&
estate->es_evTuple[scanrelid - 1] != NULL)
{
if (estate->es_evTupleNull[scanrelid - 1])
return ExecClearTuple(slot);
/*
* XXX shouldn't we check here to make sure tuple matches TID list? In
* runtime-key case this is not certain, is it?
*/
ExecStoreGenericTuple(estate->es_evTuple[scanrelid - 1], slot, false);
/* Flag for the next call that no more tuples */
estate->es_evTupleNull[scanrelid - 1] = true;
if (!TupIsNull(slot))
{
Gpmon_M_Incr_Rows_Out(GpmonPktFromTidScanState(node));
CheckSendPlanStateGpmonPkt(&node->ss.ps);
}
return slot;
}
/*
* First time through, compute the list of TIDs to be visited
*/
if (node->tss_TidList == NULL)
TidListCreate(node);
tidList = node->tss_TidList;
numTids = node->tss_NumTids;
tuple = &(node->tss_htup);
/*
* Initialize or advance scan position, depending on direction.
*/
bBackward = ScanDirectionIsBackward(direction);
if (bBackward)
{
if (node->tss_TidPtr < 0)
{
/* initialize for backward scan */
node->tss_TidPtr = numTids - 1;
}
else
node->tss_TidPtr--;
}
else
{
if (node->tss_TidPtr < 0)
{
/* initialize for forward scan */
node->tss_TidPtr = 0;
}
else
node->tss_TidPtr++;
}
while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
{
tuple->t_self = tidList[node->tss_TidPtr];
if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL))
{
/*
* store the scanned tuple in the scan tuple slot of the scan
* state. Eventually we will only do this and not return a tuple.
* Note: we pass 'false' because tuples returned by amgetnext are
* pointers onto disk pages and were not created with palloc() and
* so should not be pfree()'d.
*/
ExecStoreHeapTuple(tuple, /* tuple to store */
slot, /* slot to store in */
buffer, /* buffer associated with tuple */
false); /* don't pfree */
/*
* At this point we have an extra pin on the buffer, because
* ExecStoreTuple incremented the pin count. Drop our local pin.
*/
ReleaseBuffer(buffer);
return slot;
}
/* Bad TID or failed snapshot qual; try next */
if (bBackward)
node->tss_TidPtr--;
else
node->tss_TidPtr++;
}
/*
* if we get here it means the tid scan failed so we are at the end of the
* scan..
*/
return ExecClearTuple(slot);
}
/* ----------------------------------------------------------------
* ExecTidScan(node)
*
* Scans the relation using tids and returns
* the next qualifying tuple in the direction specified.
* It calls ExecScan() and passes it the access methods which returns
* the next tuple using the tids.
*
* Conditions:
* -- the "cursor" maintained by the AMI is positioned at the tuple
* returned previously.
*
* Initial States:
* -- the relation indicated is opened for scanning so that the
* "cursor" is positioned before the first qualifying tuple.
* -- tidPtr is -1.
* ----------------------------------------------------------------
*/
TupleTableSlot *
ExecTidScan(TidScanState *node)
{
/*
* use TidNext as access method
*/
return ExecScan(&node->ss, (ExecScanAccessMtd) TidNext);
}
/* ----------------------------------------------------------------
* ExecTidReScan(node)
* ----------------------------------------------------------------
*/
void
ExecTidReScan(TidScanState *node, ExprContext *exprCtxt)
{
EState *estate;
Index scanrelid;
estate = node->ss.ps.state;
scanrelid = ((TidScan *) node->ss.ps.plan)->scan.scanrelid;
/*node->ss.ps.ps_TupFromTlist = false;*/
/* If we are being passed an outer tuple, save it for runtime key calc */
if (exprCtxt != NULL)
node->ss.ps.ps_ExprContext->ecxt_outertuple =
exprCtxt->ecxt_outertuple;
/* If this is re-scanning of PlanQual ... */
if (estate->es_evTuple != NULL &&
estate->es_evTuple[scanrelid - 1] != NULL)
{
estate->es_evTupleNull[scanrelid - 1] = false;
return;
}
if (node->tss_TidList)
pfree(node->tss_TidList);
node->tss_TidList = NULL;
node->tss_NumTids = 0;
node->tss_TidPtr = -1;
}
/* ----------------------------------------------------------------
* ExecEndTidScan
*
* Releases any storage allocated through C routines.
* Returns nothing.
* ----------------------------------------------------------------
*/
void
ExecEndTidScan(TidScanState *node)
{
/*
* Free the exprcontext
*/
ExecFreeExprContext(&node->ss.ps);
/*
* clear out tuple table slots
*/
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
ExecClearTuple(node->ss.ss_ScanTupleSlot);
/*
* close the heap relation.
*/
ExecCloseScanRelation(node->ss.ss_currentRelation);
EndPlanStateGpmonPkt(&node->ss.ps);
}
/* ----------------------------------------------------------------
* ExecTidMarkPos
*
* Marks scan position by marking the current tid.
* Returns nothing.
* ----------------------------------------------------------------
*/
void
ExecTidMarkPos(TidScanState *node)
{
node->tss_MarkTidPtr = node->tss_TidPtr;
}
/* ----------------------------------------------------------------
* ExecTidRestrPos
*
* Restores scan position by restoring the current tid.
* Returns nothing.
*
* XXX Assumes previously marked scan position belongs to current tid
* ----------------------------------------------------------------
*/
void
ExecTidRestrPos(TidScanState *node)
{
node->tss_TidPtr = node->tss_MarkTidPtr;
}
/* ----------------------------------------------------------------
* ExecInitTidScan
*
* Initializes the tid scan's state information, creates
* scan keys, and opens the base and tid relations.
*
* Parameters:
* node: TidNode node produced by the planner.
* estate: the execution state initialized in InitPlan.
* ----------------------------------------------------------------
*/
TidScanState *
ExecInitTidScan(TidScan *node, EState *estate, int eflags)
{
TidScanState *tidstate;
Relation currentRelation;
/*
* create state structure
*/
tidstate = makeNode(TidScanState);
tidstate->ss.ps.plan = (Plan *) node;
tidstate->ss.ps.state = estate;
/*
* Miscellaneous initialization
*
* create expression context for node
*/
ExecAssignExprContext(estate, &tidstate->ss.ps);
/*tidstate->ss.ps.ps_TupFromTlist = false;*/
/*
* initialize child expressions
*/
tidstate->ss.ps.targetlist = (List *)
ExecInitExpr((Expr *) node->scan.plan.targetlist,
(PlanState *) tidstate);
tidstate->ss.ps.qual = (List *)
ExecInitExpr((Expr *) node->scan.plan.qual,
(PlanState *) tidstate);
tidstate->tss_tidquals = (List *)
ExecInitExpr((Expr *) node->tidquals,
(PlanState *) tidstate);
#define TIDSCAN_NSLOTS 2
/*
* tuple table initialization
*/
ExecInitResultTupleSlot(estate, &tidstate->ss.ps);
ExecInitScanTupleSlot(estate, &tidstate->ss);
/*
* mark tid list as not computed yet
*/
tidstate->tss_TidList = NULL;
tidstate->tss_NumTids = 0;
tidstate->tss_TidPtr = -1;
/*
* open the base relation and acquire appropriate lock on it.
*/
currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);
tidstate->ss.ss_currentRelation = currentRelation;
/*
* get the scan type from the relation descriptor.
*/
ExecAssignScanType(&tidstate->ss, RelationGetDescr(currentRelation));
/*
* Initialize result tuple type and projection info.
*/
ExecAssignResultTypeFromTL(&tidstate->ss.ps);
ExecAssignScanProjectionInfo(&tidstate->ss);
initGpmonPktForTidScan((Plan *)node, &tidstate->ss.ps.gpmon_pkt, estate);
/*
* all done.
*/
return tidstate;
}
int
ExecCountSlotsTidScan(TidScan *node)
{
return ExecCountSlotsNode(outerPlan((Plan *) node)) +
ExecCountSlotsNode(innerPlan((Plan *) node)) + TIDSCAN_NSLOTS;
}
void
initGpmonPktForTidScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate)
{
Assert(planNode != NULL && gpmon_pkt != NULL && IsA(planNode, TidScan));
{
RangeTblEntry *rte = rt_fetch(((TidScan *)planNode)->scan.scanrelid,
estate->es_range_table);
char schema_rel_name[SCAN_REL_NAME_BUF_SIZE] = {0};
Assert(GPMON_TIDSCAN_TOTAL <= (int)GPMON_QEXEC_M_COUNT);
InitPlanNodeGpmonPkt(planNode, gpmon_pkt, estate, PMNT_TidScan,
(int64)planNode->plan_rows,
GetScanRelNameGpmon(rte->relid, schema_rel_name));
}
}