/*
* execHeapScan.c
 * Support routines for scanning heap tables as part of bitmap table scans.
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*/
#include "postgres.h"
#include "executor/executor.h"
#include "nodes/execnodes.h"
#include "access/heapam.h"
#include "access/relscan.h"
#include "access/transam.h"
#include "executor/execdebug.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/memutils.h"
#include "miscadmin.h"
#include "parser/parsetree.h"
#include "cdb/cdbvars.h" /* gp_select_invisible */
#include "nodes/tidbitmap.h"
static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
/*
 * Prepares for a new bitmap heap scan.
*/
void
BitmapHeapScanBegin(ScanState *scanState)
{
BitmapTableScanState *node = (BitmapTableScanState *)scanState;
Relation currentRelation = node->ss.ss_currentRelation;
EState *estate = node->ss.ps.state;
Assert(node->scanDesc == NULL);
/*
* Even though we aren't going to do a conventional seqscan, it is useful
* to create a HeapScanDesc --- this checks the relation size and sets up
* statistical infrastructure for us.
*/
node->scanDesc = heap_beginscan(currentRelation,
estate->es_snapshot,
0,
NULL);
/*
* One problem is that heap_beginscan counts a "sequential scan" start,
* when we actually aren't doing any such thing. Reverse out the added
* scan count. (Eventually we may want to count bitmap scans separately.)
*/
pgstat_discount_heap_scan(currentRelation);
/*
 * Heap tables always require rechecking each tuple, because of
 * potential visibility issues (we don't store MVCC info in the index).
*/
node->recheckTuples = true;
}
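/*
 * For orientation, a minimal sketch of how a driver is expected to use
 * these hooks, inferred from the fields this file touches (the actual
 * driver loop lives in nodeBitmapTableScan and may differ in detail):
 *
 *		BitmapHeapScanBegin(scanState);
 *		for (;;)
 *		{
 *			// driver fills node->tbmres with the next bitmap page and
 *			// clears node->needNewBitmapPage before calling us
 *			slot = BitmapHeapScanNext(scanState);
 *			if (!TupIsNull(slot))
 *				... emit the tuple ...
 *			else if (node->needNewBitmapPage)
 *				... fetch the next bitmap page, or stop if none ...
 *		}
 *		BitmapHeapScanEnd(scanState);
 */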
/*
* Cleans up after the scanning is done.
*/
void
BitmapHeapScanEnd(ScanState *scanState)
{
BitmapTableScanState *node = (BitmapTableScanState *)scanState;
Assert(node->ss.scan_state == SCAN_SCAN);
heap_endscan((HeapScanDesc)node->scanDesc);
node->scanDesc = NULL;
if (NULL != node->iterator)
{
pfree(node->iterator);
node->iterator = NULL;
}
}
/*
* Returns the next matching tuple.
*/
TupleTableSlot *
BitmapHeapScanNext(ScanState *scanState)
{
BitmapTableScanState *node = (BitmapTableScanState *)scanState;
Assert((node->ss.scan_state & SCAN_SCAN) != 0);
/*
 * Extract the necessary information from the bitmap table scan node.
*/
TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
HeapScanDesc scan = node->scanDesc;
TBMIterateResult *tbmres = (TBMIterateResult *)node->tbmres;
Assert(tbmres != NULL && tbmres->ntuples != 0);
Assert(node->needNewBitmapPage == false);
for (;;)
{
CHECK_FOR_INTERRUPTS();
if (node->iterator == NULL)
{
/*
* Fetch the current heap page and identify candidate tuples.
*/
bitgetpage(scan, tbmres);
/*
* Set rs_cindex to first slot to examine
*/
scan->rs_cindex = 0;
/*
 * A NULL iterator indicates that we still need to initialize for a
 * new bitmap page (the bitmap page itself is provided by
 * BitmapTableScan). Normally an iterator would maintain the cursor
 * position within the heap page being scanned; for heap tables,
 * however, that cursor state already lives in the ScanState, so a
 * dummy allocation here merely marks that initialization for the new
 * bitmap page is complete.
*/
node->iterator = palloc(0);
}
else
{
/*
* Continuing in previously obtained page; advance rs_cindex
*/
scan->rs_cindex++;
}
/*
 * If we are past the end of the relation, the current index is out of
 * range, or there is nothing more to look at on this page, request a
 * new bitmap page.
*/
if (tbmres->blockno >= scan->rs_nblocks || scan->rs_cindex < 0 ||
scan->rs_cindex >= scan->rs_ntuples)
{
Assert(NULL != node->iterator);
pfree(node->iterator);
node->iterator = NULL;
node->needNewBitmapPage = true;
return ExecClearTuple(slot);
}
/*
* Okay to fetch the tuple
*/
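/*
 * rs_vistuples[] holds the offsets of the visible candidate tuples on
 * the current page, as collected by bitgetpage() above.
 */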
OffsetNumber targoffset = scan->rs_vistuples[scan->rs_cindex];
Page dp = (Page) BufferGetPage(scan->rs_cbuf);
ItemId lp = PageGetItemId(dp, targoffset);
Assert(ItemIdIsUsed(lp));
scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
scan->rs_ctup.t_len = ItemIdGetLength(lp);
ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
pgstat_count_heap_fetch(scan->rs_rd);
/*
* Set up the result slot to point to this tuple. Note that the slot
* acquires a pin on the buffer.
*/
ExecStoreHeapTuple(&scan->rs_ctup,
slot,
scan->rs_cbuf,
false);
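/*
 * Recheck the tuple's qualifications (for heap tables this is always
 * required; see recheckTuples in BitmapHeapScanBegin) and skip it if
 * the recheck fails.
 */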
if (!BitmapTableScanRecheckTuple(node, slot))
{
ExecClearTuple(slot);
continue;
}
return slot;
}
/*
 * We should never reach here; termination is handled by
 * nodeBitmapTableScan.
*/
Assert(false);
return NULL;
}
/*
* Prepares for a re-scan.
*/
void
BitmapHeapScanReScan(ScanState *scanState)
{
BitmapTableScanState *node = (BitmapTableScanState *)scanState;
Assert(node->scanDesc != NULL);
/* rescan to release any page pin */
heap_rescan(node->scanDesc, NULL);
/* undo bogus "seq scan" count (see notes in BitmapHeapScanBegin) */
pgstat_discount_heap_scan(node->ss.ss_currentRelation);
}
/*
* This routine reads and pins the specified page of the relation, then
* builds an array indicating which tuples on the page are both potentially
* interesting according to the bitmap, and visible according to the snapshot.
*/
static void
bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
{
MIRROREDLOCK_BUFMGR_DECLARE;
BlockNumber page = tbmres->blockno;
Buffer buffer;
Snapshot snapshot;
Page dp;
int ntup;
int curslot;
int minslot;
int maxslot;
int maxoff;
/*
* Acquire pin on the target heap page, trading in any pin we held before.
*/
Assert(page < scan->rs_nblocks);
// -------- MirroredLock ----------
MIRROREDLOCK_BUFMGR_LOCK;
scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
scan->rs_rd,
page);
buffer = scan->rs_cbuf;
snapshot = scan->rs_snapshot;
/*
* We must hold share lock on the buffer content while examining tuple
* visibility. Afterwards, however, the tuples we have found to be
* visible are guaranteed good as long as we hold the buffer pin.
*/
LockBuffer(buffer, BUFFER_LOCK_SHARE);
dp = (Page) BufferGetPage(buffer);
maxoff = PageGetMaxOffsetNumber(dp);
/*
* Determine how many entries we need to look at on this page. If the
* bitmap is lossy then we need to look at each physical item pointer;
* otherwise we just look through the offsets listed in tbmres.
*/
if (tbmres->ntuples >= 0)
{
/* non-lossy case */
minslot = 0;
maxslot = tbmres->ntuples - 1;
}
else
{
/* lossy case */
minslot = FirstOffsetNumber;
maxslot = maxoff;
}
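/*
 * Examine each candidate slot, recording the offsets of tuples that
 * pass the snapshot visibility check in rs_vistuples[].
 */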
ntup = 0;
for (curslot = minslot; curslot <= maxslot; curslot++)
{
OffsetNumber targoffset;
ItemId lp;
HeapTupleData loctup;
bool valid;
if (tbmres->ntuples >= 0)
{
/* non-lossy case */
targoffset = tbmres->offsets[curslot];
}
else
{
/* lossy case */
targoffset = (OffsetNumber) curslot;
}
/*
 * We'd better check for an out-of-range offnum, in case a VACUUM has
 * run since the TID was obtained.
*/
if (targoffset < FirstOffsetNumber || targoffset > maxoff)
continue;
lp = PageGetItemId(dp, targoffset);
/*
* Must check for deleted tuple.
*/
if (!ItemIdIsUsed(lp))
continue;
/*
* check time qualification of tuple, remember it if valid
*/
loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
loctup.t_len = ItemIdGetLength(lp);
ItemPointerSet(&(loctup.t_self), page, targoffset);
valid = HeapTupleSatisfiesVisibility(scan->rs_rd, &loctup, snapshot, buffer);
if (valid)
scan->rs_vistuples[ntup++] = targoffset;
}
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
MIRROREDLOCK_BUFMGR_UNLOCK;
// -------- MirroredLock ----------
Assert(ntup <= MaxHeapTuplesPerPage);
scan->rs_ntuples = ntup;
}