blob: 77316cf3606bbbd7ad7fcde13dc6bcc85a8beecf [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*-------------------------------------------------------------------------
*
* nodeBitmapAppendOnlyscan.c
* Routines to support bitmapped scan from Append-Only relations
*
* This is a modified copy of nodeBitmapHeapscan.c converted to Append-Only.
*
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
* Portions Copyright (c) 2008-2009, Greenplum Inc.
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
* ExecBitmapAppendOnlyScan - scans an AO relation using bitmap info
* BitmapAppendOnlyScanNext - workhorse for above
* ExecInitBitmapAppendOnlyScan - creates and initializes state info.
* ExecBitmapAppendOnlyReScan - prepares to rescan the plan.
* ExecEndBitmapAppendOnlyScan - releases all storage.
*/
#include "postgres.h"
#include "access/heapam.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapAppendOnlyscan.h"
#include "cdb/cdbappendonlyam.h"
#include "pgstat.h"
#include "utils/memutils.h"
#include "miscadmin.h"
#include "parser/parsetree.h"
#include "cdb/cdbvars.h" /* gp_select_invisible */
#include "nodes/tidbitmap.h"
static TupleTableSlot *BitmapAppendOnlyScanNext(BitmapAppendOnlyScanState *node);
/*
 * initFetchDesc
 *		Lazily create the append-only fetch descriptor for this scan state.
 *
 * Nothing is done unless the plan node has isAORow set and the scan state
 * does not already hold a descriptor.  When one is needed it is created with
 * appendonly_fetch_init() on the scan's current relation, using the snapshot
 * stored in the executor state.
 */
static void
initFetchDesc(BitmapAppendOnlyScanState *scanstate)
{
	BitmapAppendOnlyScan *plan = (BitmapAppendOnlyScan *) scanstate->ss.ps.plan;

	/* Only plans flagged isAORow get a fetch descriptor here. */
	if (!plan->isAORow)
		return;

	/* A descriptor from an earlier call is simply kept. */
	if (scanstate->baos_currentAOFetchDesc != NULL)
		return;

	scanstate->baos_currentAOFetchDesc =
		appendonly_fetch_init(scanstate->ss.ss_currentRelation,
							  scanstate->ss.ps.state->es_snapshot);
}
/*
 * freeFetchDesc
 *		Release the append-only fetch descriptor, if the scan state has one.
 *
 * The descriptor is finished with appendonly_fetch_finish(), its memory is
 * pfree'd, and the pointer in the scan state is cleared so that a later
 * initFetchDesc() call can create a fresh one.
 */
static inline void
freeFetchDesc(BitmapAppendOnlyScanState *scanstate)
{
	/* Nothing to release. */
	if (scanstate->baos_currentAOFetchDesc == NULL)
		return;

	/* A descriptor is only ever created for plans flagged isAORow. */
	Assert(((BitmapAppendOnlyScan *) scanstate->ss.ps.plan)->isAORow);

	appendonly_fetch_finish(scanstate->baos_currentAOFetchDesc);
	pfree(scanstate->baos_currentAOFetchDesc);
	scanstate->baos_currentAOFetchDesc = NULL;
}
/*
 * initBitmapState
 *		Allocate the bitmap iteration result buffer if it does not exist yet.
 *
 * The buffer holds a TBMIterateResult header followed by room for
 * MAX_TUPLES_PER_PAGE offsets.  Only the header is zeroed; the offsets
 * area is left as returned by palloc.
 */
static inline void
initBitmapState(BitmapAppendOnlyScanState *scanstate)
{
	/* Keep an existing buffer as is. */
	if (scanstate->baos_tbmres != NULL)
		return;

	scanstate->baos_tbmres = palloc(sizeof(TBMIterateResult) +
									MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));

	/* initialize result header */
	MemSetAligned(scanstate->baos_tbmres, 0, sizeof(TBMIterateResult));
}
/*
 * freeBitmapState
 *		Release the bitmap and the iteration result buffer held by the scan.
 *
 * A HashBitmap is released with tbm_free(); any other bitmap node is handed
 * to tbm_bitmap_free().  Both pointers in the scan state are cleared after
 * their storage is released.
 */
static inline void
freeBitmapState(BitmapAppendOnlyScanState *scanstate)
{
	if (scanstate->baos_tbm != NULL)
	{
		if (!IsA(scanstate->baos_tbm, HashBitmap))
			tbm_bitmap_free(scanstate->baos_tbm);
		else
			tbm_free((HashBitmap *) scanstate->baos_tbm);

		scanstate->baos_tbm = NULL;
	}

	/* No result buffer allocated: done. */
	if (scanstate->baos_tbmres == NULL)
		return;

	pfree(scanstate->baos_tbmres);
	scanstate->baos_tbmres = NULL;
}
/* ----------------------------------------------------------------
 *		BitmapAppendOnlyScanNext
 *
 *		Retrieve the next tuple from the BitmapAppendOnlyScan node's
 *		current relation.
 *
 *		On each call the bitmap result buffer and the fetch descriptor
 *		are created if missing.  If no bitmap is held yet, the outer
 *		plan is executed with MultiExecProcNode() to obtain one.  The
 *		bitmap is then walked page by page with tbm_iterate(); every
 *		(block, offset) pair is converted to an append-only TID and
 *		fetched into the scan slot.  For lossy pages (ntuples < 0) all
 *		MAX_TUPLES_PER_PAGE positions are probed and the original
 *		bitmap qual is rechecked on each fetched tuple.
 *
 *		Returns the scan slot holding a tuple, or the cleared scan slot
 *		when there is nothing (more) to return.  When the bitmap is
 *		exhausted, the fetch descriptor and bitmap state are released
 *		via ExecEagerFreeBitmapAppendOnlyScan().
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapAppendOnlyScanNext(BitmapAppendOnlyScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	AppendOnlyFetchDesc aoFetchDesc;
	Index		scanrelid;
	Node	   *tbm;
	TBMIterateResult *tbmres;
	OffsetNumber pseudoHeapOffset;
	ItemPointerData pseudoHeapTid;
	AOTupleId	aoTid;
	TupleTableSlot *slot;

	/*
	 * extract necessary information from the bitmap scan node
	 */
	estate = node->ss.ps.state;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;

	initBitmapState(node);
	initFetchDesc(node);

	aoFetchDesc = node->baos_currentAOFetchDesc;
	scanrelid = ((BitmapAppendOnlyScan *) node->ss.ps.plan)->scan.scanrelid;
	tbm = node->baos_tbm;
	tbmres = (TBMIterateResult *) node->baos_tbmres;
	Assert(tbmres != NULL);

	/*
	 * Check if we are evaluating PlanQual for tuple of this relation.
	 * Additional checking is not good, but no other way for now. We could
	 * introduce new nodes for this case and handle IndexScan --> NewNode
	 * switching in Init/ReScan plan...
	 */
	if (estate->es_evTuple != NULL &&
		estate->es_evTuple[scanrelid - 1] != NULL)
	{
		if (estate->es_evTupleNull[scanrelid - 1])
		{
			/* The single PlanQual tuple was already handed out. */
			freeFetchDesc(node);
			freeBitmapState(node);

			return ExecClearTuple(slot);
		}

		ExecStoreGenericTuple(estate->es_evTuple[scanrelid - 1],
							  slot, false);

		/* Does the tuple meet the original qual conditions? */
		econtext->ecxt_scantuple = slot;

		ResetExprContext(econtext);

		if (!ExecQual(node->baos_bitmapqualorig, econtext, false))
		{
			ExecEagerFreeBitmapAppendOnlyScan(node);

			ExecClearTuple(slot);	/* would not be returned by scan */
		}

		/* Flag for the next call that no more tuples */
		estate->es_evTupleNull[scanrelid - 1] = true;

		if (!TupIsNull(slot))
		{
			Gpmon_M_Incr_Rows_Out(GpmonPktFromBitmapAppendOnlyScanState(node));
			CheckSendPlanStateGpmonPkt(&node->ss.ps);
		}
		return slot;
	}

	/*
	 * If we haven't yet performed the underlying index scan, or
	 * we have used up the bitmaps from the previous scan, do the next scan,
	 * and prepare the bitmap to be iterated over.
	 */
	if (tbm == NULL)
	{
		tbm = (Node *) MultiExecProcNode(outerPlanState(node));

		if (tbm != NULL && (!(IsA(tbm, HashBitmap) ||
							  IsA(tbm, StreamBitmap))))
			elog(ERROR, "unrecognized result from subplan");

		/*
		 * When a HashBitmap is returned, set the returning bitmaps
		 * in the subplan to NULL, so that the subplan nodes do not
		 * mistakenly try to release the space during the rescan.
		 */
		if (tbm != NULL && IsA(tbm, HashBitmap))
			tbm_reset_bitmaps(outerPlanState(node));

		node->baos_tbm = tbm;
	}

	/* The subplan produced no bitmap at all: nothing to scan. */
	if (tbm == NULL)
	{
		ExecEagerFreeBitmapAppendOnlyScan(node);

		return ExecClearTuple(slot);
	}

	Assert(tbm != NULL);
	Assert(tbmres != NULL);

	for (;;)
	{
		CHECK_FOR_INTERRUPTS();

		if (!node->baos_gotpage)
		{
			/*
			 * Obtain the next pseudo-heap-page-info with item bit-map.  Later,
			 * we'll convert the (pseudo) heap block number and item number to
			 * an Append-Only TID.
			 */
			if (!tbm_iterate(tbm, tbmres))
			{
				/* no more entries in the bitmap */
				break;
			}

			/* If tbmres contains no tuples, continue. */
			if (tbmres->ntuples == 0)
				continue;

			Gpmon_M_Incr(GpmonPktFromBitmapAppendOnlyScanState(node), GPMON_BITMAPAPPENDONLYSCAN_PAGE);
			CheckSendPlanStateGpmonPkt(&node->ss.ps);

			node->baos_gotpage = true;

			/*
			 * Set cindex to first slot to examine
			 */
			node->baos_cindex = 0;

			/* A negative tuple count marks the page as lossy. */
			node->baos_lossy = (tbmres->ntuples < 0);
			if (!node->baos_lossy)
				node->baos_ntuples = tbmres->ntuples;
			else
				node->baos_ntuples = MAX_TUPLES_PER_PAGE;
		}
		else
		{
			/*
			 * Continuing in previously obtained page; advance cindex
			 */
			node->baos_cindex++;
		}

		/*
		 * Out of range?  If so, nothing more to look at on this page
		 */
		if (node->baos_cindex < 0 || node->baos_cindex >= node->baos_ntuples)
		{
			node->baos_gotpage = false;
			continue;
		}

		/*
		 * Must account for lossy page info...
		 */
		if (node->baos_lossy)
		{
			/* We are iterating through all items. */
			pseudoHeapOffset = node->baos_cindex;
		}
		else
		{
			/*
			 * Only offsets[0 .. ntuples-1] were filled in by tbm_iterate, so
			 * the index must be strictly below ntuples.
			 */
			Assert(node->baos_cindex < tbmres->ntuples);
			pseudoHeapOffset = tbmres->offsets[node->baos_cindex];
		}

		/*
		 * Okay to fetch the tuple
		 */
		ItemPointerSet(&pseudoHeapTid,
					   tbmres->blockno,
					   pseudoHeapOffset);

		tbm_convert_appendonly_tid_out(&pseudoHeapTid, &aoTid);

		if (aoFetchDesc != NULL)
		{
			appendonly_fetch(aoFetchDesc, &aoTid, slot);
		}

		/* No tuple in the slot for this TID: try the next one. */
		if (TupIsNull(slot))
			continue;

		pgstat_count_heap_fetch(node->ss.ss_currentRelation);

		/*
		 * If we are using lossy info, we have to recheck the qual
		 * conditions at every tuple.
		 */
		if (node->baos_lossy)
		{
			econtext->ecxt_scantuple = slot;
			ResetExprContext(econtext);

			if (!ExecQual(node->baos_bitmapqualorig, econtext, false))
			{
				/* Fails recheck, so drop it and loop back for another */
				ExecClearTuple(slot);
				continue;
			}
		}

		/* OK to return this tuple */
		if (!TupIsNull(slot))
		{
			Gpmon_M_Incr_Rows_Out(GpmonPktFromBitmapAppendOnlyScanState(node));
			CheckSendPlanStateGpmonPkt(&node->ss.ps);
		}

		return slot;
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	ExecEagerFreeBitmapAppendOnlyScan(node);

	return ExecClearTuple(slot);
}
/* ----------------------------------------------------------------
 *		ExecBitmapAppendOnlyScan(node)
 *
 *		Executor entry point: runs the generic ExecScan() driver with
 *		BitmapAppendOnlyScanNext as the access method and returns
 *		whatever slot ExecScan() returns.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecBitmapAppendOnlyScan(BitmapAppendOnlyScanState *node)
{
	ExecScanAccessMtd accessMtd = (ExecScanAccessMtd) BitmapAppendOnlyScanNext;

	return ExecScan(&node->ss, accessMtd);
}
/* ----------------------------------------------------------------
 *		ExecBitmapAppendOnlyReScan(node)
 *
 *		Prepare the node to be scanned again from the start.
 *
 *		node:		scan state to reset
 *		exprCtxt:	optional outer expression context; when given, its
 *					outer tuple is linked into the node's own per-tuple
 *					expression context and it is passed on to the child.
 *
 *		The current bitmap and iteration result are released, the
 *		per-page cursor is reset, and the outer plan is rescanned
 *		immediately.
 * ----------------------------------------------------------------
 */
void
ExecBitmapAppendOnlyReScan(BitmapAppendOnlyScanState *node, ExprContext *exprCtxt)
{
	EState	   *estate;
	Index		scanrelid;

	estate = node->ss.ps.state;
	scanrelid = ((BitmapAppendOnlyScan *) node->ss.ps.plan)->scan.scanrelid;

	/*
	 * If we are being passed an outer tuple, link it into the "regular"
	 * per-tuple econtext for possible qual eval.
	 */
	if (exprCtxt != NULL)
	{
		ExprContext *stdecontext;

		stdecontext = node->ss.ps.ps_ExprContext;
		stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
	}

	/* If this is re-scanning of PlanQual ... */
	if (estate->es_evTuple != NULL &&
		estate->es_evTuple[scanrelid - 1] != NULL)
	{
		estate->es_evTupleNull[scanrelid - 1] = false;
	}

	/*
	 * NOTE: The appendonly_fetch routine can fetch randomly, so no need to
	 * reset it.
	 */
	freeBitmapState(node);
	tbm_reset_bitmaps(outerPlanState(node));

	/*
	 * The page cursor describes a position inside the iteration result that
	 * was just freed, so it must be reset as well.  Otherwise a rescan that
	 * interrupts a page in mid-iteration leaves baos_gotpage set, and the
	 * next fetch would advance baos_cindex and read offsets from the newly
	 * allocated result buffer before tbm_iterate() has filled it.
	 */
	node->baos_gotpage = false;
	node->baos_lossy = false;
	node->baos_cindex = 0;
	node->baos_ntuples = 0;

	/*
	 * Always rescan the input immediately, to ensure we can pass down any
	 * outer tuple that might be used in index quals.
	 */
	Gpmon_M_Incr(GpmonPktFromBitmapAppendOnlyScanState(node), GPMON_BITMAPAPPENDONLYSCAN_RESCAN);
	CheckSendPlanStateGpmonPkt(&node->ss.ps);

	ExecReScan(outerPlanState(node), exprCtxt);
}
/* ----------------------------------------------------------------
 *		ExecEndBitmapAppendOnlyScan
 *
 *		Shut the node down: free the expression context, clear both
 *		tuple slots, end the outer plan, release the fetch descriptor
 *		and bitmap state, close the scanned relation, and zero the
 *		per-page cursor fields before ending the gpmon packet.
 * ----------------------------------------------------------------
 */
void
ExecEndBitmapAppendOnlyScan(BitmapAppendOnlyScanState *node)
{
	/* Remember the relation up front; it is closed near the end. */
	Relation	rel = node->ss.ss_currentRelation;

	/* Expression context goes first. */
	ExecFreeExprContext(&node->ss.ps);

	/* Empty the result and scan slots. */
	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
	ExecClearTuple(node->ss.ss_ScanTupleSlot);

	/* Shut down the child plan, then drop our own scan resources. */
	ExecEndNode(outerPlanState(node));
	ExecEagerFreeBitmapAppendOnlyScan(node);

	/* Close the scanned relation. */
	ExecCloseScanRelation(rel);

	/* Leave the page cursor in its initial state. */
	node->baos_gotpage = false;
	node->baos_lossy = false;
	node->baos_cindex = 0;
	node->baos_ntuples = 0;

	EndPlanStateGpmonPkt(&node->ss.ps);
}
/* ----------------------------------------------------------------
 *		ExecInitBitmapAppendOnlyScan
 *
 *		Build and return the run-time state for a BitmapAppendOnlyScan
 *		plan node.
 *
 *		node:	the plan node
 *		estate:	executor state the new scan state is attached to
 *		eflags:	executor flags; backward scan and mark/restore are
 *				asserted to be absent
 *
 *		The bitmap, result buffer and fetch descriptor start out NULL;
 *		they are created on demand by the scan routine.
 * ----------------------------------------------------------------
 */
BitmapAppendOnlyScanState *
ExecInitBitmapAppendOnlyScan(BitmapAppendOnlyScan *node, EState *estate, int eflags)
{
	BitmapAppendOnlyScanState *state;
	Relation	rel;

	/* Backward scan and mark/restore are not supported by this node. */
	Assert((eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)) == 0);

	Assert(IsA(node, BitmapAppendOnlyScan));

	/*
	 * The caller must not ask for an unsafe (non-MVCC) snapshot.
	 *
	 * MPP-4703: the MVCC-snapshot restriction is required for correct
	 * results.  Our test mode (gp_select_invisible) may deliberately return
	 * incorrect results, but that's OK.
	 */
	Assert(IsMVCCSnapshot(estate->es_snapshot) || gp_select_invisible);

	/*
	 * Create the state node and set every scan-specific field to its
	 * "nothing loaded yet" value.
	 */
	state = makeNode(BitmapAppendOnlyScanState);
	state->ss.ps.plan = (Plan *) node;
	state->ss.ps.state = estate;

	state->baos_tbm = NULL;
	state->baos_tbmres = NULL;
	state->baos_currentAOFetchDesc = NULL;
	state->baos_gotpage = false;
	state->baos_lossy = false;
	state->baos_cindex = 0;
	state->baos_ntuples = 0;

	/*
	 * Miscellaneous initialization: expression context for the node.
	 */
	ExecAssignExprContext(estate, &state->ss.ps);

	/*
	 * Child expressions: target list, plan qual, and the original bitmap
	 * qual used for rechecks.
	 */
	state->ss.ps.targetlist = (List *)
		ExecInitExpr((Expr *) node->scan.plan.targetlist,
					 (PlanState *) state);
	state->ss.ps.qual = (List *)
		ExecInitExpr((Expr *) node->scan.plan.qual,
					 (PlanState *) state);
	state->baos_bitmapqualorig = (List *)
		ExecInitExpr((Expr *) node->bitmapqualorig,
					 (PlanState *) state);

#define BITMAPAPPENDONLYSCAN_NSLOTS 2

	/*
	 * Tuple table: one result slot and one scan slot.
	 */
	ExecInitResultTupleSlot(estate, &state->ss.ps);
	ExecInitScanTupleSlot(estate, &state->ss);

	/*
	 * Open the base relation and acquire the appropriate lock on it.
	 */
	rel = ExecOpenScanRelation(estate, node->scan.scanrelid);
	state->ss.ss_currentRelation = rel;

	/*
	 * The scan tuple type comes from the relation descriptor.
	 */
	ExecAssignScanType(&state->ss, RelationGetDescr(rel));

	/*
	 * Result tuple type and projection info.
	 */
	ExecAssignResultTypeFromTL(&state->ss.ps);
	ExecAssignScanProjectionInfo(&state->ss);

	/*
	 * Child nodes are initialized last: they will open index scans on our
	 * relation's indexes, and the relation lock must already be held by
	 * then.
	 */
	outerPlanState(state) = ExecInitNode(outerPlan(node), estate, eflags);

	initGpmonPktForBitmapAppendOnlyScan((Plan *) node, &state->ss.ps.gpmon_pkt, estate);

	return state;
}
/*
 * ExecCountSlotsBitmapAppendOnlyScan
 *		Return the number of tuple table slots needed by this plan node:
 *		its own BITMAPAPPENDONLYSCAN_NSLOTS plus the counts reported by
 *		ExecCountSlotsNode() for its outer and inner subplans.
 */
int
ExecCountSlotsBitmapAppendOnlyScan(BitmapAppendOnlyScan *node)
{
	int			nslots = BITMAPAPPENDONLYSCAN_NSLOTS;

	nslots += ExecCountSlotsNode(outerPlan((Plan *) node));
	nslots += ExecCountSlotsNode(innerPlan((Plan *) node));

	return nslots;
}
/*
 * initGpmonPktForBitmapAppendOnlyScan
 *		Initialize the gpmon packet of a BitmapAppendOnlyScan plan node.
 *
 * planNode:	the plan node; asserted to be a BitmapAppendOnlyScan
 * gpmon_pkt:	packet to initialize; asserted to be non-NULL
 * estate:		executor state, used to look up the scanned relation's
 *				range table entry
 *
 * The packet is set up through InitPlanNodeGpmonPkt() with the node's
 * estimated row count and the relation name produced by
 * GetScanRelNameGpmon().
 */
void
initGpmonPktForBitmapAppendOnlyScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate)
{
	RangeTblEntry *rte;
	char		schema_rel_name[SCAN_REL_NAME_BUF_SIZE] = {0};

	/* Validate the arguments before planNode is dereferenced. */
	Assert(planNode != NULL && gpmon_pkt != NULL && IsA(planNode, BitmapAppendOnlyScan));

	rte = rt_fetch(((BitmapAppendOnlyScan *) planNode)->scan.scanrelid,
				   estate->es_range_table);

	Assert(GPMON_BITMAPAPPENDONLYSCAN_TOTAL <= (int) GPMON_QEXEC_M_COUNT);

	InitPlanNodeGpmonPkt(planNode, gpmon_pkt, estate, PMNT_BitmapAppendOnlyScan,
						 (int64) planNode->plan_rows,
						 GetScanRelNameGpmon(rte->relid, schema_rel_name));
}
/*
 * ExecEagerFreeBitmapAppendOnlyScan
 *		Release the scan's fetch descriptor and bitmap state right away.
 *
 * Both helpers tolerate state that was never allocated or was already
 * released, so this may be called more than once.
 */
void
ExecEagerFreeBitmapAppendOnlyScan(BitmapAppendOnlyScanState *node)
{
	/* Fetch descriptor first, then the bitmap and its iteration result. */
	freeFetchDesc(node);
	freeBitmapState(node);
}