blob: f90f7849fe0fb46b5d2896f771f53490e5f83561 [file] [log] [blame]
/*------------------------------------------------------------------------------
*
* cdbappendonlyblockdirectory.h
*
* Portions Copyright (c) 2009, Greenplum Inc.
* Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates.
*
*
* IDENTIFICATION
* src/include/cdb/cdbappendonlyblockdirectory.h
*
*------------------------------------------------------------------------------
*/
#ifndef CDBAPPENDONLYBLOCKDIRECTORY_H
#define CDBAPPENDONLYBLOCKDIRECTORY_H
#include "access/aosegfiles.h"
#include "access/aocssegfiles.h"
#include "access/appendonlytid.h"
#include "access/skey.h"
#include "catalog/indexing.h"
/*
* Integer settings defined outside this header.
*
* gp_blockdirectory_minipage_size is the entry count that IsMinipageFull()
* compares a minipage's numMinipageEntries against.
*
* NOTE(review): gp_blockdirectory_entry_min_range is not referenced anywhere
* in this header; presumably it is a lower bound on the row range covered by
* one block directory entry -- confirm against its definition.
*/
extern int gp_blockdirectory_entry_min_range;
extern int gp_blockdirectory_minipage_size;
/*
* In-memory equivalent of on-disk data structure MinipageEntry, used to
* represent a block directory entry.
*/
typedef struct AppendOnlyBlockDirectoryEntry
{
/*
* The range of row numbers covered by the block directory entry, which is
* the continuous range [firstRowNum, lastRowNum]. There are no gaps (or
* holes) within this range. However, there may be gaps between successive
* block directory entries. For example, entry0 could have range [1,50] and
* entry1 could have [100,150]. The reason gaps arise between successive
* entries is that we allocate row numbers using the gp_fastsequence
* mechanism, which allocates blocks of row numbers of a pre-determined
* size (that may be larger than what is actually being inserted).
*
* The members pair up the same way as the out-parameters of
* AppendOnlyBlockDirectoryEntry_GetBeginRange() (fileOffset, firstRowNum)
* and AppendOnlyBlockDirectoryEntry_GetEndRange() (afterFileOffset,
* lastRowNum).
*
* NOTE(review): presumably fileOffset is where the covered data starts in
* the segment file and afterFileOffset is the offset just past it --
* confirm against the implementation.
*/
struct range
{
int64 fileOffset;
int64 firstRowNum;
int64 afterFileOffset;
int64 lastRowNum;
} range;
} AppendOnlyBlockDirectoryEntry;
/*
* The entry in the minipage.
*
* This is the element type of Minipage.entry[]; AppendOnlyBlockDirectoryEntry
* is described as its in-memory equivalent. Do not reorder or resize the
* members without checking every user of the stored minipage format.
*
* NOTE(review): presumably firstRowNum is the first row number described by
* the entry, fileOffset the segment-file offset where that data begins, and
* rowCount the number of rows it covers -- confirm against the code that
* fills these in.
*/
typedef struct MinipageEntry
{
int64 firstRowNum;
int64 fileOffset;
int64 rowCount;
} MinipageEntry;
/*
* Define a varlena type for a minipage.
*
* The bytes needed for a minipage holding N entries are computed by
* minipage_size(N) as offsetof(Minipage, entry) + N * sizeof(MinipageEntry),
* so sizeof(Minipage) itself is not the size of a populated minipage.
*/
typedef struct Minipage
{
/* Total length. Must be the first. */
int32 _len;
/* NOTE(review): presumably a format version number -- confirm with writers */
int32 version;
/* Number of entries in entry[]; copied into numMinipageEntries on read */
uint32 nEntry;
/*
* Varlena array. Declared with one element but indexed up to nEntry; the
* declared length is kept as-is because changing it would alter
* sizeof(Minipage).
*/
MinipageEntry entry[1];
} Minipage;
/*
* Define the relevant info for a minipage for each
* column group.
*/
typedef struct MinipagePerColumnGroup
{
/*
* Buffer holding the minipage. copy_out_minipage() memcpy()s into it, so it
* must already point at writable storage large enough for the copied
* varlena before that function is called.
*/
Minipage *minipage;
/*
* Number of entries currently in the minipage. copy_out_minipage() sets it
* from minipage->nEntry, and IsMinipageFull() compares it against
* gp_blockdirectory_minipage_size.
*/
uint32 numMinipageEntries;
/*
* NOTE(review): presumably the TID of the block directory tuple this
* minipage was read from or written to -- confirm against the callers.
*/
ItemPointerData tupleTid;
} MinipagePerColumnGroup;
/*
* Upper bound on the number of entries in one minipage; copy_out_minipage()
* asserts that a stored minipage never exceeds it.
*
* The ideal value is not known. The formula takes one eighth of
* MaxHeapTupleSize, subtracts a heap tuple header and 64 * 3 bytes, and
* divides by the entry size, aiming for approximately 8 minipages per heap
* page.
*
* NOTE(review): the "64 * 3" term is not explained here; presumably it is
* headroom for the other attributes of the block directory tuple -- confirm.
*/
#define NUM_MINIPAGE_ENTRIES (((MaxHeapTupleSize)/8 - sizeof(HeapTupleHeaderData) - 64 * 3)\
/ sizeof(MinipageEntry))
/*
* True when the minipage holds exactly gp_blockdirectory_minipage_size
* entries. The comparison is "==", not ">=", so it only fires at the exact
* boundary. Takes a pointer to a MinipagePerColumnGroup.
*/
#define IsMinipageFull(minipagePerColumnGroup) \
((minipagePerColumnGroup)->numMinipageEntries == (uint32) gp_blockdirectory_minipage_size)
/*
* NOTE(review): presumably the "no entry" sentinel for minipage entry numbers
* such as AppendOnlyBlockDirectory.cached_mpentry_num -- confirm at use sites.
*/
#define InvalidEntryNum (-1)
/*
* Define a structure for the append-only relation block directory.
*
* Instances are set up by one of the AppendOnlyBlockDirectory_Init_*()
* functions declared below and torn down by the matching
* AppendOnlyBlockDirectory_End_*() function.
*
* NOTE(review): field notes marked "presumably" are inferred from member
* names and the Init_* parameter lists only; confirm against the
* implementation.
*/
typedef struct AppendOnlyBlockDirectory
{
/* presumably the append-only relation this block directory belongs to */
Relation aoRel;
/*
* Snapshot used for block directory scans.
* AppendOnlyBlockDirectory_UniqueCheck() requires this to be
* InvalidSnapshot on entry, installs the caller's per-tuple snapshot for
* the duration of the lookup, and resets it to InvalidSnapshot afterwards.
*/
Snapshot appendOnlyMetaDataSnapshot;
/* presumably the block directory relation and its index */
Relation blkdirRel;
Relation blkdirIdx;
/* presumably index-maintenance state used when inserting directory tuples */
CatalogIndexState indinfo;
int numColumnGroups;
bool isAOCol;
bool *proj; /* projected columns, used only if isAOCol = TRUE */
MemoryContext memoryContext;
/* presumably the number of elements in segmentFileInfo */
int totalSegfiles;
FileSegInfo **segmentFileInfo;
/*
* Current segment file number.
*/
int currentSegmentFileNum;
FileSegInfo *currentSegmentFileInfo;
/*
* Last minipage that contains an array of MinipageEntries.
*
* NOTE(review): presumably an array with one element per column group
* (numColumnGroups) -- confirm.
*/
MinipagePerColumnGroup *minipages;
/*
* Some temporary space to help form tuples to be inserted into
* the block directory, and to help the index scan.
*/
Datum *values;
bool *nulls;
int numScanKeys;
ScanKey scanKeys;
StrategyNumber *strategyNumbers;
/*
* Minipage entry number, for caching purpose.
*
* XXX: scenarios which call AppendOnlyBlockDirectory_GetEntry()
* may need to consider using this cache.
*/
int cached_mpentry_num;
} AppendOnlyBlockDirectory;
/*
* Metadata about the block currently cached by a fetch operation, together
* with the block directory entry that located it.
*
* NOTE(review): the per-field notes marked "presumably" are inferred from
* member names only; confirm against the fetch code that uses this struct.
*/
typedef struct AOFetchBlockMetadata
{
/*
* Current cached block directory entry.
* FIXME: At times, we rely upon the values in this struct to be valid even
* when AOFetchBlockMetadata->valid = false. This indicates that this should
* live elsewhere.
*
* NOTE(review): this struct has no member named "valid"; presumably the
* "have" flag below is what the FIXME refers to -- confirm and reword.
*/
AppendOnlyBlockDirectoryEntry blockDirectoryEntry;
/* presumably true when the remaining members describe a current block */
bool have;
int64 fileOffset;
int32 overallBlockLen;
int64 firstRowNum;
int64 lastRowNum;
/* presumably true once the block's contents have been read -- confirm */
bool gotContents;
} AOFetchBlockMetadata;
/*
* State of the segment file used by a fetch operation.
*
* NOTE(review): member meanings below are inferred from names only --
* confirm against the fetch code.
*/
typedef struct AOFetchSegmentFile
{
/* presumably whether the segment file is currently open */
bool isOpen;
/* presumably the segment file number */
int num;
/* presumably the logical end-of-file offset of the segment file */
int64 logicalEof;
} AOFetchSegmentFile;
/*
* Tracks block directory scan state for block-directory based ANALYZE.
*
* Initialize with AOBlkDirScan_Init() and release with AOBlkDirScan_Finish().
* AOBlkDirScan is the pointer typedef used by the functions that take this
* state.
*/
typedef struct AOBlkDirScanData
{
/* Block directory being scanned; set by Init, cleared to NULL by Finish */
AppendOnlyBlockDirectory *blkdir;
/* Open ordered system-table scan, or NULL when no scan is open */
SysScanDesc sysscan;
/* Init and Finish both set this to -1 */
int segno;
/* Init and Finish both set this to 0 */
int colgroupno;
} AOBlkDirScanData, *AOBlkDirScan;
/*
* Functions implemented outside this header.
*
* NOTE(review): the grouping comments below are inferred from the function
* names and parameter lists only; consult the implementation for the exact
* contracts.
*/

/*
* Accessors for a single block directory entry. The out-parameter names match
* the members of AppendOnlyBlockDirectoryEntry.range; RangeHasRow presumably
* tests whether checkRowNum lies within [firstRowNum, lastRowNum].
*/
extern void AppendOnlyBlockDirectoryEntry_GetBeginRange(
AppendOnlyBlockDirectoryEntry *directoryEntry,
int64 *fileOffset,
int64 *firstRowNum);
extern void AppendOnlyBlockDirectoryEntry_GetEndRange(
AppendOnlyBlockDirectoryEntry *directoryEntry,
int64 *afterFileOffset,
int64 *lastRowNum);
extern bool AppendOnlyBlockDirectoryEntry_RangeHasRow(
AppendOnlyBlockDirectoryEntry *directoryEntry,
int64 checkRowNum);
/*
* Lookups. GetEntry presumably fills *directoryEntry for the given tuple id
* and column group and returns whether one was found. CoversTuple is what
* AppendOnlyBlockDirectory_UniqueCheck() calls to decide whether a block
* directory entry exists for a tuple.
*/
extern bool AppendOnlyBlockDirectory_GetEntry(
AppendOnlyBlockDirectory *blockDirectory,
AOTupleId *aoTupleId,
int columnGroupNo,
AppendOnlyBlockDirectoryEntry *directoryEntry);
extern int64 AOBlkDirScan_GetRowNum(
AOBlkDirScan blkdirscan,
int targsegno,
int colgroupno,
int64 targrow,
int64 *startrow);
extern bool AppendOnlyBlockDirectory_CoversTuple(
AppendOnlyBlockDirectory *blockDirectory,
AOTupleId *aoTupleId);
/*
* Initialization, one variant per usage mode (insert, search, unique checks,
* add column). Each has a matching AppendOnlyBlockDirectory_End_*() below.
*/
extern void AppendOnlyBlockDirectory_Init_forInsert(
AppendOnlyBlockDirectory *blockDirectory,
Snapshot appendOnlyMetaDataSnapshot,
FileSegInfo *segmentFileInfo,
int64 lastSequence,
Relation aoRel,
int segno,
int numColumnGroups,
bool isAOCol);
extern void AppendOnlyBlockDirectory_Init_forSearch(
AppendOnlyBlockDirectory *blockDirectory,
Snapshot appendOnlyMetaDataSnapshot,
FileSegInfo **segmentFileInfo,
int totalSegfiles,
Relation aoRel,
int numColumnGroups,
bool isAOCol,
bool *proj);
extern void AppendOnlyBlockDirectory_Init_forUniqueChecks(AppendOnlyBlockDirectory *blockDirectory,
Relation aoRel,
int numColumnGroups,
Snapshot snapshot);
extern void AppendOnlyBlockDirectory_Init_addCol(
AppendOnlyBlockDirectory *blockDirectory,
Snapshot appendOnlyMetaDataSnapshot,
FileSegInfo *segmentFileInfo,
Relation aoRel,
int segno,
int numColumnGroups,
bool isAOCol);
/*
* Adds an entry for a column group. The firstRowNum / fileOffset / rowCount
* parameters correspond to the members of MinipageEntry.
*/
extern bool AppendOnlyBlockDirectory_InsertEntry(
AppendOnlyBlockDirectory *blockDirectory,
int columnGroupNo,
int64 firstRowNum,
int64 fileOffset,
int64 rowCount,
bool addColAction);
/*
* Teardown counterparts of the Init_* functions above.
*/
extern void AppendOnlyBlockDirectory_End_forInsert(
AppendOnlyBlockDirectory *blockDirectory);
extern void AppendOnlyBlockDirectory_End_forSearch(
AppendOnlyBlockDirectory *blockDirectory);
extern void AppendOnlyBlockDirectory_End_addCol(
AppendOnlyBlockDirectory *blockDirectory);
/*
* Presumably removes the block directory entries of one segment file for the
* given column group -- confirm.
*/
extern void AppendOnlyBlockDirectory_DeleteSegmentFile(
Relation aoRel,
Snapshot snapshot,
int segno,
int columnGroupNo);
extern void AppendOnlyBlockDirectory_End_forUniqueChecks(
AppendOnlyBlockDirectory *blockDirectory);
extern void AppendOnlyBlockDirectory_End_forSearch_InSequence(
AOBlkDirScan seqscan);
/*
* Presumably inserts a placeholder entry carrying only a first row number and
* file offset (there is no rowCount parameter) -- confirm the purpose with the
* implementation.
*/
extern void AppendOnlyBlockDirectory_InsertPlaceholder(AppendOnlyBlockDirectory *blockDirectory,
int64 firstRowNum,
int64 fileOffset,
int columnGroupNo);
/*
 * AppendOnlyBlockDirectory_UniqueCheck
 *
 * Report whether the block directory contains an entry for the given tuple.
 * When there is no such entry, the tuple is not physically present in the
 * segfile.
 *
 * The lookup has to run under the per-tuple snapshot supplied by the caller,
 * which must be of type SNAPSHOT_DIRTY or SNAPSHOT_SELF. The block directory
 * must not have a metadata snapshot installed on entry; one is installed only
 * for the duration of this call. See
 * AppendOnlyBlockDirectory_Init_forUniqueChecks() for details on why the
 * metadata snapshot can't be set up at init time.
 */
static inline bool
AppendOnlyBlockDirectory_UniqueCheck(AppendOnlyBlockDirectory *blockDirectory,
									 AOTupleId *aoTupleId,
									 Snapshot appendOnlyMetaDataSnapshot)
{
	bool		entryExists;

	Assert(appendOnlyMetaDataSnapshot->snapshot_type == SNAPSHOT_DIRTY ||
		   appendOnlyMetaDataSnapshot->snapshot_type == SNAPSHOT_SELF);
	Assert(blockDirectory->appendOnlyMetaDataSnapshot == InvalidSnapshot);

	/* Install the caller's snapshot for the block directory scan. */
	blockDirectory->appendOnlyMetaDataSnapshot = appendOnlyMetaDataSnapshot;

	entryExists = AppendOnlyBlockDirectory_CoversTuple(blockDirectory, aoTupleId);

	/*
	 * SNAPSHOT_DIRTY is stack-allocated, so drop the reference again before
	 * returning rather than leaving a stack pointer in the block directory.
	 */
	blockDirectory->appendOnlyMetaDataSnapshot = InvalidSnapshot;

	return entryExists;
}
/*
 * minipage_size
 *
 * Number of bytes occupied by a Minipage that holds nEntry entries: the
 * fixed part that precedes the entry array plus nEntry MinipageEntry slots.
 */
static inline uint32
minipage_size(uint32 nEntry)
{
	size_t		headerBytes = offsetof(Minipage, entry);
	size_t		entryBytes = sizeof(MinipageEntry) * nEntry;

	return headerBytes + entryBytes;
}
/*
 * copy_out_minipage
 *
 * Copy the minipage stored in a deformed tuple attribute into the buffer
 * that minipageInfo->minipage already points to, and refresh
 * minipageInfo->numMinipageEntries from the copied header.
 *
 * minipage_value / minipage_isnull are the attribute's datum and null flag;
 * the attribute must not be null. The size limits are enforced with Assert()
 * only, so they are checked solely in assert-enabled builds.
 */
static inline void
copy_out_minipage(MinipagePerColumnGroup *minipageInfo,
				  Datum minipage_value,
				  bool minipage_isnull)
{
	struct varlena *stored;
	struct varlena *plain;
	Minipage   *dest;

	Assert(!minipage_isnull);

	stored = (struct varlena *) DatumGetPointer(minipage_value);
	plain = pg_detoast_datum(stored);
	dest = minipageInfo->minipage;

	/* The stored minipage must fit within the largest permitted minipage. */
	Assert(VARSIZE(plain) <= minipage_size(NUM_MINIPAGE_ENTRIES));
	memcpy(dest, plain, VARSIZE(plain));

	/* Release the detoasted copy, but only if it is a separate allocation. */
	if (plain != stored)
		pfree(plain);

	Assert(dest->nEntry <= NUM_MINIPAGE_ENTRIES);
	minipageInfo->numMinipageEntries = dest->nEntry;
}
/*
 * AOBlkDirScan_Init
 *
 * Prepare a block directory scan state for use with the given block
 * directory. No system-table scan is open afterwards; segno and colgroupno
 * get the same starting values (-1 and 0) that AOBlkDirScan_Finish() resets
 * them to.
 */
static inline void
AOBlkDirScan_Init(AOBlkDirScan blkdirscan,
				  AppendOnlyBlockDirectory *blkdir)
{
	/* Position fields start out at their reset values. */
	blkdirscan->segno = -1;
	blkdirscan->colgroupno = 0;

	/* No scan has been opened yet. */
	blkdirscan->sysscan = NULL;

	blkdirscan->blkdir = blkdir;
}
/*
 * AOBlkDirScan_Finish
 *
 * End the ordered system-table scan, if one is open, and reset the scan
 * state.
 *
 * This should be called before fetch_finish().
 */
static inline void
AOBlkDirScan_Finish(AOBlkDirScan blkdirscan)
{
	SysScanDesc openScan;

	/*
	 * The block directory must not have been destroyed by fetch_finish()
	 * already: the sysscan holds the blkdir relation, and calling
	 * systable_endscan_ordered() after that relation has been freed would
	 * crash.
	 */
	Assert(blkdirscan->blkdir != NULL);

	openScan = blkdirscan->sysscan;
	if (openScan != NULL)
	{
		systable_endscan_ordered(openScan);
		blkdirscan->sysscan = NULL;
	}

	/* Detach from the block directory and restore the starting position. */
	blkdirscan->blkdir = NULL;
	blkdirscan->colgroupno = 0;
	blkdirscan->segno = -1;
}
#endif