blob: 12df08f3d671ed0dd7df17af5ca533b59ea0b554 [file] [log] [blame]
/*------------------------------------------------------------------------------
*
* Code dealing with the compaction of append-only tables.
*
* Copyright (c) 2013-Present VMware, Inc. or its affiliates.
*
*
* IDENTIFICATION
* src/backend/access/aocs/aocs_compaction.c
*
*------------------------------------------------------------------------------
*/
#include "postgres.h"
#include <limits.h>
#include "access/amapi.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/transam.h"
#include "access/aosegfiles.h"
#include "access/aomd.h"
#include "access/aocs_compaction.h"
#include "access/appendonly_compaction.h"
#include "access/appendonlywriter.h"
#include "catalog/catalog.h"
#include "catalog/indexing.h"
#include "catalog/pg_appendonly.h"
#include "catalog/pg_attribute_encoding.h"
#include "cdb/cdbaocsam.h"
#include "cdb/cdbvars.h"
#include "commands/vacuum.h"
#include "executor/executor.h"
#include "nodes/execnodes.h"
#include "storage/lmgr.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/relcache.h"
#include "utils/snapmgr.h"
#include "utils/guc.h"
#include "miscadmin.h"
#include "commands/progress.h"
#include "pgstat.h"
/*
 * Hook for plugins to get control after a tuple has been moved or thrown
 * away in AOCSSegmentFileFullCompaction().
 *
 * In this file the hook, when set, is invoked once per moved tuple with the
 * relation and the TID the tuple had before it was moved.
 *
 * For example, zombodb will delete the corresponding docs after the tuple
 * is moved or thrown away.
 */
aocs_compaction_delete_hook_type aocs_compaction_delete_hook = NULL;
/*
 * Drops a segment file.
 *
 * Actually, we just truncate the segfile to 0 bytes, to reclaim the space.
 * Before GPDB 6, we used to remove the file, but with WAL replication, we
 * no longer have a convenient function to remove a single segment of a
 * relation. An empty file is almost as good as a non-existent file. If
 * the relation is dropped later, the code in mdunlink() will remove all
 * segments, including any empty ones we've left behind.
 *
 * The truncation is applied to the file of every attribute of 'aorel' for
 * logical segment 'segno'. Files that cannot be opened are skipped with a
 * DEBUG1 message. 'vacrelstats' is handed through to TruncateAOSegmentFile().
 */
void
AOCSCompaction_DropSegmentFile(Relation aorel, int segno, AOVacuumRelStats *vacrelstats)
{
	int			attno;

	Assert(RelationStorageIsAoCols(aorel));

	/* Walk the attributes by their 1-based attribute number. */
	for (attno = 1; attno <= RelationGetNumberOfAttributes(aorel); attno++)
	{
		char		path[MAXPGPATH];
		int			physSegNo;
		FileNumber	filenum;
		File		file;

		/* Resolve the file number of this column and build its file name. */
		filenum = GetFilenumForAttribute(RelationGetRelid(aorel), attno);
		MakeAOSegmentFileName(aorel, segno, filenum, &physSegNo, path);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Drop segment file: segno %d",
			   physSegNo);

		file = OpenAOSegmentFile(aorel, path, 0);
		if (file < 0)
		{
			/*
			 * The file we were about to drop/truncate didn't exist. That's
			 * normal, for example, if a column is added with ALTER TABLE ADD
			 * COLUMN.
			 */
			elog(DEBUG1, "could not truncate segfile %s, because it does not exist", path);
			continue;
		}

		/* Reclaim the space by cutting the file down to zero bytes. */
		TruncateAOSegmentFile(file, aorel, physSegNo, 0, vacrelstats);
		CloseAOSegmentFile(file, aorel);
	}
}
/*
 * AOCSSegmentFileTruncateToEOF()
 *
 * Truncates the files for all columns of logical segfile 'segno' to
 * the EOF values from 'vpinfo' (one entry per column). A column file that
 * cannot be opened is skipped. The caller is responsible for locking so
 * that a concurrent backend doesn't write to the segfile while we truncate
 * it.
 *
 * This is used to clean up space left behind by aborted or crashed
 * transactions.
 */
void
AOCSSegmentFileTruncateToEOF(Relation aorel, int segno, AOCSVPInfo *vpinfo, AOVacuumRelStats *vacrelstats)
{
	const char *relname = RelationGetRelationName(aorel);
	int			col;

	Assert(RelationStorageIsAoCols(aorel));

	for (col = 0; col < vpinfo->nEntry; col++)
	{
		char		path[MAXPGPATH];
		int32		physSegNo;
		File		file;
		/* File numbers are looked up by 1-based attribute number. */
		FileNumber	filenum = GetFilenumForAttribute(RelationGetRelid(aorel), col + 1);
		/* Logical EOF recorded for this column. */
		int64		eof = vpinfo->entry[col].eof;

		MakeAOSegmentFileName(aorel, segno, filenum, &physSegNo, path);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Opening AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, "
			   "logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   RelationGetRelid(aorel),
			   aorel->rd_locator.relNumber,
			   col,
			   segno,
			   physSegNo,
			   eof);

		file = OpenAOSegmentFile(aorel, path, eof);
		if (file < 0)
		{
			/* Nothing to truncate for this column; note it and move on. */
			elogif(Debug_appendonly_print_compaction, LOG,
				   "No gp_relation_node entry for AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, "
				   "logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
				   get_namespace_name(RelationGetNamespace(aorel)),
				   relname,
				   RelationGetRelid(aorel),
				   aorel->rd_locator.relNumber,
				   col,
				   segno,
				   physSegNo,
				   eof);
			continue;
		}

		/* Cut the column file back to its logical EOF. */
		TruncateAOSegmentFile(file, aorel, physSegNo, eof, vacrelstats);
		CloseAOSegmentFile(file, aorel);

		elogif(Debug_appendonly_print_compaction, LOG,
			   "Successfully truncated AO COL relation \"%s.%s\", relation id %u, relfilenode %u column #%d, "
			   "logical segment #%d (physical segment file #%d, logical EOF " INT64_FORMAT ")",
			   get_namespace_name(RelationGetNamespace(aorel)),
			   relname,
			   RelationGetRelid(aorel),
			   aorel->rd_locator.relNumber,
			   col,
			   segno,
			   physSegNo,
			   eof);
	}
}
/*
 * Moves one tuple into the compaction target.
 *
 * All attributes of 'slot' are extracted and inserted through 'insertDesc'.
 * The tuple id assigned by the insert is stored back into slot->tts_tid, and
 * if 'resultRelInfo' has any indexes, index entries are then created from
 * the slot (which by that point carries the new tuple id).
 *
 * 'estate' is used for the index insertion and its per-tuple expression
 * context is reset afterwards.
 */
static void
AOCSMoveTuple(TupleTableSlot *slot,
			  AOCSInsertDesc insertDesc,
			  ResultRelInfo *resultRelInfo,
			  EState *estate)
{
	ItemPointerData oldTid;
	AOTupleId  *oldAoTupleId;
	AOTupleId	newAoTupleId;

	Assert(resultRelInfo);
	Assert(slot);
	Assert(estate);

	/*
	 * Remember the source tuple id by value. slot->tts_tid is overwritten
	 * with the new tuple id below, so a pointer into the slot would make the
	 * debug message at the end report the new id on both sides.
	 */
	oldTid = slot->tts_tid;
	oldAoTupleId = (AOTupleId *) &oldTid;

	/* Extract all the values of the tuple */
	slot_getallattrs(slot);

	(void) aocs_insert_values(insertDesc,
							  slot->tts_values,
							  slot->tts_isnull,
							  &newAoTupleId);

	/* From here on the slot identifies the tuple at its new location. */
	memcpy(&slot->tts_tid, &newAoTupleId, sizeof(ItemPointerData));

	/* insert index' tuples if needed */
	if (resultRelInfo->ri_NumIndices > 0)
	{
		ExecInsertIndexTuples(resultRelInfo,
							  slot, estate, false, false,
							  NULL, NIL, false);
		ResetPerTupleExprContext(estate);
	}

	elogif(Debug_appendonly_print_compaction, DEBUG5,
		   "Compaction: Moved tuple (%d," INT64_FORMAT ") -> (%d," INT64_FORMAT ")",
		   AOTupleIdGet_segmentFileNum(oldAoTupleId), AOTupleIdGet_rowNum(oldAoTupleId),
		   AOTupleIdGet_segmentFileNum(&newAoTupleId), AOTupleIdGet_rowNum(&newAoTupleId));
}
/*
 * Subroutine of AOCSCompact().
 *
 * Scans segment file 'fsinfo->segno' of 'aorel' under 'snapshot' and
 * re-inserts every tuple the scan returns through 'insertDesc', creating
 * index entries for the moved tuples along the way. After each move the
 * aocs_compaction_delete_hook, if set, is called with the tuple's old TID.
 *
 * Once the scan is complete, the source segment is marked as awaiting drop
 * and its visimap entries (and, if the table has a block directory, its
 * block directory entries) are deleted.
 *
 * 'vacrelstats->num_dead_tuples' is advanced by the number of rows the scan
 * processed but did not return. Vacuum progress counters are updated while
 * scanning.
 *
 * Always returns true.
 */
static bool
AOCSSegmentFileFullCompaction(Relation aorel,
							  AOCSInsertDesc insertDesc,
							  AOCSFileSegInfo *fsinfo,
							  Snapshot snapshot,
							  AOVacuumRelStats *vacrelstats)
{
	const char *relname;
	AppendOnlyVisimap visiMap;
	AOCSScanDesc scanDesc;
	TupleDesc	tupDesc;
	TupleTableSlot *slot;
	int			compact_segno;
	ResultRelInfo *resultRelInfo;
	EState	   *estate;
	int64		tupleCount = 0;
	int64		tuplePerPage = INT_MAX;
	ItemPointerData otid;
	int64		curr_num_dead_tuples = 0;
	int64		prev_num_dead_tuples = 0;
	int64		curr_heap_blks_scanned = 0;
	int64		prev_heap_blks_scanned = 0;

	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);
	Assert(RelationStorageIsAoCols(aorel));
	Assert(insertDesc);

	compact_segno = fsinfo->segno;

	/*
	 * Estimate the number of tuples per var block; it paces the vacuum delay
	 * check in the scan loop. The value is used as a modulus there, so it
	 * must never be zero: integer division yields 0 whenever the segment
	 * holds fewer tuples than var blocks.
	 */
	if (fsinfo->varblockcount > 0)
	{
		tuplePerPage = fsinfo->total_tupcount / fsinfo->varblockcount;
		if (tuplePerPage < 1)
			tuplePerPage = 1;
	}

	relname = RelationGetRelationName(aorel);

	AppendOnlyVisimap_Init(&visiMap,
						   insertDesc->visimaprelid,
						   insertDesc->visimapidxid,
						   ShareLock,
						   snapshot);

	elogif(Debug_appendonly_print_compaction,
		   LOG, "Compact AO segfile %d, relation %s",
		   compact_segno, relname);

	/* Restrict the scan to the single segment being compacted. */
	scanDesc = aocs_beginrangescan(aorel,
								   snapshot, snapshot,
								   &compact_segno, 1);

	tupDesc = RelationGetDescr(aorel);
	slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsVirtual);
	slot->tts_tableOid = RelationGetRelid(aorel);

	/*
	 * We need a ResultRelInfo and an EState so we can use the regular
	 * executor's index-entry-making machinery.
	 */
	estate = CreateExecutorState();
	resultRelInfo = makeNode(ResultRelInfo);
	resultRelInfo->ri_RangeTableIndex = 1;	/* dummy */
	resultRelInfo->ri_RelationDesc = aorel;
	resultRelInfo->ri_TrigDesc = NULL;	/* we don't fire triggers */
	ExecOpenIndices(resultRelInfo, false);
	estate->es_opened_result_relations =
		lappend(estate->es_opened_result_relations, resultRelInfo);

	/*
	 * We don't want uniqueness checks to be performed while "insert"ing tuples
	 * to a destination segfile during AOCSMoveTuple(). This is to ensure that
	 * we can avoid spurious conflicts between the moved tuple and the original
	 * tuple.
	 */
	estate->gp_bypass_unique_check = true;

	while (aocs_getnext(scanDesc, ForwardScanDirection, slot))
	{
		CHECK_FOR_INTERRUPTS();

		/*
		 * AppendOnlyVisimap_IsVisible() has already been called in
		 * aocs_getnext().
		 */
		Assert(AppendOnlyVisimap_IsVisible(&scanDesc->visibilityMap,
										   (AOTupleId *) &slot->tts_tid));

		/* AOCSMoveTuple() overwrites slot->tts_tid; keep the old TID for the hook. */
		otid = slot->tts_tid;

		AOCSMoveTuple(slot,
					  insertDesc,
					  resultRelInfo,
					  estate);

		if (aocs_compaction_delete_hook)
			(*aocs_compaction_delete_hook) (aorel, &otid);

		/*
		 * Check for vacuum delay point after approximately a var block
		 */
		tupleCount++;
		if (VacuumCostActive && tupleCount % tuplePerPage == 0)
		{
			vacuum_delay_point();
		}

		/*
		 * Report that we are now scanning and compacting segment files.
		 *
		 * NOTE(review): the "+ 1" below is not applied in the final
		 * accumulation after the loop -- confirm the intended accounting of
		 * segrowsprocessed.
		 */
		curr_num_dead_tuples = scanDesc->segrowsprocessed + 1 - tupleCount;
		if (curr_num_dead_tuples > prev_num_dead_tuples)
		{
			pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
										 vacrelstats->num_dead_tuples + curr_num_dead_tuples);
			prev_num_dead_tuples = curr_num_dead_tuples;
		}

		/* Only push a progress update when the block estimate advances. */
		curr_heap_blks_scanned = RelationGuessNumberOfBlocksFromSize(scanDesc->totalBytesRead);
		if (curr_heap_blks_scanned > prev_heap_blks_scanned)
		{
			pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
										 curr_heap_blks_scanned);
			prev_heap_blks_scanned = curr_heap_blks_scanned;
		}
	}

	/* Accumulate total number dead tuples */
	vacrelstats->num_dead_tuples += scanDesc->segrowsprocessed - tupleCount;

	MarkAOCSFileSegInfoAwaitingDrop(aorel, compact_segno);

	AppendOnlyVisimap_DeleteSegmentFile(&visiMap,
										compact_segno);

	/* Delete all mini pages of the segment files if block directory exists */
	if (OidIsValid(insertDesc->blkdirrelid))
	{
		AppendOnlyBlockDirectory_DeleteSegmentFile(aorel,
												   snapshot,
												   compact_segno,
												   0);
	}

	elogif(Debug_appendonly_print_compaction, LOG,
		   "Finished compaction: "
		   "AO segfile %d, relation %s, moved tuple count " INT64_FORMAT,
		   compact_segno, relname, tupleCount);

	AppendOnlyVisimap_Finish(&visiMap, NoLock);

	ExecCloseIndices(resultRelInfo);
	FreeExecutorState(estate);

	ExecDropSingleTupleTableSlot(slot);

	aocs_endscan(scanDesc);

	return true;
}
/*
 * Performs a compaction of an append-only relation in column-orientation.
 *
 * Segment 'compaction_segno' is compacted only if
 * AppendOnlyCompaction_ShouldCompact() says so; 'isFull' is passed through
 * to that decision.
 *
 * The compaction segment file should be locked for this transaction in
 * the appendonlywriter.c code.
 *
 * *insert_segno is both input and output. If it is -1 and the segment needs
 * compaction, an insertion target is chosen (never one of the segfiles
 * listed in 'avoid_segnos') and stored there, so later calls can reuse it.
 * If no target could be chosen it remains -1 and nothing is compacted.
 *
 * The caller is required to hold either an AccessExclusiveLock (vacuum full)
 * or a ShareLock on the relation.
 */
void
AOCSCompact(Relation aorel,
			int compaction_segno,
			int *insert_segno,
			bool isFull,
			List *avoid_segnos,
			AOVacuumRelStats *vacrelstats)
{
	Snapshot	metaSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid));
	const char *relname;
	AOCSFileSegInfo *segInfo;
	bool		shouldCompact;

	Assert(RelationStorageIsAoCols(aorel));
	Assert(Gp_role == GP_ROLE_EXECUTE || Gp_role == GP_ROLE_UTILITY);

	relname = RelationGetRelationName(aorel);
	elogif(Debug_appendonly_print_compaction, LOG,
		   "Compact AO relation %s", relname);

	/* Fetch under the write lock to get latest committed eof. */
	segInfo = GetAOCSFileSegInfo(aorel, metaSnapshot, compaction_segno, true);

	shouldCompact = AppendOnlyCompaction_ShouldCompact(aorel,
													   compaction_segno,
													   segInfo->total_tupcount,
													   isFull,
													   metaSnapshot);

	/* get the insertion segment on first call. */
	if (shouldCompact && *insert_segno == -1)
		*insert_segno = ChooseSegnoForCompactionWrite(aorel, avoid_segnos);

	if (shouldCompact && *insert_segno != -1)
	{
		AOCSInsertDesc insertDesc = aocs_insert_init(aorel, *insert_segno);

		(void) AOCSSegmentFileFullCompaction(aorel,
											 insertDesc,
											 segInfo,
											 metaSnapshot,
											 vacrelstats);

		insertDesc->skipModCountIncrement = true;
		aocs_insert_finish(insertDesc, NULL);
	}

	/*
	 * FIXME: Could not find a target segment. What now? (When compaction is
	 * wanted but *insert_segno is still -1, the segment is left as it is.)
	 */

	pfree(segInfo);
	UnregisterSnapshot(metaSnapshot);
}