| /*------------------------------------------------------------------------- |
| * |
| * vacuumlazy.c |
| * Concurrent ("lazy") vacuuming. |
| * |
| * |
| * The major space usage for LAZY VACUUM is storage for the array of dead |
| * tuple TIDs, with the next biggest need being storage for per-disk-page |
| * free space info. We want to ensure we can vacuum even the very largest |
| * relations with finite memory space usage. To do that, we set upper bounds |
| * on the number of tuples and pages we will keep track of at once. |
| * |
| * We are willing to use at most maintenance_work_mem memory space to keep |
| * track of dead tuples. We initially allocate an array of TIDs of that size, |
| * with an upper limit that depends on table size (this limit ensures we don't |
| * allocate a huge area uselessly for vacuuming small tables). If the array |
| * threatens to overflow, we suspend the heap scan phase and perform a pass of |
| * index cleanup and page compaction, then resume the heap scan with an empty |
| * TID array. |
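| * |
| * For example, each TID is a 6-byte ItemPointerData, so a |
| * maintenance_work_mem of 16MB leaves room for roughly 2.8 million |
| * dead-tuple TIDs per index-cleanup pass. |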
| * |
| * We can limit the storage for page free space to MaxFSMPages entries, |
| * since that's the most the free space map will be willing to remember |
| * anyway. If the relation has fewer than that many pages with free space, |
| * life is easy: just build an array of per-page info. If it has more, |
| * we store the free space info as a heap ordered by amount of free space, |
| * so that we can discard the pages with least free space to ensure we never |
| * have more than MaxFSMPages entries in all. The surviving page entries |
| * are passed to the free space map at conclusion of the scan. |
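| * |
| * For example, if MaxFSMPages were 3 and the scan encountered pages |
| * offering 100, 400, 200, and then 300 bytes of free space, the |
| * 100-byte entry would be evicted and {200, 300, 400} would survive. |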
| * |
| * If we're processing a table with no indexes, we can just vacuum each page |
| * as we go; there's no need to save up multiple tuples to minimize the number |
| * of index scans performed. So we don't use maintenance_work_mem memory for |
| * the TID array, just enough to hold as many heap tuples as fit on one page. |
| * |
| * |
| * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.81.2.3 2007/09/16 02:37:54 tgl Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include <sys/file.h> |
| #include <math.h> |
| #include <fcntl.h> |
| |
| #include "access/genam.h" |
| #include "access/heapam.h" |
| #include "access/transam.h" |
| #include "access/aosegfiles.h" |
| #include "access/parquetsegfiles.h" |
| #include "access/aomd.h" |
| #include "commands/vacuum.h" |
| #include "catalog/catalog.h" |
| #include "catalog/indexing.h" |
| #include "cdb/cdbappendonlyam.h" |
| #include "miscadmin.h" |
| #include "pgstat.h" |
| #include "storage/freespace.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| #include "utils/pg_rusage.h" |
| #include "utils/fmgroids.h" |
| #include "storage/smgr.h" |
| |
| #include "cdb/cdbvars.h" |
| #include "cdb/cdbpersistentfilesysobj.h" |
| |
| |
| /* |
| * Space/time tradeoff parameters: do these need to be user-tunable? |
| * |
| * To consider truncating the relation, we want there to be at least |
| * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever |
| * is less) potentially-freeable pages. |
| */ |
| #define REL_TRUNCATE_MINIMUM 1000 |
| #define REL_TRUNCATE_FRACTION 16 |
| |
| /* |
| * Guesstimation of number of dead tuples per page. This is used to |
| * provide an upper limit to memory allocated when vacuuming small |
| * tables. |
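| * |
| * For example, a 100-page table caps the dead-tuple array at |
| * 100 * 200 = 20,000 entries (about 117kB), no matter how large |
| * maintenance_work_mem is. |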
| */ |
| #define LAZY_ALLOC_TUPLES 200 |
| |
| typedef struct LVRelStats |
| { |
| /* hasindex = true means two-pass strategy; false means one-pass */ |
| bool hasindex; |
| /* Overall statistics about rel */ |
| BlockNumber rel_pages; |
| double rel_tuples; |
| BlockNumber pages_removed; |
| double tuples_deleted; |
| BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ |
| Size threshold; /* minimum interesting free space */ |
| /* List of TIDs of tuples we intend to delete */ |
| /* NB: this list is ordered by TID address */ |
| int num_dead_tuples; /* current # of entries */ |
| int max_dead_tuples; /* # slots allocated in array */ |
| ItemPointer dead_tuples; /* array of ItemPointerData */ |
| /* Array or heap of per-page info about free space */ |
| /* We use a simple array until it fills up, then convert to heap */ |
| bool fs_is_heap; /* are we using heap organization? */ |
| int num_free_pages; /* current # of entries */ |
| int max_free_pages; /* # slots allocated in array */ |
| PageFreeSpaceInfo *free_pages; /* array or heap of blkno/avail */ |
| BlockNumber tot_free_pages; /* total pages with >= threshold space */ |
| } LVRelStats; |
| |
| |
| static int elevel = -1; |
| |
| static TransactionId OldestXmin; |
| static TransactionId FreezeLimit; |
| |
| |
| /* non-export function prototypes */ |
| static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, |
| Relation *Irel, int nindexes, List *updated_stats, List *all_extra_oids); |
| static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); |
| static void lazy_vacuum_index(Relation indrel, |
| IndexBulkDeleteResult **stats, |
| LVRelStats *vacrelstats, |
| List *extra_oids); |
| static void lazy_cleanup_index(Relation indrel, |
| IndexBulkDeleteResult *stats, |
| LVRelStats *vacrelstats, |
| List *updated_stats); |
| static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, |
| int tupindex, LVRelStats *vacrelstats); |
| static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats); |
| static BlockNumber count_nondeletable_pages(Relation onerel, |
| LVRelStats *vacrelstats); |
| static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks); |
| static void lazy_record_dead_tuple(LVRelStats *vacrelstats, |
| ItemPointer itemptr); |
| static void lazy_record_free_space(LVRelStats *vacrelstats, |
| BlockNumber page, Size avail); |
| static bool lazy_tid_reaped(ItemPointer itemptr, void *state); |
| static void lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats); |
| static int vac_cmp_itemptr(const void *left, const void *right); |
| static int vac_cmp_page_spaces(const void *left, const void *right); |
| |
| |
| /* |
| * lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation |
| * |
| * This routine vacuums a single heap, cleans out its indexes, and |
| * updates its relpages and reltuples statistics. |
| * |
| * At entry, we have already established a transaction and opened |
| * and locked the relation. |
| */ |
| void |
| lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, List *updated_stats) |
| { |
| LVRelStats *vacrelstats; |
| Relation *Irel; |
| int nindexes; |
| BlockNumber possibly_freeable; |
| |
| if (vacstmt->verbose) |
| elevel = INFO; |
| else |
| elevel = DEBUG2; |
| |
| if (Gp_role == GP_ROLE_DISPATCH) |
| elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */ |
| |
| vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); |
| |
| /* |
| * Execute the various vacuum operations. Append-only tables are |
| * treated differently, as they are not heap-based and have no indexes. |
| */ |
| if (RelationIsAoRows(onerel)) |
| { |
| /* append-only relations have a special code path */ |
| vacuum_appendonly_rel(onerel, vacrelstats, /* FULL */ false); |
| } |
| else if (RelationIsParquet(onerel)) |
| { |
| vacuum_parquet_rel(onerel, vacrelstats, /* FULL */ false); |
| } |
| else |
| { |
| vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared, |
| &OldestXmin, &FreezeLimit); |
| |
| /* Set threshold for interesting free space = average request size */ |
| /* XXX should we scale it up or down? Adjust vacuum.c too, if so */ |
| vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node); |
| |
| /* Open all indexes of the relation */ |
| vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel); |
| vacrelstats->hasindex = (nindexes > 0); |
| |
| /* Do the vacuuming */ |
| lazy_scan_heap(onerel, vacrelstats, Irel, nindexes, updated_stats, vacstmt->extra_oids); |
| |
| /* Done with indexes */ |
| vac_close_indexes(nindexes, Irel, NoLock); |
| |
| /* |
| * Optionally truncate the relation. |
| * |
| * Don't even think about it unless we have a shot at releasing a goodly |
| * number of pages. Otherwise, the time taken isn't worth it. |
| */ |
| possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages; |
| if (possibly_freeable >= REL_TRUNCATE_MINIMUM || |
| possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION) |
| lazy_truncate_heap(onerel, vacrelstats); |
| |
| /* Update shared free space map with final free space info */ |
| lazy_update_fsm(onerel, vacrelstats); |
| } |
| |
| /* Update statistics in pg_class */ |
| vac_update_relstats(onerel, |
| vacrelstats->rel_pages, |
| vacrelstats->rel_tuples, |
| vacrelstats->hasindex, |
| FreezeLimit, |
| updated_stats); |
| |
| /* report results to the stats collector, too */ |
| pgstat_report_vacuum(RelationGetRelid(onerel), |
| onerel->rd_rel->relisshared, |
| true /*vacrelstats->scanned_all*/, |
| vacstmt->analyze, vacrelstats->rel_tuples); |
| } |
| |
| |
| /* |
| * lazy_scan_heap() -- scan an open heap relation |
| * |
| * This routine sets commit status bits, builds lists of dead tuples |
| * and pages with free space, and calculates statistics on the number |
| * of live tuples in the heap. When done, or when we run low on space |
| * for dead-tuple TIDs, invoke vacuuming of indexes and heap. |
| * |
| * If there are no indexes then we just vacuum each dirty page as we |
| * process it, since there's no point in gathering many tuples. |
| */ |
| static void |
| lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, |
| Relation *Irel, int nindexes, List *updated_stats, List *all_extra_oids) |
| { |
| MIRROREDLOCK_BUFMGR_DECLARE; |
| |
| BlockNumber nblocks, |
| blkno; |
| HeapTupleData tuple; |
| char *relname; |
| BlockNumber empty_pages, |
| vacuumed_pages; |
| double num_tuples, |
| tups_vacuumed, |
| nkeep, |
| nunused; |
| IndexBulkDeleteResult **indstats; |
| int i; |
| int reindex_count = 1; |
| PGRUsage ru0; |
| |
| /* Fetch gp_persistent_relation_node information that will be added to XLOG record. */ |
| RelationFetchGpRelationNodeForXLog(onerel); |
| |
| pg_rusage_init(&ru0); |
| |
| relname = RelationGetRelationName(onerel); |
| ereport(elevel, |
| (errmsg("vacuuming \"%s.%s\"", |
| get_namespace_name(RelationGetNamespace(onerel)), |
| relname))); |
| |
| empty_pages = vacuumed_pages = 0; |
| num_tuples = tups_vacuumed = nkeep = nunused = 0; |
| |
| indstats = (IndexBulkDeleteResult **) |
| palloc0(nindexes * sizeof(IndexBulkDeleteResult *)); |
| |
| nblocks = RelationGetNumberOfBlocks(onerel); |
| vacrelstats->rel_pages = nblocks; |
| vacrelstats->nonempty_pages = 0; |
| |
| lazy_space_alloc(vacrelstats, nblocks); |
| |
| for (blkno = 0; blkno < nblocks; blkno++) |
| { |
| Buffer buf; |
| Page page; |
| OffsetNumber offnum, |
| maxoff; |
| bool tupgone, |
| hastup; |
| int prev_dead_count; |
| OffsetNumber frozen[MaxOffsetNumber]; |
| int nfrozen; |
| |
| vacuum_delay_point(); |
| |
| /* |
| * If we are close to overrunning the available space for dead-tuple |
| * TIDs, pause and do a cycle of vacuuming before we tackle this page. |
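| * |
| * Keeping at least MaxHeapTuplesPerPage slots in reserve guarantees |
| * that lazy_record_dead_tuple cannot run out of room partway through |
| * the page we are about to scan. |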
| */ |
| if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage && |
| vacrelstats->num_dead_tuples > 0) |
| { |
| /* Remove index entries */ |
| for (i = 0; i < nindexes; i++) |
| { |
| List *extra_oids = get_oids_for_bitmap(all_extra_oids, Irel[i], |
| onerel, reindex_count); |
| |
| lazy_vacuum_index(Irel[i], |
| &indstats[i], |
| vacrelstats, |
| extra_oids); |
| list_free(extra_oids); |
| } |
| reindex_count++; |
| |
| /* Remove tuples from heap */ |
| lazy_vacuum_heap(onerel, vacrelstats); |
| /* Forget the now-vacuumed tuples, and press on */ |
| vacrelstats->num_dead_tuples = 0; |
| } |
| |
| /* -------- MirroredLock ---------- */ |
| MIRROREDLOCK_BUFMGR_LOCK; |
| |
| buf = ReadBuffer(onerel, blkno); |
| |
| /* Initially, we only need shared access to the buffer */ |
| LockBuffer(buf, BUFFER_LOCK_SHARE); |
| |
| page = BufferGetPage(buf); |
| |
| if (PageIsNew(page)) |
| { |
| /* |
| * An all-zeroes page could be left over if a backend extends the |
| * relation but crashes before initializing the page. Reclaim such |
| * pages for use. |
| * |
| * We have to be careful here because we could be looking at a |
| * page that someone has just added to the relation and not yet |
| * been able to initialize (see RelationGetBufferForTuple). To |
| * protect against that, release the buffer lock, grab the |
| * relation extension lock momentarily, and re-lock the buffer. If |
| * the page is still uninitialized by then, it must be left over |
| * from a crashed backend, and we can initialize it. |
| * |
| * We don't really need the relation lock when this is a new or |
| * temp relation, but it's probably not worth the code space to |
| * check that, since this surely isn't a critical path. |
| * |
| * Note: the comparable code in vacuum.c need not worry because |
| * it's got exclusive lock on the whole relation. |
| */ |
| LockBuffer(buf, BUFFER_LOCK_UNLOCK); |
| |
| MIRROREDLOCK_BUFMGR_UNLOCK; |
| /* -------- MirroredLock ---------- */ |
| |
| LockRelationForExtension(onerel, ExclusiveLock); |
| UnlockRelationForExtension(onerel, ExclusiveLock); |
| |
| /* -------- MirroredLock ---------- */ |
| MIRROREDLOCK_BUFMGR_LOCK; |
| |
| LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); /* LockBufferForCleanup(buf)? */ |
| if (PageIsNew(page)) |
| { |
| ereport(WARNING, |
| (errmsg("relation \"%s\" page %u is uninitialized --- fixing", |
| relname, blkno))); |
| PageInit(page, BufferGetPageSize(buf), 0); |
| |
| /* must record in xlog so that changetracking will know about this change */ |
| log_heap_newpage(onerel, page, blkno); |
| |
| empty_pages++; |
| lazy_record_free_space(vacrelstats, blkno, |
| PageGetFreeSpace(page)); |
| } |
| MarkBufferDirty(buf); |
| UnlockReleaseBuffer(buf); |
| |
| MIRROREDLOCK_BUFMGR_UNLOCK; |
| /* -------- MirroredLock ---------- */ |
| |
| continue; |
| } |
| |
| if (PageIsEmpty(page)) |
| { |
| empty_pages++; |
| lazy_record_free_space(vacrelstats, blkno, |
| PageGetFreeSpace(page)); |
| UnlockReleaseBuffer(buf); |
| |
| MIRROREDLOCK_BUFMGR_UNLOCK; |
| /* -------- MirroredLock ---------- */ |
| |
| continue; |
| } |
| |
| nfrozen = 0; |
| hastup = false; |
| prev_dead_count = vacrelstats->num_dead_tuples; |
| maxoff = PageGetMaxOffsetNumber(page); |
| for (offnum = FirstOffsetNumber; |
| offnum <= maxoff; |
| offnum = OffsetNumberNext(offnum)) |
| { |
| ItemId itemid; |
| |
| itemid = PageGetItemId(page, offnum); |
| |
| if (!ItemIdIsUsed(itemid)) |
| { |
| nunused += 1; |
| continue; |
| } |
| |
| tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); |
| tuple.t_len = ItemIdGetLength(itemid); |
| ItemPointerSet(&(tuple.t_self), blkno, offnum); |
| |
| tupgone = false; |
| |
| switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf, false)) |
| { |
| case HEAPTUPLE_DEAD: |
| tupgone = true; /* we can delete the tuple */ |
| break; |
| case HEAPTUPLE_LIVE: |
| /* Tuple is good --- but let's do some validity checks */ |
| if (onerel->rd_rel->relhasoids && |
| !OidIsValid(HeapTupleGetOid(&tuple))) |
| elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid", |
| relname, blkno, offnum); |
| break; |
| case HEAPTUPLE_RECENTLY_DEAD: |
| |
| /* |
| * If tuple is recently deleted then we must not remove it |
| * from relation. |
| */ |
| nkeep += 1; |
| break; |
| case HEAPTUPLE_INSERT_IN_PROGRESS: |
| /* This is an expected case during concurrent vacuum */ |
| break; |
| case HEAPTUPLE_DELETE_IN_PROGRESS: |
| /* This is an expected case during concurrent vacuum */ |
| break; |
| default: |
| elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); |
| break; |
| } |
| |
| if (tupgone) |
| { |
| lazy_record_dead_tuple(vacrelstats, &(tuple.t_self)); |
| tups_vacuumed += 1; |
| } |
| else |
| { |
| num_tuples += 1; |
| hastup = true; |
| |
| /* |
| * Each non-removable tuple must be checked to see if it |
| * needs freezing. If we already froze anything, then |
| * we've already switched the buffer lock to exclusive. |
| */ |
| if (heap_freeze_tuple(tuple.t_data, FreezeLimit, |
| (nfrozen > 0) ? InvalidBuffer : buf)) |
| frozen[nfrozen++] = offnum; |
| } |
| } /* scan along page */ |
| |
| /* |
| * If we froze any tuples, mark the buffer dirty, and write a WAL |
| * record recording the changes. We must log the changes to be |
| * crash-safe against future truncation of CLOG. |
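| * Otherwise, a crash could leave the unfrozen tuple on disk even |
| * though its original xmin's commit status may later be truncated |
| * away from CLOG. |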
| */ |
| if (nfrozen > 0) |
| { |
| MarkBufferDirty(buf); |
| /* no XLOG for temp tables, though */ |
| if (!onerel->rd_istemp) |
| { |
| XLogRecPtr recptr; |
| |
| recptr = log_heap_freeze(onerel, buf, FreezeLimit, |
| frozen, nfrozen); |
| PageSetLSN(page, recptr); |
| PageSetTLI(page, ThisTimeLineID); |
| } |
| } |
| |
| /* |
| * If there are no indexes then we can vacuum the page right now |
| * instead of doing a second scan. |
| */ |
| if (nindexes == 0 && |
| vacrelstats->num_dead_tuples > 0) |
| { |
| /* Trade in buffer share lock for super-exclusive lock */ |
| LockBuffer(buf, BUFFER_LOCK_UNLOCK); |
| LockBufferForCleanup(buf); |
| |
| /* Remove tuples from heap */ |
| lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats); |
| /* Forget the now-vacuumed tuples, and press on */ |
| vacrelstats->num_dead_tuples = 0; |
| vacuumed_pages++; |
| } |
| |
| /* |
| * If we remembered any tuples for deletion, then the page will be |
| * visited again by lazy_vacuum_heap, which will compute and record |
| * its post-compaction free space. If not, then we're done with this |
| * page, so remember its free space as-is. (This path will always be |
| * taken if there are no indexes.) |
| */ |
| if (vacrelstats->num_dead_tuples == prev_dead_count) |
| { |
| lazy_record_free_space(vacrelstats, blkno, |
| PageGetFreeSpace(page)); |
| } |
| |
| /* Remember the location of the last page with nonremovable tuples */ |
| if (hastup) |
| vacrelstats->nonempty_pages = blkno + 1; |
| |
| UnlockReleaseBuffer(buf); |
| |
| MIRROREDLOCK_BUFMGR_UNLOCK; |
| /* -------- MirroredLock ---------- */ |
| |
| } |
| |
| /* save stats for use later */ |
| vacrelstats->rel_tuples = num_tuples; |
| vacrelstats->tuples_deleted = tups_vacuumed; |
| |
| /* If any tuples need to be deleted, perform final vacuum cycle */ |
| /* XXX put a threshold on min number of tuples here? */ |
| if (vacrelstats->num_dead_tuples > 0) |
| { |
| /* Remove index entries */ |
| for (i = 0; i < nindexes; i++) |
| { |
| List *extra_oids = get_oids_for_bitmap(all_extra_oids, Irel[i], |
| onerel, reindex_count); |
| |
| lazy_vacuum_index(Irel[i], |
| &indstats[i], |
| vacrelstats, |
| extra_oids); |
| list_free(extra_oids); |
| } |
| reindex_count++; |
| |
| /* Remove tuples from heap */ |
| lazy_vacuum_heap(onerel, vacrelstats); |
| } |
| |
| /* Do post-vacuum cleanup and statistics update for each index */ |
| for (i = 0; i < nindexes; i++) |
| lazy_cleanup_index(Irel[i], indstats[i], vacrelstats, updated_stats); |
| |
| /* If no indexes, make log report that lazy_vacuum_heap would've made */ |
| if (vacuumed_pages) |
| ereport(elevel, |
| (errmsg("\"%s\": removed %.0f row versions in %u pages", |
| RelationGetRelationName(onerel), |
| tups_vacuumed, vacuumed_pages))); |
| |
| ereport(elevel, |
| (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages", |
| RelationGetRelationName(onerel), |
| tups_vacuumed, num_tuples, nblocks), |
| errdetail("%.0f dead row versions cannot be removed yet.\n" |
| "There were %.0f unused item pointers.\n" |
| "%u pages contain useful free space.\n" |
| "%u pages are entirely empty.\n" |
| "%s.", |
| nkeep, |
| nunused, |
| vacrelstats->tot_free_pages, |
| empty_pages, |
| pg_rusage_show(&ru0)))); |
| |
| if (vacrelstats->tot_free_pages > MaxFSMPages) |
| ereport(WARNING, |
| (errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space", |
| get_namespace_name(RelationGetNamespace(onerel)), |
| relname), |
| errhint("Consider compacting this relation or increasing the configuration parameter \"max_fsm_pages\"."))); |
| } |
| |
| |
| /* |
| * lazy_vacuum_heap() -- second pass over the heap |
| * |
| * This routine marks dead tuples as unused and compacts out free |
| * space on their pages. Pages not having dead tuples recorded from |
| * lazy_scan_heap are not visited at all. |
| * |
| * Note: the reason for doing this as a second pass is we cannot remove |
| * the tuples until we've removed their index entries, and we want to |
| * process index entry removal in batches as large as possible. |
| */ |
| static void |
| lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) |
| { |
| MIRROREDLOCK_BUFMGR_DECLARE; |
| |
| int tupindex; |
| int npages; |
| PGRUsage ru0; |
| |
| pg_rusage_init(&ru0); |
| npages = 0; |
| |
| tupindex = 0; |
| |
| /* Fetch gp_persistent_relation_node information that will be added to XLOG record. */ |
| RelationFetchGpRelationNodeForXLog(onerel); |
| |
| while (tupindex < vacrelstats->num_dead_tuples) |
| { |
| BlockNumber tblk; |
| Buffer buf; |
| Page page; |
| |
| vacuum_delay_point(); |
| |
| tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]); |
| |
| /* -------- MirroredLock ---------- */ |
| MIRROREDLOCK_BUFMGR_LOCK; |
| |
| buf = ReadBuffer(onerel, tblk); |
| LockBufferForCleanup(buf); |
| tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats); |
| /* Now that we've compacted the page, record its available space */ |
| page = BufferGetPage(buf); |
| lazy_record_free_space(vacrelstats, tblk, |
| PageGetFreeSpace(page)); |
| UnlockReleaseBuffer(buf); |
| |
| MIRROREDLOCK_BUFMGR_UNLOCK; |
| /* -------- MirroredLock ---------- */ |
| |
| npages++; |
| } |
| |
| ereport(elevel, |
| (errmsg("\"%s\": removed %d row versions in %d pages", |
| RelationGetRelationName(onerel), |
| tupindex, npages), |
| errdetail("%s.", |
| pg_rusage_show(&ru0)))); |
| } |
| |
| /* |
| * lazy_vacuum_page() -- free dead tuples on a page |
| * and repair its fragmentation. |
| * |
| * Caller must hold pin and buffer cleanup lock on the buffer. |
| * |
| * tupindex is the index in vacrelstats->dead_tuples of the first dead |
| * tuple for this page. We assume the rest follow sequentially. |
| * The return value is the first tupindex after the tuples of this page. |
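| * |
| * For example, if dead_tuples holds (5,1) (5,7) (8,2) and we are |
| * called for block 5 with tupindex = 0, offsets 1 and 7 are marked |
| * unused and the return value is 2, the index of block 8's first TID. |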
| */ |
| static int |
| lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, |
| int tupindex, LVRelStats *vacrelstats) |
| { |
| OffsetNumber unused[MaxOffsetNumber]; |
| int uncnt; |
| Page page = BufferGetPage(buffer); |
| ItemId itemid; |
| |
| MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD; |
| |
| START_CRIT_SECTION(); |
| |
| for (; tupindex < vacrelstats->num_dead_tuples; tupindex++) |
| { |
| BlockNumber tblk; |
| OffsetNumber toff; |
| |
| tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]); |
| if (tblk != blkno) |
| break; /* past end of tuples for this block */ |
| toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]); |
| itemid = PageGetItemId(page, toff); |
| itemid->lp_flags &= ~LP_USED; |
| } |
| |
| uncnt = PageRepairFragmentation(page, unused); |
| |
| MarkBufferDirty(buffer); |
| |
| /* XLOG stuff */ |
| if (!onerel->rd_istemp) |
| { |
| XLogRecPtr recptr; |
| |
| recptr = log_heap_clean(onerel, buffer, unused, uncnt); |
| PageSetLSN(page, recptr); |
| PageSetTLI(page, ThisTimeLineID); |
| } |
| else |
| { |
| /* No XLOG record, but still need to flag that XID exists on disk */ |
| MyXactMadeTempRelUpdate = true; |
| } |
| |
| END_CRIT_SECTION(); |
| |
| return tupindex; |
| } |
| |
| /* |
| * lazy_vacuum_index() -- vacuum one index relation. |
| * |
| * Delete all the index entries pointing to tuples listed in |
| * vacrelstats->dead_tuples, and update running statistics. |
| */ |
| static void |
| lazy_vacuum_index(Relation indrel, |
| IndexBulkDeleteResult **stats, |
| LVRelStats *vacrelstats, |
| List *extra_oids) |
| { |
| IndexVacuumInfo ivinfo; |
| PGRUsage ru0; |
| |
| pg_rusage_init(&ru0); |
| |
| ivinfo.index = indrel; |
| ivinfo.vacuum_full = false; |
| ivinfo.message_level = elevel; |
| /* We don't yet know rel_tuples, so pass -1 */ |
| ivinfo.num_heap_tuples = -1; |
| ivinfo.extra_oids = extra_oids; |
| |
| /* Do bulk deletion */ |
| *stats = index_bulk_delete(&ivinfo, *stats, |
| lazy_tid_reaped, (void *) vacrelstats); |
| |
| ereport(elevel, |
| (errmsg("scanned index \"%s\" to remove %d row versions", |
| RelationGetRelationName(indrel), |
| vacrelstats->num_dead_tuples), |
| errdetail("%s.", pg_rusage_show(&ru0)))); |
| } |
| |
| /* |
| * lazy_cleanup_index() -- do post-vacuum cleanup for one index relation. |
| */ |
| static void |
| lazy_cleanup_index(Relation indrel, |
| IndexBulkDeleteResult *stats, |
| LVRelStats *vacrelstats, |
| List *updated_stats) |
| { |
| IndexVacuumInfo ivinfo; |
| PGRUsage ru0; |
| |
| pg_rusage_init(&ru0); |
| |
| ivinfo.index = indrel; |
| ivinfo.vacuum_full = false; |
| ivinfo.message_level = elevel; |
| ivinfo.num_heap_tuples = vacrelstats->rel_tuples; |
| ivinfo.extra_oids = NIL; |
| |
| stats = index_vacuum_cleanup(&ivinfo, stats); |
| |
| if (!stats) |
| return; |
| |
| /* now update statistics in pg_class */ |
| vac_update_relstats(indrel, |
| stats->num_pages, |
| stats->num_index_tuples, |
| false, |
| InvalidTransactionId, |
| updated_stats); |
| |
| ereport(elevel, |
| (errmsg("index \"%s\" now contains %.0f row versions in %u pages", |
| RelationGetRelationName(indrel), |
| stats->num_index_tuples, |
| stats->num_pages), |
| errdetail("%.0f index row versions were removed.\n" |
| "%u index pages have been deleted, %u are currently reusable.\n" |
| "%s.", |
| stats->tuples_removed, |
| stats->pages_deleted, stats->pages_free, |
| pg_rusage_show(&ru0)))); |
| |
| pfree(stats); |
| } |
| |
| /* |
| * lazy_truncate_heap - try to truncate off any empty pages at the end |
| */ |
| static void |
| lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) |
| { |
| BlockNumber old_rel_pages = vacrelstats->rel_pages; |
| BlockNumber new_rel_pages; |
| PageFreeSpaceInfo *pageSpaces; |
| int n; |
| int i, |
| j; |
| PGRUsage ru0; |
| |
| pg_rusage_init(&ru0); |
| |
| /* |
| * We need full exclusive lock on the relation in order to do truncation. |
| * If we can't get it, give up rather than waiting --- we don't want to |
| * block other backends, and we don't want to deadlock (which is quite |
| * possible considering we already hold a lower-grade lock). |
| */ |
| if (!ConditionalLockRelation(onerel, AccessExclusiveLock)) |
| return; |
| |
| /* |
| * Now that we have exclusive lock, look to see if the rel has grown |
| * whilst we were vacuuming with non-exclusive lock. If so, give up; the |
| * newly added pages presumably contain non-deletable tuples. |
| */ |
| new_rel_pages = RelationGetNumberOfBlocks(onerel); |
| if (new_rel_pages != old_rel_pages) |
| { |
| /* might as well use the latest news when we update pg_class stats */ |
| vacrelstats->rel_pages = new_rel_pages; |
| UnlockRelation(onerel, AccessExclusiveLock); |
| return; |
| } |
| |
| /* |
| * Scan backwards from the end to verify that the end pages actually |
| * contain no tuples. This is *necessary*, not optional, because other |
| * backends could have added tuples to these pages whilst we were |
| * vacuuming. |
| */ |
| new_rel_pages = count_nondeletable_pages(onerel, vacrelstats); |
| |
| if (new_rel_pages >= old_rel_pages) |
| { |
| /* can't do anything after all */ |
| UnlockRelation(onerel, AccessExclusiveLock); |
| return; |
| } |
| |
| /* |
| * Okay to truncate. |
| */ |
| RelationTruncate( |
| onerel, |
| new_rel_pages, |
| /* markPersistentAsPhysicallyTruncated */ true); |
| |
| /* |
| * Drop free-space info for removed blocks; these must not get entered |
| * into the FSM! |
| */ |
| pageSpaces = vacrelstats->free_pages; |
| n = vacrelstats->num_free_pages; |
| j = 0; |
| for (i = 0; i < n; i++) |
| { |
| if (pageSpaces[i].blkno < new_rel_pages) |
| { |
| pageSpaces[j] = pageSpaces[i]; |
| j++; |
| } |
| } |
| vacrelstats->num_free_pages = j; |
| |
| /* |
| * If tot_free_pages was more than num_free_pages, we can't tell for sure |
| * what its correct value is now, because we don't know which of the |
| * forgotten pages are getting truncated. Conservatively set it equal to |
| * num_free_pages. |
| */ |
| vacrelstats->tot_free_pages = j; |
| |
| /* We destroyed the heap ordering, so mark array unordered */ |
| vacrelstats->fs_is_heap = false; |
| |
| /* update statistics */ |
| vacrelstats->rel_pages = new_rel_pages; |
| vacrelstats->pages_removed = old_rel_pages - new_rel_pages; |
| |
| /* |
| * We can't keep the exclusive lock until commit, since this will cause |
| * deadlock, see MPP-5733. |
| */ |
| UnlockRelation(onerel, AccessExclusiveLock); |
| |
| ereport(elevel, |
| (errmsg("\"%s\": truncated %u to %u pages", |
| RelationGetRelationName(onerel), |
| old_rel_pages, new_rel_pages), |
| errdetail("%s.", |
| pg_rusage_show(&ru0)))); |
| } |
| |
| /* |
| * vacuum_appendonly_rel() -- vacuum an append-only relation |
| * |
| * This procedure is what gets executed both for VACUUM |
| * and VACUUM FULL (and also ANALYZE or anything else that |
| * needs the pg_class stats updated). |
| * |
| * Here we don't really vacuum or scan anything per se. We |
| * already have the information we need in the pg_aoseg_<oid> |
| * relation on each segment. We are interested in reltuples and |
| * relpages. reltuples is the same as "pg_aoseg_<oid>:tupcount" |
| * column and we simulate relpages by subdividing the eof value |
| * ("pg_aoseg_<oid>:eof") over the defined page size. |
| * |
| * We also truncate any leftover bytes that come after eof |
| * and are not a valid part of the relation (for example, invalid |
| * rows left over after a failed COPY operation that need |
| * vacuuming). |
| * |
| * Txn ids, hint bits, free space, dead tuples, etc. are all |
| * irrelevant in the append-only relation context. |
| * |
| * There may be indexes on the table, but currently nothing is done |
| * to clean up the indexes. |
| */ |
| void |
| vacuum_appendonly_rel(Relation aorel, void *vacrelstats, bool isVacFull) |
| { |
| FileSegTotals *fstotal; |
| BlockNumber nblocks; |
| char *relname; |
| double num_tuples; |
| double totalbytes; |
| double eof; |
| PGRUsage ru0; |
| |
| pg_rusage_init(&ru0); |
| |
| relname = RelationGetRelationName(aorel); |
| ereport(elevel, |
| (errmsg("vacuuming \"%s.%s\"", |
| get_namespace_name(RelationGetNamespace(aorel)), |
| relname))); |
| |
| /* get statistics from the pg_aoseg table */ |
| fstotal = GetSegFilesTotals(aorel, SnapshotNow); |
| |
| /* calculate the values we care about */ |
| eof = (double)fstotal->totalbytes; |
| num_tuples = (double)fstotal->totaltuples; |
| totalbytes = eof; |
| nblocks = (uint32)RelationGuessNumberOfBlocks(totalbytes); |
| |
| if (isVacFull) |
| { |
| /* FULL */ |
| |
| /* fill in remaining VRelStats values */ |
| ((VRelStats *)vacrelstats)->rel_pages = nblocks; |
| ((VRelStats *)vacrelstats)->rel_tuples = num_tuples; |
| ((VRelStats *)vacrelstats)->hasindex = aorel->rd_rel->relhasindex; |
| ((VRelStats *)vacrelstats)->min_tlen = 0; |
| ((VRelStats *)vacrelstats)->max_tlen = 0; |
| ((VRelStats *)vacrelstats)->num_vtlinks = 0; |
| ((VRelStats *)vacrelstats)->vtlinks = NULL; |
| } |
| else |
| { |
| /* LAZY */ |
| |
| /* fill in remaining LVRelStats values */ |
| ((LVRelStats *)vacrelstats)->rel_pages = nblocks; |
| ((LVRelStats *)vacrelstats)->rel_tuples = num_tuples; |
| ((LVRelStats *)vacrelstats)->hasindex = aorel->rd_rel->relhasindex; |
| ((LVRelStats *)vacrelstats)->nonempty_pages = 0; |
| ((LVRelStats *)vacrelstats)->num_dead_tuples = 0; |
| ((LVRelStats *)vacrelstats)->max_dead_tuples = 0; |
| ((LVRelStats *)vacrelstats)->tuples_deleted = 0; |
| ((LVRelStats *)vacrelstats)->tot_free_pages = 0; |
| ((LVRelStats *)vacrelstats)->fs_is_heap = false; |
| ((LVRelStats *)vacrelstats)->num_free_pages = 0; |
| ((LVRelStats *)vacrelstats)->max_free_pages = 0; |
| ((LVRelStats *)vacrelstats)->pages_removed = 0; |
| } |
| |
| /* |
| * In both VACUUM and VACUUM FULL we also want to truncate the |
| * relation after eof. This must only be done when the relation is |
| * exclusively locked (as in VACUUM FULL) or the aoseg tuple for the |
| * segfile being truncated is exclusively locked. |
| * |
| * If we don't guarantee the locking conditions, we could truncate a |
| * concurrent user's data while it is still being written! |
| */ |
| |
| /* |
| * For locking reasons, in HAWQ we currently do not truncate the relation. |
| */ |
| pfree(fstotal); |
| |
| return; |
| } |
| |
| /* |
| * vacuum_parquet_rel() -- vacuum a parquet relation |
| * |
| * This procedure is what gets executed both for VACUUM |
| * and VACUUM FULL (and also ANALYZE or anything else that |
| * needs the pg_class stats updated). |
| * |
| * Here we don't really vacuum or scan anything per se. We |
| * already have the information we need in the pg_paqseg_<oid> |
| * relation on each segment. We are interested in reltuples and |
| * relpages. reltuples is the same as "pg_paqseg_<oid>:tupcount" |
| * column and we simulate relpages by subdividing the eof value |
| * ("pg_aoseg_<oid>:eof") over the defined page size. |
| * |
| */ |
| void |
| vacuum_parquet_rel(Relation parquetrel, void *vacrelstats, bool isVacFull) |
| { |
| ParquetFileSegTotals *fstotal; |
| BlockNumber nblocks; |
| char *relname; |
| double num_tuples; |
| double totalbytes; |
| double eof; |
| PGRUsage ru0; |
| |
| pg_rusage_init(&ru0); |
| |
| relname = RelationGetRelationName(parquetrel); |
| ereport(elevel, |
| (errmsg("vacuuming \"%s.%s\"", |
| get_namespace_name(RelationGetNamespace(parquetrel)), |
| relname))); |
| |
| /* get statistics from the pg_paqseg table */ |
| fstotal = GetParquetSegFilesTotals(parquetrel, SnapshotNow); |
| |
| /* calculate the values we care about */ |
| eof = (double)fstotal->totalbytes; |
| num_tuples = (double)fstotal->totaltuples; |
| totalbytes = eof; |
| nblocks = (uint32)RelationGuessNumberOfBlocks(totalbytes); |
| |
| if (isVacFull) |
| { |
| /* FULL */ |
| |
| /* fill in remaining VRelStats values */ |
| ((VRelStats *)vacrelstats)->rel_pages = nblocks; |
| ((VRelStats *)vacrelstats)->rel_tuples = num_tuples; |
| ((VRelStats *)vacrelstats)->hasindex = parquetrel->rd_rel->relhasindex; |
| ((VRelStats *)vacrelstats)->min_tlen = 0; |
| ((VRelStats *)vacrelstats)->max_tlen = 0; |
| ((VRelStats *)vacrelstats)->num_vtlinks = 0; |
| ((VRelStats *)vacrelstats)->vtlinks = NULL; |
| } |
| else |
| { |
| /* LAZY */ |
| |
| /* fill in remaining LVRelStats values */ |
| ((LVRelStats *)vacrelstats)->rel_pages = nblocks; |
| ((LVRelStats *)vacrelstats)->rel_tuples = num_tuples; |
| ((LVRelStats *)vacrelstats)->hasindex = parquetrel->rd_rel->relhasindex; |
| ((LVRelStats *)vacrelstats)->nonempty_pages = 0; |
| ((LVRelStats *)vacrelstats)->num_dead_tuples = 0; |
| ((LVRelStats *)vacrelstats)->max_dead_tuples = 0; |
| ((LVRelStats *)vacrelstats)->tuples_deleted = 0; |
| ((LVRelStats *)vacrelstats)->tot_free_pages = 0; |
| ((LVRelStats *)vacrelstats)->fs_is_heap = false; |
| ((LVRelStats *)vacrelstats)->num_free_pages = 0; |
| ((LVRelStats *)vacrelstats)->max_free_pages = 0; |
| ((LVRelStats *)vacrelstats)->pages_removed = 0; |
| } |
| |
| /* |
| * In both VACUUM and VACUUM FULL we also want to truncate the |
| * relation after eof. This must only be done when the relation is |
| * exclusively locked (as in VACUUM FULL) or the aoseg tuple for the |
| * segfile being truncated is exclusively locked. |
| * |
| * If we don't guarantee the locking conditions, we could truncate a |
| * concurrent user's data while it is still being written! |
| */ |
| |
| /* |
| * For locking reasons, in HAWQ we currently do not truncate the relation. |
| */ |
| pfree(fstotal); |
| |
| return; |
| } |
| |
| /* |
| * Rescan end pages to verify that they are (still) empty of tuples. |
| * |
| * Returns number of nondeletable pages (last nonempty page + 1). |
| */ |
| static BlockNumber |
| count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) |
| { |
| MIRROREDLOCK_BUFMGR_DECLARE; |
| |
| BlockNumber blkno; |
| |
| /* Strange coding of loop control is needed because blkno is unsigned */ |
| blkno = vacrelstats->rel_pages; |
| while (blkno > vacrelstats->nonempty_pages) |
| { |
| Buffer buf; |
| Page page; |
| OffsetNumber offnum, |
| maxoff; |
| bool hastup; |
| |
| /* |
| * We don't insert a vacuum delay point here, because we have an |
| * exclusive lock on the table which we want to hold for as short a |
| * time as possible. We still need to check for interrupts however. |
| */ |
| CHECK_FOR_INTERRUPTS(); |
| |
| blkno--; |
| |
| /* -------- MirroredLock ---------- */ |
| MIRROREDLOCK_BUFMGR_LOCK; |
| |
| buf = ReadBuffer(onerel, blkno); |
| |
| /* In this phase we only need shared access to the buffer */ |
| LockBuffer(buf, BUFFER_LOCK_SHARE); |
| |
| page = BufferGetPage(buf); |
| |
| if (PageIsNew(page) || PageIsEmpty(page)) |
| { |
| /* PageIsNew probably shouldn't happen... */ |
| UnlockReleaseBuffer(buf); |
| |
| MIRROREDLOCK_BUFMGR_UNLOCK; |
| /* -------- MirroredLock ---------- */ |
| |
| continue; |
| } |
| |
| hastup = false; |
| maxoff = PageGetMaxOffsetNumber(page); |
| for (offnum = FirstOffsetNumber; |
| offnum <= maxoff; |
| offnum = OffsetNumberNext(offnum)) |
| { |
| ItemId itemid; |
| |
| itemid = PageGetItemId(page, offnum); |
| |
| /* |
| * Note: any non-unused item should be taken as a reason to keep |
| * this page. We formerly thought that DEAD tuples could be |
| * thrown away, but that's not so, because we'd not have cleaned |
| * out their index entries. |
| */ |
| if (ItemIdIsUsed(itemid)) |
| { |
| hastup = true; |
| break; /* can stop scanning */ |
| } |
| } /* scan along page */ |
| |
| UnlockReleaseBuffer(buf); |
| |
| MIRROREDLOCK_BUFMGR_UNLOCK; |
| /* -------- MirroredLock ---------- */ |
| |
| /* Done scanning if we found a tuple here */ |
| if (hastup) |
| return blkno + 1; |
| } |
| |
| /* |
| * If we fall out of the loop, all the previously-thought-to-be-empty |
| * pages still are; we need not bother to look at the last known-nonempty |
| * page. |
| */ |
| return vacrelstats->nonempty_pages; |
| } |
| |
| /* |
| * lazy_space_alloc - space allocation decisions for lazy vacuum |
| * |
| * See the comments at the head of this file for rationale. |
| */ |
| static void |
| lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) |
| { |
| long maxtuples; |
| int maxpages; |
| |
| if (vacrelstats->hasindex) |
| { |
| maxtuples = (maintenance_work_mem * 1024L) / sizeof(ItemPointerData); |
| maxtuples = Min(maxtuples, INT_MAX); |
| maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData)); |
| |
| /* curious coding here to ensure the multiplication can't overflow */ |
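| /* (Testing the quotient first means the multiplication below only |
| * happens when the product is known to fit; relblocks can be as large |
| * as 2^32-1, so computing relblocks * LAZY_ALLOC_TUPLES unconditionally |
| * could overflow.) */ |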
| if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks) |
| maxtuples = relblocks * LAZY_ALLOC_TUPLES; |
| |
| /* stay sane if small maintenance_work_mem */ |
| maxtuples = Max(maxtuples, MaxHeapTuplesPerPage); |
| } |
| else |
| { |
| maxtuples = MaxHeapTuplesPerPage; |
| } |
| |
| vacrelstats->num_dead_tuples = 0; |
| vacrelstats->max_dead_tuples = (int) maxtuples; |
| vacrelstats->dead_tuples = (ItemPointer) |
| palloc(maxtuples * sizeof(ItemPointerData)); |
| |
| maxpages = MaxFSMPages; |
| maxpages = Min(maxpages, MaxAllocSize / sizeof(PageFreeSpaceInfo)); |
| /* No need to allocate more pages than the relation has blocks */ |
| if (relblocks < (BlockNumber) maxpages) |
| maxpages = (int) relblocks; |
| |
| vacrelstats->fs_is_heap = false; |
| vacrelstats->num_free_pages = 0; |
| vacrelstats->max_free_pages = maxpages; |
| vacrelstats->free_pages = (PageFreeSpaceInfo *) |
| palloc(maxpages * sizeof(PageFreeSpaceInfo)); |
| vacrelstats->tot_free_pages = 0; |
| } |
| |
| /* |
| * lazy_record_dead_tuple - remember one deletable tuple |
| */ |
| static void |
| lazy_record_dead_tuple(LVRelStats *vacrelstats, |
| ItemPointer itemptr) |
| { |
| /* |
| * The array shouldn't overflow under normal behavior, but perhaps it |
| * could if we are given a really small maintenance_work_mem. In that |
| * case, just forget the last few tuples (we'll get 'em next time). |
| */ |
| if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples) |
| { |
| vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr; |
| vacrelstats->num_dead_tuples++; |
| } |
| } |
| |
| /* |
| * lazy_record_free_space - remember free space on one page |
| */ |
| static void |
| lazy_record_free_space(LVRelStats *vacrelstats, |
| BlockNumber page, |
| Size avail) |
| { |
| PageFreeSpaceInfo *pageSpaces; |
| int n; |
| |
| /* |
| * A page with less than vacrelstats->threshold free space will be forgotten |
| * immediately, and never passed to the free space map. Removing the |
| * uselessly small entries early saves cycles, and in particular reduces |
| * the amount of time we spend holding the FSM lock when we finally call |
| * RecordRelationFreeSpace. Since the FSM will probably drop pages with |
| * little free space anyway, there's no point in making this really small. |
| * |
| * XXX Is it worth trying to measure average tuple size, and using that to |
| * adjust the threshold? Would be worthwhile if FSM has no stats yet for |
| * this relation. But changing the threshold as we scan the rel might |
| * lead to bizarre behavior, too. Also, it's probably better if vacuum.c |
| * has the same thresholding behavior as we do here. |
| */ |
| if (avail < vacrelstats->threshold) |
| return; |
| |
| /* Count all pages over threshold, even if not enough space in array */ |
| vacrelstats->tot_free_pages++; |
| |
| /* Copy pointers to local variables for notational simplicity */ |
| pageSpaces = vacrelstats->free_pages; |
| n = vacrelstats->max_free_pages; |
| |
| /* If we haven't filled the array yet, just keep adding entries */ |
| if (vacrelstats->num_free_pages < n) |
| { |
| pageSpaces[vacrelstats->num_free_pages].blkno = page; |
| pageSpaces[vacrelstats->num_free_pages].avail = avail; |
| vacrelstats->num_free_pages++; |
| return; |
| } |
| |
| /*---------- |
| * The rest of this routine works with "heap" organization of the |
| * free space arrays, wherein we maintain the heap property |
| * avail[(j-1) div 2] <= avail[j] for 0 < j < n. |
| * In particular, the zero'th element always has the smallest available |
| * space and can be discarded to make room for a new page with more space. |
| * See Knuth's discussion of heap-based priority queues, sec 5.2.3; |
| * but note he uses 1-origin array subscripts, not 0-origin. |
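| * |
| * For example, avail = {5, 9, 7, 12, 10} satisfies the invariant: |
| * entry 0 holds the minimum, making it the natural eviction victim |
| * when a page with more free space arrives. |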
| *---------- |
| */ |
| |
| /* If we haven't yet converted the array to heap organization, do it */ |
| if (!vacrelstats->fs_is_heap) |
| { |
| /* |
| * Scan backwards through the array, "sift-up" each value into its |
| * correct position. We can start the scan at n/2-1 since each entry |
| * above that position has no children to worry about. |
| */ |
| int l = n / 2; |
| |
| while (--l >= 0) |
| { |
| BlockNumber R = pageSpaces[l].blkno; |
| Size K = pageSpaces[l].avail; |
| int i; /* i is where the "hole" is */ |
| |
| i = l; |
| for (;;) |
| { |
| int j = 2 * i + 1; |
| |
| if (j >= n) |
| break; |
| if (j + 1 < n && pageSpaces[j].avail > pageSpaces[j + 1].avail) |
| j++; |
| if (K <= pageSpaces[j].avail) |
| break; |
| pageSpaces[i] = pageSpaces[j]; |
| i = j; |
| } |
| pageSpaces[i].blkno = R; |
| pageSpaces[i].avail = K; |
| } |
| |
| vacrelstats->fs_is_heap = true; |
| } |
| |
| /* If new page has more than zero'th entry, insert it into heap */ |
| if (avail > pageSpaces[0].avail) |
| { |
| /* |
| * Notionally, we replace the zero'th entry with the new data, and |
| * then sift-up to maintain the heap property. Physically, the new |
| * data doesn't get stored into the arrays until we find the right |
| * location for it. |
| */ |
| int i = 0; /* i is where the "hole" is */ |
| |
| for (;;) |
| { |
| int j = 2 * i + 1; |
| |
| if (j >= n) |
| break; |
| if (j + 1 < n && pageSpaces[j].avail > pageSpaces[j + 1].avail) |
| j++; |
| if (avail <= pageSpaces[j].avail) |
| break; |
| pageSpaces[i] = pageSpaces[j]; |
| i = j; |
| } |
| pageSpaces[i].blkno = page; |
| pageSpaces[i].avail = avail; |
| } |
| } |
| |
| /* |
| * lazy_tid_reaped() -- is a particular tid deletable? |
| * |
| * This has the right signature to be an IndexBulkDeleteCallback. |
| * |
| * Assumes dead_tuples array is in sorted order. |
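| * (The order falls out naturally: lazy_scan_heap records TIDs in |
| * physical block/offset order as it walks the relation.) |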
| */ |
| static bool |
| lazy_tid_reaped(ItemPointer itemptr, void *state) |
| { |
| LVRelStats *vacrelstats = (LVRelStats *) state; |
| ItemPointer res; |
| |
| res = (ItemPointer) bsearch((void *) itemptr, |
| (void *) vacrelstats->dead_tuples, |
| vacrelstats->num_dead_tuples, |
| sizeof(ItemPointerData), |
| vac_cmp_itemptr); |
| |
| return (res != NULL); |
| } |
| |
| /* |
| * Update the shared Free Space Map with the info we now have about |
| * free space in the relation, discarding any old info the map may have. |
| */ |
| static void |
| lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats) |
| { |
| PageFreeSpaceInfo *pageSpaces = vacrelstats->free_pages; |
| int nPages = vacrelstats->num_free_pages; |
| |
| /* |
| * Sort data into order, as required by RecordRelationFreeSpace. |
| */ |
| if (nPages > 1) |
| qsort(pageSpaces, nPages, sizeof(PageFreeSpaceInfo), |
| vac_cmp_page_spaces); |
| |
| RecordRelationFreeSpace(&onerel->rd_node, vacrelstats->tot_free_pages, |
| nPages, pageSpaces); |
| } |
| |
| /* |
| * Comparator routines for use with qsort() and bsearch(). |
| */ |
| static int |
| vac_cmp_itemptr(const void *left, const void *right) |
| { |
| BlockNumber lblk, |
| rblk; |
| OffsetNumber loff, |
| roff; |
| |
| lblk = ItemPointerGetBlockNumber((ItemPointer) left); |
| rblk = ItemPointerGetBlockNumber((ItemPointer) right); |
| |
| if (lblk < rblk) |
| return -1; |
| if (lblk > rblk) |
| return 1; |
| |
| loff = ItemPointerGetOffsetNumber((ItemPointer) left); |
| roff = ItemPointerGetOffsetNumber((ItemPointer) right); |
| |
| if (loff < roff) |
| return -1; |
| if (loff > roff) |
| return 1; |
| |
| return 0; |
| } |
| |
| static int |
| vac_cmp_page_spaces(const void *left, const void *right) |
| { |
| PageFreeSpaceInfo *linfo = (PageFreeSpaceInfo *) left; |
| PageFreeSpaceInfo *rinfo = (PageFreeSpaceInfo *) right; |
| |
| if (linfo->blkno < rinfo->blkno) |
| return -1; |
| else if (linfo->blkno > rinfo->blkno) |
| return 1; |
| return 0; |
| } |