| /*------------------------------------------------------------------------- |
| * |
| * spgvacuum.c |
| * vacuum for SP-GiST |
| * |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * IDENTIFICATION |
| * src/backend/access/spgist/spgvacuum.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "access/genam.h" |
| #include "access/spgist_private.h" |
| #include "access/spgxlog.h" |
| #include "access/transam.h" |
| #include "access/xloginsert.h" |
| #include "catalog/storage_xlog.h" |
| #include "commands/vacuum.h" |
| #include "miscadmin.h" |
| #include "storage/bufmgr.h" |
| #include "storage/indexfsm.h" |
| #include "storage/lmgr.h" |
| #include "utils/snapmgr.h" |
| |
| |
| /* Entry in pending-list of TIDs we need to revisit */ |
| typedef struct spgVacPendingItem |
| { |
| ItemPointerData tid; /* redirection target to visit */ |
| bool done; /* have we dealt with this? */ |
| struct spgVacPendingItem *next; /* list link */ |
| } spgVacPendingItem; |
| |
| /* Local state for vacuum operations */ |
| typedef struct spgBulkDeleteState |
| { |
| /* Parameters passed in to spgvacuumscan */ |
| IndexVacuumInfo *info; |
| IndexBulkDeleteResult *stats; |
| IndexBulkDeleteCallback callback; |
| void *callback_state; |
| |
| /* Additional working state */ |
| SpGistState spgstate; /* for SPGiST operations that need one */ |
| spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */ |
| TransactionId myXmin; /* for detecting newly-added redirects */ |
| BlockNumber lastFilledBlock; /* last non-deletable block */ |
| } spgBulkDeleteState; |
| |
| |
| /* |
| * Add TID to pendingList, but only if not already present. |
| * |
| * Note that new items are always appended at the end of the list; this |
| * ensures that scans of the list don't miss items added during the scan. |
| */ |
| static void |
| spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid) |
| { |
| spgVacPendingItem *pitem; |
| spgVacPendingItem **listLink; |
| |
| /* search the list for pre-existing entry */ |
| listLink = &bds->pendingList; |
| while (*listLink != NULL) |
| { |
| pitem = *listLink; |
| if (ItemPointerEquals(tid, &pitem->tid)) |
| return; /* already in list, do nothing */ |
| listLink = &pitem->next; |
| } |
| /* not there, so append new entry */ |
| pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem)); |
| pitem->tid = *tid; |
| pitem->done = false; |
| pitem->next = NULL; |
| *listLink = pitem; |
| } |
| |
| /* |
| * Clear pendingList |
| */ |
| static void |
| spgClearPendingList(spgBulkDeleteState *bds) |
| { |
| spgVacPendingItem *pitem; |
| spgVacPendingItem *nitem; |
| |
| for (pitem = bds->pendingList; pitem != NULL; pitem = nitem) |
| { |
| nitem = pitem->next; |
| /* All items in list should have been dealt with */ |
| Assert(pitem->done); |
| pfree(pitem); |
| } |
| bds->pendingList = NULL; |
| } |
| |
| /* |
| * Vacuum a regular (non-root) leaf page |
| * |
| * We must delete tuples that the passed-in callback says are to be deleted, |
| * but not move any tuples that are referenced by outside links; we assume |
| * those are the ones that are heads of chains. |
| * |
| * If we find a REDIRECT that was made by a concurrently-running transaction, |
| * we must add its target TID to pendingList. (We don't try to visit the |
| * target immediately, first because we don't want VACUUM locking more than |
| * one buffer at a time, and second because the duplicate-filtering logic |
| * in spgAddPendingTID is useful to ensure we can't get caught in an infinite |
| * loop in the face of continuous concurrent insertions.) |
| * |
| * If forPending is true, we are examining the page as a consequence of |
| * chasing a redirect link, not as part of the normal sequential scan. |
| * We still vacuum the page normally, but we don't increment the stats |
| * about live tuples; else we'd double-count those tuples, since the page |
| * has been or will be visited in the sequential scan as well. |
| */ |
| static void |
| vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer, |
| bool forPending) |
| { |
| Page page = BufferGetPage(buffer); |
| spgxlogVacuumLeaf xlrec; |
| OffsetNumber toDead[MaxIndexTuplesPerPage]; |
| OffsetNumber toPlaceholder[MaxIndexTuplesPerPage]; |
| OffsetNumber moveSrc[MaxIndexTuplesPerPage]; |
| OffsetNumber moveDest[MaxIndexTuplesPerPage]; |
| OffsetNumber chainSrc[MaxIndexTuplesPerPage]; |
| OffsetNumber chainDest[MaxIndexTuplesPerPage]; |
| OffsetNumber predecessor[MaxIndexTuplesPerPage + 1]; /* predecessor[x] = offset whose chain link points to x, or 0 */ |
| bool deletable[MaxIndexTuplesPerPage + 1]; /* deletable[x] = true if the tuple at offset x is to be deleted */ |
| int nDeletable; |
| OffsetNumber i, |
| max = PageGetMaxOffsetNumber(page); |
| |
| memset(predecessor, 0, sizeof(predecessor)); |
| memset(deletable, 0, sizeof(deletable)); |
| nDeletable = 0; |
| |
| /* Scan page, identify tuples to delete, accumulate stats */ |
| for (i = FirstOffsetNumber; i <= max; i++) |
| { |
| SpGistLeafTuple lt; |
| |
| lt = (SpGistLeafTuple) PageGetItem(page, |
| PageGetItemId(page, i)); |
| if (lt->tupstate == SPGIST_LIVE) |
| { |
| Assert(ItemPointerIsValid(&lt->heapPtr)); |
| |
| if (bds->callback(&lt->heapPtr, bds->callback_state)) |
| { |
| bds->stats->tuples_removed += 1; |
| deletable[i] = true; |
| nDeletable++; |
| } |
| else |
| { |
| if (!forPending) |
| bds->stats->num_index_tuples += 1; |
| } |
| |
| /* Form predecessor map, too */ |
| if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber) |
| { |
| /* paranoia about corrupted chain links */ |
| if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber || |
| SGLT_GET_NEXTOFFSET(lt) > max || |
| predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber) |
| elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"", |
| BufferGetBlockNumber(buffer), |
| RelationGetRelationName(index)); |
| predecessor[SGLT_GET_NEXTOFFSET(lt)] = i; |
| } |
| } |
| else if (lt->tupstate == SPGIST_REDIRECT) |
| { |
| SpGistDeadTuple dt = (SpGistDeadTuple) lt; |
| |
| Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber); |
| Assert(ItemPointerIsValid(&dt->pointer)); |
| |
| /* |
| * Add target TID to pending list if the redirection could have |
| * happened since VACUUM started. (If xid is invalid, assume it |
| * must have happened before VACUUM started, since REINDEX |
| * CONCURRENTLY locks out VACUUM.) |
| * |
| * Note: we could make a tighter test by seeing if the xid is |
| * "running" according to the active snapshot; but snapmgr.c |
| * doesn't currently export a suitable API, and it's not entirely |
| * clear that a tighter test is worth the cycles anyway. |
| */ |
| if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin)) |
| spgAddPendingTID(bds, &dt->pointer); |
| } |
| else |
| { |
| Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber); |
| } |
| } |
| |
| if (nDeletable == 0) |
| return; /* nothing more to do */ |
| |
| /*---------- |
| * Figure out exactly what we have to do. We do this separately from |
| * actually modifying the page, mainly so that we have a representation |
| * that can be dumped into WAL and then the replay code can do exactly |
| * the same thing. The output of this step consists of six arrays |
| * describing four kinds of operations, to be performed in this order: |
| * |
| * toDead[]: tuple numbers to be replaced with DEAD tuples |
| * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples |
| * moveSrc[]: tuple numbers that need to be relocated to another offset |
| * (replacing the tuple there) and then replaced with PLACEHOLDER tuples |
| * moveDest[]: new locations for moveSrc tuples |
| * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates |
| * chainDest[]: new values of nextOffset for chainSrc members |
| * |
| * It's easiest to figure out what we have to do by processing tuple |
| * chains, so we iterate over all the tuples (not just the deletable |
| * ones!) to identify chain heads, then chase down each chain and make |
| * work item entries for deletable tuples within the chain. |
| *---------- |
| */ |
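| /* |
| * Illustrative example (hypothetical offsets): suppose offsets 1->3->4->6 |
| * form a chain and the callback wants offsets 1 and 4 deleted. The first |
| * remaining live tuple (3) must take over the head slot: moveSrc=3, |
| * moveDest=1, and slot 3 is afterwards turned into a PLACEHOLDER. Offset 4 |
| * simply becomes a PLACEHOLDER: toPlaceholder=4. Finally the moved tuple's |
| * chain link must skip the gap: chainSrc=1, chainDest=6. No DEAD tuple is |
| * needed, since the chain is not entirely removable. (If every member were |
| * deletable, the head offset would instead go into toDead[].) |
| */ |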
| xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0; |
| |
| for (i = FirstOffsetNumber; i <= max; i++) |
| { |
| SpGistLeafTuple head; |
| bool interveningDeletable; |
| OffsetNumber prevLive; |
| OffsetNumber j; |
| |
| head = (SpGistLeafTuple) PageGetItem(page, |
| PageGetItemId(page, i)); |
| if (head->tupstate != SPGIST_LIVE) |
| continue; /* can't be a chain member */ |
| if (predecessor[i] != 0) |
| continue; /* not a chain head */ |
| |
| /* initialize ... */ |
| interveningDeletable = false; |
| prevLive = deletable[i] ? InvalidOffsetNumber : i; |
| |
| /* scan down the chain ... */ |
| j = SGLT_GET_NEXTOFFSET(head); |
| while (j != InvalidOffsetNumber) |
| { |
| SpGistLeafTuple lt; |
| |
| lt = (SpGistLeafTuple) PageGetItem(page, |
| PageGetItemId(page, j)); |
| if (lt->tupstate != SPGIST_LIVE) |
| { |
| /* all tuples in chain should be live */ |
| elog(ERROR, "unexpected SPGiST tuple state: %d", |
| lt->tupstate); |
| } |
| |
| if (deletable[j]) |
| { |
| /* This tuple should be replaced by a placeholder */ |
| toPlaceholder[xlrec.nPlaceholder] = j; |
| xlrec.nPlaceholder++; |
| /* previous live tuple's chain link will need an update */ |
| interveningDeletable = true; |
| } |
| else if (prevLive == InvalidOffsetNumber) |
| { |
| /* |
| * This is the first live tuple in the chain. It has to move |
| * to the head position. |
| */ |
| moveSrc[xlrec.nMove] = j; |
| moveDest[xlrec.nMove] = i; |
| xlrec.nMove++; |
| /* Chain updates will be applied after the move */ |
| prevLive = i; |
| interveningDeletable = false; |
| } |
| else |
| { |
| /* |
| * Second or later live tuple. Arrange to re-chain it to the |
| * previous live one, if there was a gap. |
| */ |
| if (interveningDeletable) |
| { |
| chainSrc[xlrec.nChain] = prevLive; |
| chainDest[xlrec.nChain] = j; |
| xlrec.nChain++; |
| } |
| prevLive = j; |
| interveningDeletable = false; |
| } |
| |
| j = SGLT_GET_NEXTOFFSET(lt); |
| } |
| |
| if (prevLive == InvalidOffsetNumber) |
| { |
| /* The chain is entirely removable, so we need a DEAD tuple */ |
| toDead[xlrec.nDead] = i; |
| xlrec.nDead++; |
| } |
| else if (interveningDeletable) |
| { |
| /* One or more deletions at end of chain, so close it off */ |
| chainSrc[xlrec.nChain] = prevLive; |
| chainDest[xlrec.nChain] = InvalidOffsetNumber; |
| xlrec.nChain++; |
| } |
| } |
| |
| /* sanity check ... */ |
| if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove) |
| elog(ERROR, "inconsistent counts of deletable tuples"); |
| |
| /* Do the updates */ |
| START_CRIT_SECTION(); |
| |
| spgPageIndexMultiDelete(&bds->spgstate, page, |
| toDead, xlrec.nDead, |
| SPGIST_DEAD, SPGIST_DEAD, |
| InvalidBlockNumber, InvalidOffsetNumber); |
| |
| spgPageIndexMultiDelete(&bds->spgstate, page, |
| toPlaceholder, xlrec.nPlaceholder, |
| SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, |
| InvalidBlockNumber, InvalidOffsetNumber); |
| |
| /* |
| * We implement the move step by swapping the line pointers of the source |
| * and target tuples, then replacing the newly-source tuples with |
| * placeholders. This is perhaps unduly friendly with the page data |
| * representation, but it's fast and doesn't risk page overflow when a |
| * tuple to be relocated is large. |
| */ |
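| /* |
| * Continuing the hypothetical example above: with moveSrc={3} and |
| * moveDest={1}, the ItemIds of offsets 1 and 3 are exchanged here, and the |
| * multi-delete just below then turns offset 3 (which now holds the deleted |
| * head's data) into a PLACEHOLDER. |
| */ |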
| for (i = 0; i < xlrec.nMove; i++) |
| { |
| ItemId idSrc = PageGetItemId(page, moveSrc[i]); |
| ItemId idDest = PageGetItemId(page, moveDest[i]); |
| ItemIdData tmp; |
| |
| tmp = *idSrc; |
| *idSrc = *idDest; |
| *idDest = tmp; |
| } |
| |
| spgPageIndexMultiDelete(&bds->spgstate, page, |
| moveSrc, xlrec.nMove, |
| SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER, |
| InvalidBlockNumber, InvalidOffsetNumber); |
| |
| for (i = 0; i < xlrec.nChain; i++) |
| { |
| SpGistLeafTuple lt; |
| |
| lt = (SpGistLeafTuple) PageGetItem(page, |
| PageGetItemId(page, chainSrc[i])); |
| Assert(lt->tupstate == SPGIST_LIVE); |
| SGLT_SET_NEXTOFFSET(lt, chainDest[i]); |
| } |
| |
| MarkBufferDirty(buffer); |
| |
| if (RelationNeedsWAL(index)) |
| { |
| XLogRecPtr recptr; |
| |
| XLogBeginInsert(); |
| |
| STORE_STATE(&bds->spgstate, xlrec.stateSrc); |
| |
| XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf); |
| /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */ |
| XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead); |
| XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder); |
| XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove); |
| XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove); |
| XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain); |
| XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain); |
| |
| XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); |
| |
| recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF); |
| |
| PageSetLSN(page, recptr); |
| } |
| |
| END_CRIT_SECTION(); |
| } |
| |
| /* |
| * Vacuum a root page when it is also a leaf |
| * |
| * On the root, we just delete any dead leaf tuples; no fancy business |
| */ |
| static void |
| vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer) |
| { |
| Page page = BufferGetPage(buffer); |
| spgxlogVacuumRoot xlrec; |
| OffsetNumber toDelete[MaxIndexTuplesPerPage]; |
| OffsetNumber i, |
| max = PageGetMaxOffsetNumber(page); |
| |
| xlrec.nDelete = 0; |
| |
| /* Scan page, identify tuples to delete, accumulate stats */ |
| for (i = FirstOffsetNumber; i <= max; i++) |
| { |
| SpGistLeafTuple lt; |
| |
| lt = (SpGistLeafTuple) PageGetItem(page, |
| PageGetItemId(page, i)); |
| if (lt->tupstate == SPGIST_LIVE) |
| { |
| Assert(ItemPointerIsValid(&lt->heapPtr)); |
| |
| if (bds->callback(&lt->heapPtr, bds->callback_state)) |
| { |
| bds->stats->tuples_removed += 1; |
| toDelete[xlrec.nDelete] = i; |
| xlrec.nDelete++; |
| } |
| else |
| { |
| bds->stats->num_index_tuples += 1; |
| } |
| } |
| else |
| { |
| /* all tuples on root should be live */ |
| elog(ERROR, "unexpected SPGiST tuple state: %d", |
| lt->tupstate); |
| } |
| } |
| |
| if (xlrec.nDelete == 0) |
| return; /* nothing more to do */ |
| |
| /* Do the update */ |
| START_CRIT_SECTION(); |
| |
| /* The tuple numbers are in order, so we can use PageIndexMultiDelete */ |
| PageIndexMultiDelete(page, toDelete, xlrec.nDelete); |
| |
| MarkBufferDirty(buffer); |
| |
| if (RelationNeedsWAL(index)) |
| { |
| XLogRecPtr recptr; |
| |
| XLogBeginInsert(); |
| |
| /* Prepare WAL record */ |
| STORE_STATE(&bds->spgstate, xlrec.stateSrc); |
| |
| XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot); |
| /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */ |
| XLogRegisterData((char *) toDelete, |
| sizeof(OffsetNumber) * xlrec.nDelete); |
| |
| XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); |
| |
| recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT); |
| |
| PageSetLSN(page, recptr); |
| } |
| |
| END_CRIT_SECTION(); |
| } |
| |
| /* |
| * Clean up redirect and placeholder tuples on the given page |
| * |
| * Redirect tuples can be marked placeholder once they're old enough. |
| * Placeholder tuples can be removed if it won't change the offsets of |
| * non-placeholder ones. |
| * |
| * Unlike the routines above, this works on both leaf and inner pages. |
| */ |
| static void |
| vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer) |
| { |
| Page page = BufferGetPage(buffer); |
| SpGistPageOpaque opaque = SpGistPageGetOpaque(page); |
| OffsetNumber i, |
| max = PageGetMaxOffsetNumber(page), |
| firstPlaceholder = InvalidOffsetNumber; |
| bool hasNonPlaceholder = false; |
| bool hasUpdate = false; |
| OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage]; |
| OffsetNumber itemnos[MaxIndexTuplesPerPage]; |
| spgxlogVacuumRedirect xlrec; |
| GlobalVisState *vistest; |
| |
| xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel); |
| xlrec.nToPlaceholder = 0; |
| xlrec.snapshotConflictHorizon = InvalidTransactionId; |
| |
| vistest = GlobalVisTestFor(heaprel); |
| |
| START_CRIT_SECTION(); |
| |
| /* |
| * Scan backwards to convert old redirection tuples to placeholder tuples, |
| * and identify location of last non-placeholder tuple while at it. |
| */ |
| for (i = max; |
| i >= FirstOffsetNumber && |
| (opaque->nRedirection > 0 || !hasNonPlaceholder); |
| i--) |
| { |
| SpGistDeadTuple dt; |
| |
| dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i)); |
| |
| /* |
| * We can convert a REDIRECT to a PLACEHOLDER if there could no longer |
| * be any index scans "in flight" to it. Such an index scan would |
| * have to be in a transaction whose snapshot sees the REDIRECT's XID |
| * as still running, so comparing the XID against global xmin is a |
| * conservatively safe test. If the XID is invalid, it must have been |
| * inserted by REINDEX CONCURRENTLY, so we can zap it immediately. |
| */ |
| if (dt->tupstate == SPGIST_REDIRECT && |
| (!TransactionIdIsValid(dt->xid) || |
| GlobalVisTestIsRemovableXid(vistest, dt->xid))) |
| { |
| dt->tupstate = SPGIST_PLACEHOLDER; |
| Assert(opaque->nRedirection > 0); |
| opaque->nRedirection--; |
| opaque->nPlaceholder++; |
| |
| /* remember newest XID among the removed redirects, for standby conflict resolution */ |
| if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) || |
| TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid)) |
| xlrec.snapshotConflictHorizon = dt->xid; |
| |
| ItemPointerSetInvalid(&dt->pointer); |
| |
| itemToPlaceholder[xlrec.nToPlaceholder] = i; |
| xlrec.nToPlaceholder++; |
| |
| hasUpdate = true; |
| } |
| |
| if (dt->tupstate == SPGIST_PLACEHOLDER) |
| { |
| if (!hasNonPlaceholder) |
| firstPlaceholder = i; |
| } |
| else |
| { |
| hasNonPlaceholder = true; |
| } |
| } |
| |
| /* |
| * Any placeholder tuples at the end of the page can safely be removed. We |
| * can't remove ones before the last non-placeholder, though, because we |
| * can't alter the offset numbers of non-placeholder tuples. |
| */ |
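| /* |
| * For instance (hypothetical values): if max = 10 and the trailing |
| * placeholders start at firstPlaceholder = 7, offsets 7..10 are deleted |
| * below and nPlaceholder is reduced by 4. |
| */ |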
| if (firstPlaceholder != InvalidOffsetNumber) |
| { |
| /* |
| * We do not store this array to rdata because it's easy to recreate. |
| */ |
| for (i = firstPlaceholder; i <= max; i++) |
| itemnos[i - firstPlaceholder] = i; |
| |
| i = max - firstPlaceholder + 1; |
| Assert(opaque->nPlaceholder >= i); |
| opaque->nPlaceholder -= i; |
| |
| /* The array is surely sorted, so can use PageIndexMultiDelete */ |
| PageIndexMultiDelete(page, itemnos, i); |
| |
| hasUpdate = true; |
| } |
| |
| xlrec.firstPlaceholder = firstPlaceholder; |
| |
| if (hasUpdate) |
| MarkBufferDirty(buffer); |
| |
| if (hasUpdate && RelationNeedsWAL(index)) |
| { |
| XLogRecPtr recptr; |
| |
| XLogBeginInsert(); |
| |
| XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect); |
| XLogRegisterData((char *) itemToPlaceholder, |
| sizeof(OffsetNumber) * xlrec.nToPlaceholder); |
| |
| XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); |
| |
| recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT); |
| |
| PageSetLSN(page, recptr); |
| } |
| |
| END_CRIT_SECTION(); |
| } |
| |
| /* |
| * Process one page during a bulkdelete scan |
| */ |
| static void |
| spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno) |
| { |
| Relation index = bds->info->index; |
| Buffer buffer; |
| Page page; |
| |
| /* call vacuum_delay_point while not holding any buffer lock */ |
| vacuum_delay_point(); |
| |
| buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, |
| RBM_NORMAL, bds->info->strategy); |
| LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); |
| page = (Page) BufferGetPage(buffer); |
| |
| if (PageIsNew(page)) |
| { |
| /* |
| * We found an all-zero page, which could happen if the database |
| * crashed just after extending the file. Recycle it. |
| */ |
| } |
| else if (PageIsEmpty(page)) |
| { |
| /* nothing to do */ |
| } |
| else if (SpGistPageIsLeaf(page)) |
| { |
| if (SpGistBlockIsRoot(blkno)) |
| { |
| vacuumLeafRoot(bds, index, buffer); |
| /* no need for vacuumRedirectAndPlaceholder */ |
| } |
| else |
| { |
| vacuumLeafPage(bds, index, buffer, false); |
| vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer); |
| } |
| } |
| else |
| { |
| /* inner page */ |
| vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer); |
| } |
| |
| /* |
| * The root pages must never be deleted, nor marked as available in FSM, |
| * because we don't want them ever returned by a search for a place to put |
| * a new tuple. Otherwise, check for empty page, and make sure the FSM |
| * knows about it. |
| */ |
| if (!SpGistBlockIsRoot(blkno)) |
| { |
| if (PageIsNew(page) || PageIsEmpty(page)) |
| { |
| RecordFreeIndexPage(index, blkno); |
| bds->stats->pages_deleted++; |
| } |
| else |
| { |
| SpGistSetLastUsedPage(index, buffer); |
| bds->lastFilledBlock = blkno; |
| } |
| } |
| |
| UnlockReleaseBuffer(buffer); |
| } |
| |
| /* |
| * Process the pending-TID list between pages of the main scan |
| */ |
| static void |
| spgprocesspending(spgBulkDeleteState *bds) |
| { |
| Relation index = bds->info->index; |
| spgVacPendingItem *pitem; |
| spgVacPendingItem *nitem; |
| BlockNumber blkno; |
| Buffer buffer; |
| Page page; |
| |
| for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next) |
| { |
| if (pitem->done) |
| continue; /* ignore already-done items */ |
| |
| /* call vacuum_delay_point while not holding any buffer lock */ |
| vacuum_delay_point(); |
| |
| /* examine the referenced page */ |
| blkno = ItemPointerGetBlockNumber(&pitem->tid); |
| buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, |
| RBM_NORMAL, bds->info->strategy); |
| LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); |
| page = (Page) BufferGetPage(buffer); |
| |
| if (PageIsNew(page) || SpGistPageIsDeleted(page)) |
| { |
| /* Probably shouldn't happen, but ignore it */ |
| } |
| else if (SpGistPageIsLeaf(page)) |
| { |
| if (SpGistBlockIsRoot(blkno)) |
| { |
| /* this should definitely not happen */ |
| elog(ERROR, "redirection leads to root page of index \"%s\"", |
| RelationGetRelationName(index)); |
| } |
| |
| /* deal with any deletable tuples */ |
| vacuumLeafPage(bds, index, buffer, true); |
| /* might as well do this while we are here */ |
| vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer); |
| |
| SpGistSetLastUsedPage(index, buffer); |
| |
| /* |
| * We can mark as done not only this item, but any later ones |
| * pointing at the same page, since we vacuumed the whole page. |
| */ |
| pitem->done = true; |
| for (nitem = pitem->next; nitem != NULL; nitem = nitem->next) |
| { |
| if (ItemPointerGetBlockNumber(&nitem->tid) == blkno) |
| nitem->done = true; |
| } |
| } |
| else |
| { |
| /* |
| * On an inner page, visit the referenced inner tuple and add all |
| * its downlinks to the pending list. We might have pending items |
| * for more than one inner tuple on the same page (in fact this is |
| * pretty likely given the way space allocation works), so get |
| * them all while we are here. |
| */ |
| for (nitem = pitem; nitem != NULL; nitem = nitem->next) |
| { |
| if (nitem->done) |
| continue; |
| if (ItemPointerGetBlockNumber(&nitem->tid) == blkno) |
| { |
| OffsetNumber offset; |
| SpGistInnerTuple innerTuple; |
| |
| offset = ItemPointerGetOffsetNumber(&nitem->tid); |
| innerTuple = (SpGistInnerTuple) PageGetItem(page, |
| PageGetItemId(page, offset)); |
| if (innerTuple->tupstate == SPGIST_LIVE) |
| { |
| SpGistNodeTuple node; |
| int i; |
| |
| SGITITERATE(innerTuple, i, node) |
| { |
| if (ItemPointerIsValid(&node->t_tid)) |
| spgAddPendingTID(bds, &node->t_tid); |
| } |
| } |
| else if (innerTuple->tupstate == SPGIST_REDIRECT) |
| { |
| /* transfer attention to redirect point */ |
| spgAddPendingTID(bds, |
| &((SpGistDeadTuple) innerTuple)->pointer); |
| } |
| else |
| elog(ERROR, "unexpected SPGiST tuple state: %d", |
| innerTuple->tupstate); |
| |
| nitem->done = true; |
| } |
| } |
| } |
| |
| UnlockReleaseBuffer(buffer); |
| } |
| |
| spgClearPendingList(bds); |
| } |
| |
| /* |
| * Perform a bulkdelete scan |
| */ |
| static void |
| spgvacuumscan(spgBulkDeleteState *bds) |
| { |
| Relation index = bds->info->index; |
| bool needLock; |
| BlockNumber num_pages, |
| blkno; |
| |
| /* Finish setting up spgBulkDeleteState */ |
| initSpGistState(&bds->spgstate, index); |
| bds->pendingList = NULL; |
| bds->myXmin = GetActiveSnapshot()->xmin; |
| bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO; |
| |
| /* |
| * Reset counts that will be incremented during the scan; needed in case |
| * of multiple scans during a single VACUUM command |
| */ |
| bds->stats->estimated_count = false; |
| bds->stats->num_index_tuples = 0; |
| bds->stats->pages_deleted = 0; |
| |
| /* We can skip locking for new or temp relations */ |
| needLock = !RELATION_IS_LOCAL(index); |
| |
| /* |
| * The outer loop iterates over all index pages except the metapage, in |
| * physical order (we hope the kernel will cooperate in providing |
| * read-ahead for speed). It is critical that we visit all leaf pages, |
| * including ones added after we start the scan, else we might fail to |
| * delete some deletable tuples. See more extensive comments about this |
| * in btvacuumscan(). |
| */ |
| blkno = SPGIST_METAPAGE_BLKNO + 1; |
| for (;;) |
| { |
| /* Get the current relation length */ |
| if (needLock) |
| LockRelationForExtension(index, ExclusiveLock); |
| num_pages = RelationGetNumberOfBlocks(index); |
| if (needLock) |
| UnlockRelationForExtension(index, ExclusiveLock); |
| |
| /* Quit if we've scanned the whole relation */ |
| if (blkno >= num_pages) |
| break; |
| /* Iterate over pages, then loop back to recheck length */ |
| for (; blkno < num_pages; blkno++) |
| { |
| spgvacuumpage(bds, blkno); |
| /* empty the pending-list after each page */ |
| if (bds->pendingList != NULL) |
| spgprocesspending(bds); |
| } |
| } |
| |
| /* Propagate local lastUsedPages cache to metablock */ |
| SpGistUpdateMetaPage(index); |
| |
| /* |
| * If we found any empty pages (and recorded them in the FSM), then |
| * forcibly update the upper-level FSM pages to ensure that searchers can |
| * find them. It's possible that the pages were also found during |
| * previous scans and so this is a waste of time, but it's cheap enough |
| * relative to scanning the index that it shouldn't matter much, and |
| * making sure that free pages are available sooner not later seems |
| * worthwhile. |
| * |
| * Note that if no empty pages exist, we don't bother vacuuming the FSM at |
| * all. |
| */ |
| if (bds->stats->pages_deleted > 0) |
| IndexFreeSpaceMapVacuum(index); |
| |
| /* |
| * Truncate index if possible |
| * |
| * XXX disabled because it's unsafe due to possible concurrent inserts. |
| * We'd have to rescan the pages to make sure they're still empty, and it |
| * doesn't seem worth it. Note that btree doesn't do this either. |
| * |
| * Another reason not to truncate is that it could invalidate the cached |
| * pages-with-freespace pointers in the metapage and other backends' |
| * relation caches, that is leave them pointing to nonexistent pages. |
| * Adding RelationGetNumberOfBlocks calls to protect the places that use |
| * those pointers would be unduly expensive. |
| */ |
| #ifdef NOT_USED |
| if (num_pages > bds->lastFilledBlock + 1) |
| { |
| BlockNumber lastBlock = num_pages - 1; |
| |
| num_pages = bds->lastFilledBlock + 1; |
| RelationTruncate(index, num_pages); |
| bds->stats->pages_removed += lastBlock - bds->lastFilledBlock; |
| bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock; |
| } |
| #endif |
| |
| /* Report final stats */ |
| bds->stats->num_pages = num_pages; |
| bds->stats->pages_newly_deleted = bds->stats->pages_deleted; |
| bds->stats->pages_free = bds->stats->pages_deleted; |
| } |
| |
| /* |
| * Bulk deletion of all index entries pointing to a set of heap tuples. |
| * The set of target tuples is specified via a callback routine that tells |
| * whether any given heap tuple (identified by ItemPointer) is being deleted. |
| * |
| * Result: a palloc'd struct containing statistical info for VACUUM displays. |
| */ |
| IndexBulkDeleteResult * |
| spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, |
| IndexBulkDeleteCallback callback, void *callback_state) |
| { |
| spgBulkDeleteState bds; |
| |
| /* allocate stats if first time through, else re-use existing struct */ |
| if (stats == NULL) |
| stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); |
| bds.info = info; |
| bds.stats = stats; |
| bds.callback = callback; |
| bds.callback_state = callback_state; |
| |
| spgvacuumscan(&bds); |
| |
| return stats; |
| } |
| |
| /* Dummy callback to delete no tuples during spgvacuumcleanup */ |
| static bool |
| dummy_callback(ItemPointer itemptr, void *state) |
| { |
| return false; |
| } |
| |
| /* |
| * Post-VACUUM cleanup. |
| * |
| * Result: a palloc'd struct containing statistical info for VACUUM displays. |
| */ |
| IndexBulkDeleteResult * |
| spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) |
| { |
| spgBulkDeleteState bds; |
| |
| /* No-op in ANALYZE ONLY mode */ |
| if (info->analyze_only) |
| return stats; |
| |
| /* |
| * We don't need to scan the index if there was a preceding bulkdelete |
| * pass. Otherwise, make a pass that won't delete any live tuples, but |
| * might still accomplish useful stuff with redirect/placeholder cleanup |
| * and/or FSM housekeeping, and in any case will provide stats. |
| */ |
| if (stats == NULL) |
| { |
| stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); |
| bds.info = info; |
| bds.stats = stats; |
| bds.callback = dummy_callback; |
| bds.callback_state = NULL; |
| |
| spgvacuumscan(&bds); |
| } |
| |
| /* |
| * It's quite possible for us to be fooled by concurrent tuple moves into |
| * double-counting some index tuples, so disbelieve any total that exceeds |
| * the underlying heap's count ... if we know that accurately. Otherwise |
| * this might just make matters worse. |
| */ |
| if (!info->estimated_count) |
| { |
| if (stats->num_index_tuples > info->num_heap_tuples) |
| stats->num_index_tuples = info->num_heap_tuples; |
| } |
| |
| return stats; |
| } |