| /*------------------------------------------------------------------------- |
| * |
| * bufpage.h |
| * Standard POSTGRES buffer page definitions. |
| * |
| * |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * src/include/storage/bufpage.h |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #ifndef BUFPAGE_H |
| #define BUFPAGE_H |
| |
| #include "access/xlogdefs.h" |
| #include "common/relpath.h" |
| #include "storage/block.h" |
| #include "storage/item.h" |
| #include "storage/off.h" |
| #include "miscadmin.h" |
| |
| /* |
| * A postgres disk page is an abstraction layered on top of a postgres |
| * disk block (which is simply a unit of i/o, see block.h). |
| * |
| * specifically, while a disk block can be unformatted, a postgres |
| * disk page is always a slotted page of the form: |
| * |
 *   +----------------+---------------------------------+
 *   | PageHeaderData | linp1 linp2 linp3 ...           |
 *   +-----------+----+---------------------------------+
 *   | ... linpN |                                      |
 *   +-----------+--------------------------------------+
 *   |           ^ pd_lower                             |
 *   |                                                  |
 *   |             v pd_upper                           |
 *   +-------------+------------------------------------+
 *   |             | tupleN ...                         |
 *   +-------------+------------------+-----------------+
 *   |       ... tuple3 tuple2 tuple1 | "special space" |
 *   +--------------------------------+-----------------+
 *                                    ^ pd_special
| * |
| * a page is full when nothing can be added between pd_lower and |
| * pd_upper. |
| * |
| * all blocks written out by an access method must be disk pages. |
| * |
| * EXCEPTIONS: |
| * |
| * obviously, a page is not formatted before it is initialized by |
| * a call to PageInit. |
| * |
| * NOTES: |
| * |
| * linp1..N form an ItemId (line pointer) array. ItemPointers point |
| * to a physical block number and a logical offset (line pointer |
| * number) within that block/page. Note that OffsetNumbers |
| * conventionally start at 1, not 0. |
| * |
| * tuple1..N are added "backwards" on the page. Since an ItemPointer |
| * offset is used to access an ItemId entry rather than an actual |
| * byte-offset position, tuples can be physically shuffled on a page |
| * whenever the need arises. This indirection also keeps crash recovery |
| * relatively simple, because the low-level details of page space |
| * management can be controlled by standard buffer page code during |
| * logging, and during recovery. |
| * |
| * AM-generic per-page information is kept in PageHeaderData. |
| * |
| * AM-specific per-page data (if any) is kept in the area marked "special |
| * space"; each AM has an "opaque" structure defined somewhere that is |
| * stored as the page trailer. an access method should always |
| * initialize its pages with PageInit and then set its own opaque |
| * fields. |
| */ |
| |
| typedef Pointer Page; |
| |
| |
| /* |
| * location (byte offset) within a page. |
| * |
| * note that this is actually limited to 2^15 because we have limited |
| * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h). |
| */ |
| typedef uint16 LocationIndex; |
| |
| |
| /* |
| * For historical reasons, the 64-bit LSN value is stored as two 32-bit |
| * values. |
| */ |
| typedef struct |
| { |
| uint32 xlogid; /* high bits */ |
| uint32 xrecoff; /* low bits */ |
| } PageXLogRecPtr; |
| |
/*
 * Conversion between PageXLogRecPtr and a plain 64-bit LSN.
 *
 * PageXLogRecPtrGet(val) combines the two halves of "val" into one uint64.
 * PageXLogRecPtrSet(ptr, lsn) splits "lsn" and stores both halves in "ptr".
 *
 * Both macros evaluate their arguments more than once, so do not pass
 * expressions with side effects.
 */
#define PageXLogRecPtrGet(val) \
	((((uint64) (val).xlogid) << 32) | (val).xrecoff)
#define PageXLogRecPtrSet(ptr, lsn) \
	( \
		(ptr).xlogid = (uint32) ((lsn) >> 32), \
		(ptr).xrecoff = (uint32) (lsn) \
	)
| |
| /* |
| * disk page organization |
| * |
| * space management information generic to any page |
| * |
| * pd_lsn - identifies xlog record for last change to this page. |
| * pd_checksum - page checksum, if set. |
| * pd_flags - flag bits. |
| * pd_lower - offset to start of free space. |
| * pd_upper - offset to end of free space. |
| * pd_special - offset to start of special space. |
| * pd_pagesize_version - size in bytes and page layout version number. |
| * pd_prune_xid - oldest XID among potentially prunable tuples on page. |
| * |
| * The LSN is used by the buffer manager to enforce the basic rule of WAL: |
| * "thou shalt write xlog before data". A dirty buffer cannot be dumped |
| * to disk until xlog has been flushed at least as far as the page's LSN. |
| * |
| * pd_checksum stores the page checksum, if it has been set for this page; |
| * zero is a valid value for a checksum. If a checksum is not in use then |
| * we leave the field unset. This will typically mean the field is zero |
| * though non-zero values may also be present if databases have been |
| * pg_upgraded from releases prior to 9.3, when the same byte offset was |
| * used to store the current timelineid when the page was last updated. |
| * Note that there is no indication on a page as to whether the checksum |
| * is valid or not, a deliberate design choice which avoids the problem |
| * of relying on the page contents to decide whether to verify it. Hence |
| * there are no flag bits relating to checksums. |
| * |
| * pd_prune_xid is a hint field that helps determine whether pruning will be |
| * useful. It is currently unused in index pages. |
| * |
| * The page version number and page size are packed together into a single |
| * uint16 field. This is for historical reasons: before PostgreSQL 7.3, |
| * there was no concept of a page version number, and doing it this way |
| * lets us pretend that pre-7.3 databases have page version number zero. |
| * We constrain page sizes to be multiples of 256, leaving the low eight |
| * bits available for a version number. |
| * |
| * Minimum possible page size is perhaps 64B to fit page header, opaque space |
| * and a minimal tuple; of course, in reality you want it much bigger, so |
| * the constraint on pagesize mod 256 is not an important restriction. |
| * On the high end, we can only support pages up to 32KB because lp_off/lp_len |
| * are 15 bits. |
| */ |
| |
| typedef struct PageHeaderData |
| { |
| /* XXX LSN is member of *any* block, not only page-organized ones */ |
| PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog |
| * record for last change to this page */ |
| uint16 pd_checksum; /* checksum */ |
| uint16 pd_flags; /* flag bits, see below */ |
| LocationIndex pd_lower; /* offset to start of free space */ |
| LocationIndex pd_upper; /* offset to end of free space */ |
| LocationIndex pd_special; /* offset to start of special space */ |
| uint16 pd_pagesize_version; |
| TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ |
| ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ |
| } PageHeaderData; |
| |
| typedef PageHeaderData *PageHeader; |
| #define PageEncryptOffset offsetof(PageHeaderData, pd_special) |
| #define SizeOfPageEncryption (BLCKSZ - PageEncryptOffset) |
| |
/*
 * Flag bits stored in pd_flags.  Bits not defined here are initialized to
 * zero and may be used in the future.
 *
 * PD_HAS_FREE_LINES: there are LP_UNUSED line pointers before pd_lower.
 * Treat this as a hint rather than the truth, because changes to it are not
 * WAL-logged.
 *
 * PD_PAGE_FULL: an UPDATE did not find enough free space in the page for
 * its new tuple version, which suggests that a prune is needed.  This is
 * likewise only a hint.
 *
 * PD_ALL_VISIBLE: all tuples on the page are visible to everyone.
 */
#define PD_HAS_FREE_LINES	0x0001
#define PD_PAGE_FULL		0x0002
#define PD_ALL_VISIBLE		0x0004

/* OR of all valid pd_flags bits */
#define PD_VALID_FLAG_BITS \
	(PD_HAS_FREE_LINES | PD_PAGE_FULL | PD_ALL_VISIBLE)
| |
/*
 * Page layout version numbers:
 *
 *	 0	pre-7.3 Postgres releases.
 *	 1	Releases 7.3 and 7.4; denotes a new HeapTupleHeader layout.
 *	 2	Release 8.0; changed the HeapTupleHeader layout again.
 *	 3	Release 8.1; redefined HeapTupleHeader infomask bits.
 *	 4	Release 8.3; changed the HeapTupleHeader layout again, added the
 *		pd_flags field (by stealing some bits from pd_tli), and added the
 *		pd_prune_xid field (which enlarges the header).
 *	 4	GPDB 4 as well; however, it didn't have the pd_prune_xid field.
 *	14	GPDB 5.0.  The layout is the same as PostgreSQL 8.3's, but the same
 *		version number couldn't be used, because 4 had already been used
 *		for the previous GPDB format.
 *
 * As of Release 9.3, the checksum version must also be considered when
 * handling pages.
 */
#define PG_PAGE_LAYOUT_VERSION		14

#define PG_DATA_CHECKSUM_VERSION	1
| |
| /* ---------------------------------------------------------------- |
| * page support macros |
| * ---------------------------------------------------------------- |
| */ |
| |
/*
 * PageIsValid
 *		True iff the page pointer passes PointerIsValid().
 */
#define PageIsValid(page) \
	PointerIsValid(page)
| |
/*
 * SizeOfPageHeaderData
 *		Size of the fixed part of the page header.  The line pointer array
 *		is not counted, so this is simply the offset of pd_linp.
 */
#define SizeOfPageHeaderData \
	(offsetof(PageHeaderData, pd_linp))
| |
/*
 * PageIsEmpty
 *		True iff no itemid has been allocated on the page, i.e. pd_lower
 *		does not extend past the fixed-size page header.
 */
#define PageIsEmpty(page) \
	(SizeOfPageHeaderData >= ((PageHeader) (page))->pd_lower)
| |
/*
 * PageIsNew
 *		True iff the page has not been initialized (by PageInit).  The test
 *		is that pd_upper is still zero.
 */
#define PageIsNew(page) \
	(((PageHeader) (page))->pd_upper == 0)
| |
/*
 * PageGetItemId
 *		Returns the item identifier (line pointer) at the given offset
 *		number.  Offset numbers are 1-based, so offsetNumber 1 maps to
 *		pd_linp[0].
 */
#define PageGetItemId(page, offsetNumber) \
	((ItemId) (((PageHeader) (page))->pd_linp + ((offsetNumber) - 1)))
| |
/*
 * PageGetContents
 *		Returns a pointer to the page contents, for pages that do not
 *		contain line pointers.
 *
 * Note: the result is MAXALIGN'd; before 8.3 that was not guaranteed.
 * Beware of old code that assumes the contents start at
 * SizeOfPageHeaderData rather than at MAXALIGN(SizeOfPageHeaderData).
 */
#define PageGetContents(page) \
	(((char *) (page)) + MAXALIGN(SizeOfPageHeaderData))
| |
/*
 * PageGetContentsMaxAligned
 *		Returns the address of the start of the line pointer array rounded
 *		up with MAXALIGN, for storing 8-byte data in the page contents.
 */
#define PageGetContentsMaxAligned(page) \
	((char *) MAXALIGN(&(((PageHeader) (page))->pd_linp[0])))
| |
| /* ---------------- |
| * macros to access page size info |
| * ---------------- |
| */ |
| |
/*
 * PageSizeIsValid
 *		True iff the given page size is valid, which means exactly BLCKSZ.
 */
#define PageSizeIsValid(pageSize) \
	(BLCKSZ == (pageSize))
| |
/*
 * PageGetPageSize
 *		Returns the page size recorded in the page header (the high byte of
 *		pd_pagesize_version).
 *
 * This can only be called on a formatted page (unlike BufferGetPageSize,
 * which can be called on an unformatted page).  However, the page need not
 * be stored in a buffer.
 */
#define PageGetPageSize(page) \
	((Size) ((uint16) 0xFF00 & ((PageHeader) (page))->pd_pagesize_version))
| |
/*
 * PageGetPageLayoutVersion
 *		Returns the page layout version recorded in the page header (the
 *		low byte of pd_pagesize_version).
 */
#define PageGetPageLayoutVersion(page) \
	(0x00FF & ((PageHeader) (page))->pd_pagesize_version)
| |
/*
 * PageSetPageSizeAndVersion
 *		Stores the page size and page layout version number in the page
 *		header.
 *
 * "size" must fit entirely in the high byte mask (0xFF00) and "version" in
 * the low byte mask (0x00FF); both conditions are asserted.  The two values
 * could be set separately, but there is no real need for that at the
 * moment.
 *
 * The arguments are evaluated more than once.
 */
#define PageSetPageSizeAndVersion(page, size, version) \
( \
	AssertMacro(((size) & 0xFF00) == (size)), \
	AssertMacro(((version) & 0x00FF) == (version)), \
	((PageHeader) (page))->pd_pagesize_version = ((size) | (version)) \
)
| |
| /* ---------------- |
| * page special data macros |
| * ---------------- |
| */ |
/*
 * PageGetSpecialSize
 *		Returns the size of the special space on a page, computed as the
 *		page size minus pd_special.
 */
#define PageGetSpecialSize(page) \
	((uint16) (PageGetPageSize(page) - ((PageHeader) (page))->pd_special))
| |
| /* |
| * Using assertions, validate that the page special pointer is OK. |
| * |
| * This is intended to catch use of the pointer before page initialization. |
| * It is implemented as a function due to the limitations of the MSVC |
| * compiler, which choked on doing all these tests within another macro. We |
| * return true so that AssertMacro() can be used while still getting the |
| * specifics from the macro failure within this function. |
| */ |
| static inline bool |
| PageValidateSpecialPointer(Page page) |
| { |
| Assert(PageIsValid(page)); |
| Assert(((PageHeader) (page))->pd_special <= BLCKSZ); |
| Assert(((PageHeader) (page))->pd_special >= SizeOfPageHeaderData); |
| |
| return true; |
| } |
| |
/*
 * PageGetSpecialPointer
 *		Returns a pointer to the special space on a page, i.e. the page
 *		address plus pd_special.  The offset is validated with
 *		PageValidateSpecialPointer() in assert-enabled builds.
 */
#define PageGetSpecialPointer(page) \
( \
	AssertMacro(PageValidateSpecialPointer(page)), \
	(char *) (((char *) (page)) + ((PageHeader) (page))->pd_special) \
)
| |
/*
 * PageGetItem
 *		Returns a pointer to the item that the given item identifier refers
 *		to, computed as the page address plus the identifier's offset.
 *
 * Note:
 *		This does not change the status of any of the resources passed.
 *		The semantics may change in the future.
 */
#define PageGetItem(page, itemId) \
( \
	AssertMacro(PageIsValid(page)), \
	AssertMacro(ItemIdHasStorage(itemId)), \
	(Item) (((char *) (page)) + ItemIdGetOffset(itemId)) \
)
| |
/*
 * PageGetMaxOffsetNumber
 *		Returns the maximum offset number used by the given page.  Because
 *		offset numbers are 1-based, this is also the number of line
 *		pointers on the page.
 *
 * NOTE: if the page is not initialized (pd_lower == 0), we must return zero
 * to ensure sane behavior.  The argument is deliberately evaluated twice so
 * that this can be guaranteed.
 */
#define PageGetMaxOffsetNumber(page) \
	(((PageHeader) (page))->pd_lower > SizeOfPageHeaderData ? \
	 ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \
	  / sizeof(ItemIdData)) : 0)
| |
| /* |
| * Retrieving LSN of a shared buffer is safe only if: (1) exclusive lock on the |
| * buffer's contents is held OR (2) shared lock on the buffer's contents and |
| * the buffer header spinlock is held. The Assert() validates that a shared |
| * buffer's contents are locked. That is not sufficient but there is no easy |
| * interface to determine if a spinlock is held or whether a LW lock is held in |
| * shared/exclusive mode. The assert applies only to shared buffers because |
| * local buffers do not need to worry about concurrency. |
| * |
| */ |
| extern bool BufferLockHeldByMe(Page page); |
| static inline XLogRecPtr |
| PageGetLSN(Page page) |
| { |
| #if defined (USE_ASSERT_CHECKING) && !defined(FRONTEND) |
| Assert(BufferLockHeldByMe(page)); |
| #endif |
| return PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn); |
| } |
| |
/*
 * Additional macros for access to page headers.  (Beware multiple evaluation
 * of the arguments!)
 */

/* Store "lsn" into the page header's pd_lsn field. */
#define PageSetLSN(page, lsn) \
	PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn)

/* Test, set and clear the PD_HAS_FREE_LINES bit of pd_flags. */
#define PageHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES)
#define PageSetHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES)
#define PageClearHasFreeLinePointers(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES)

/* Test, set and clear the PD_PAGE_FULL bit of pd_flags. */
#define PageIsFull(page) \
	(((PageHeader) (page))->pd_flags & PD_PAGE_FULL)
#define PageSetFull(page) \
	(((PageHeader) (page))->pd_flags |= PD_PAGE_FULL)
#define PageClearFull(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL)

/* Test, set and clear the PD_ALL_VISIBLE bit of pd_flags. */
#define PageIsAllVisible(page) \
	(((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE)
#define PageSetAllVisible(page) \
	(((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
#define PageClearAllVisible(page) \
	(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)

/*
 * PageSetPrunable
 *		Record "xid" in pd_prune_xid if the field is currently invalid or
 *		"xid" precedes the value already stored there; otherwise leave the
 *		field alone.
 *
 * "xid" must be a normal XID or FrozenTransactionId (asserted).  It is
 * parenthesized in the comparison so that an argument containing a
 * lower-precedence operator is still compared as a single value.
 *
 * PageClearPrunable
 *		Reset pd_prune_xid to InvalidTransactionId.
 */
#define PageSetPrunable(page, xid) \
do { \
	Assert(TransactionIdIsNormal(xid) || (xid) == FrozenTransactionId); \
	if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
		TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
		((PageHeader) (page))->pd_prune_xid = (xid); \
} while (0)
#define PageClearPrunable(page) \
	(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
| |
| /* ---------------------------------------------------------------- |
| * extern declarations |
| * ---------------------------------------------------------------- |
| */ |
| |
/* Flag bits accepted by PageAddItemExtended() */
#define PAI_OVERWRITE			(1 << 0)
#define PAI_IS_HEAP				(1 << 1)

/* Flag bits accepted by PageIsVerifiedExtended() */
#define PIV_LOG_WARNING			(1 << 0)
#define PIV_REPORT_STAT			(1 << 1)

/*
 * PageAddItem
 *		Wrapper around PageAddItemExtended() that takes the two options as
 *		separate booleans and turns them into PAI_* flag bits.
 */
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
	PageAddItemExtended(page, item, size, offsetNumber, \
						((overwrite) ? PAI_OVERWRITE : 0) | \
						((is_heap) ? PAI_IS_HEAP : 0))

/*
 * PageIsVerified
 *		Wrapper around PageIsVerifiedExtended() for the main fork, with both
 *		PIV_LOG_WARNING and PIV_REPORT_STAT set.
 */
#define PageIsVerified(page, blkno) \
	PageIsVerifiedExtended(page, MAIN_FORKNUM, blkno, \
						   PIV_LOG_WARNING | PIV_REPORT_STAT)
| |
| /* |
| * Check that BLCKSZ is a multiple of sizeof(size_t). In |
| * PageIsVerifiedExtended(), it is much faster to check if a page is |
| * full of zeroes using the native word size. Note that this assertion |
| * is kept within a header to make sure that StaticAssertDecl() works |
| * across various combinations of platforms and compilers. |
| */ |
| StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)), |
| "BLCKSZ has to be a multiple of sizeof(size_t)"); |
| |
| extern void PageInit(Page page, Size pageSize, Size specialSize); |
| extern bool PageIsVerifiedExtended(Page page, ForkNumber forknum, |
| BlockNumber blkno, |
| int flags); |
| extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, |
| OffsetNumber offsetNumber, int flags); |
| extern Page PageGetTempPage(Page page); |
| extern Page PageGetTempPageCopy(Page page); |
| extern Page PageGetTempPageCopySpecial(Page page); |
| extern void PageRestoreTempPage(Page tempPage, Page oldPage); |
| extern void PageRepairFragmentation(Page page); |
| extern void PageTruncateLinePointerArray(Page page); |
| extern Size PageGetFreeSpace(Page page); |
| extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups); |
| extern Size PageGetExactFreeSpace(Page page); |
| extern Size PageGetHeapFreeSpace(Page page); |
| extern void PageIndexTupleDelete(Page page, OffsetNumber offset); |
| extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems); |
| extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offset); |
| extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, |
| Item newtup, Size newsize); |
| extern char *PageSetChecksumCopy(Page page, BlockNumber blkno); |
| extern void PageSetChecksumInplace(Page page, BlockNumber blkno); |
| extern char *PageEncryptCopy(Page page, ForkNumber forknum, |
| BlockNumber blkno); |
| extern void PageEncryptInplace(Page page, ForkNumber forknum, |
| BlockNumber blkno); |
| extern void PageDecryptInplace(Page page, ForkNumber forknum, |
| BlockNumber blkno); |
| |
| #endif /* BUFPAGE_H */ |