| /* |
| * xlog_internal.h |
| * |
| * PostgreSQL write-ahead log internal declarations |
| * |
| * NOTE: this file is intended to contain declarations useful for |
| * manipulating the XLOG files directly, but it is not supposed to be |
| * needed by rmgr routines (redo support for individual record types). |
| * So the XLogRecord typedef and associated stuff appear in xlogrecord.h. |
| * |
| * Note: This file must be includable in both frontend and backend contexts, |
| * to allow stand-alone tools like pg_receivewal to deal with WAL files. |
| * |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * src/include/access/xlog_internal.h |
| */ |
| #ifndef XLOG_INTERNAL_H |
| #define XLOG_INTERNAL_H |
| |
| #include "access/xlogdefs.h" |
| #include "access/xlogreader.h" |
| #include "datatype/timestamp.h" |
| #include "lib/stringinfo.h" |
| #include "pgtime.h" |
| #include "storage/block.h" |
| #include "storage/relfilenode.h" |
| |
| |
| /* |
| * Each page of XLOG file has a header like this: |
| */ |
| #define XLOG_PAGE_MAGIC 0xD10D /* can be used as WAL version indicator */ |
| |
| typedef struct XLogPageHeaderData |
| { |
| uint16 xlp_magic; /* magic value for correctness checks */ |
| uint16 xlp_info; /* flag bits, see below */ |
| TimeLineID xlp_tli; /* TimeLineID of first record on page */ |
| XLogRecPtr xlp_pageaddr; /* XLOG address of this page */ |
| |
| /* |
| * When there is not enough space on current page for whole record, we |
| * continue on the next page. xlp_rem_len is the number of bytes |
| * remaining from a previous page; it tracks xl_tot_len in the initial |
| * header. Note that the continuation data isn't necessarily aligned. |
| */ |
| uint32 xlp_rem_len; /* total len of remaining data for record */ |
| } XLogPageHeaderData; |
| |
| #define SizeOfXLogShortPHD MAXALIGN(sizeof(XLogPageHeaderData)) |
| |
| typedef XLogPageHeaderData *XLogPageHeader; |
| |
| /* |
| * When the XLP_LONG_HEADER flag is set, we store additional fields in the |
| * page header. (This is ordinarily done just in the first page of an |
| * XLOG file.) The additional fields serve to identify the file accurately. |
| */ |
| typedef struct XLogLongPageHeaderData |
| { |
| XLogPageHeaderData std; /* standard header fields */ |
| uint64 xlp_sysid; /* system identifier from pg_control */ |
| uint32 xlp_seg_size; /* just as a cross-check */ |
| uint32 xlp_xlog_blcksz; /* just as a cross-check */ |
| } XLogLongPageHeaderData; |
| |
| #define SizeOfXLogLongPHD MAXALIGN(sizeof(XLogLongPageHeaderData)) |
| |
| typedef XLogLongPageHeaderData *XLogLongPageHeader; |
| |
| /* When record crosses page boundary, set this flag in new page's header */ |
| #define XLP_FIRST_IS_CONTRECORD 0x0001 |
| /* This flag indicates a "long" page header */ |
| #define XLP_LONG_HEADER 0x0002 |
| /* This flag indicates backup blocks starting in this page are optional */ |
| #define XLP_BKP_REMOVABLE 0x0004 |
| /* Replaces a missing contrecord; see CreateOverwriteContrecordRecord */ |
| #define XLP_FIRST_IS_OVERWRITE_CONTRECORD 0x0008 |
| /* All defined flag bits in xlp_info (used for validity checking of header) */ |
| #define XLP_ALL_FLAGS 0x000F |
| |
| #define XLogPageHeaderSize(hdr) \ |
| (((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD) |
| |
| /* wal_segment_size can range from 1MB to 1GB */ |
| #define WalSegMinSize (1024 * 1024) |
| #define WalSegMaxSize (1024 * 1024 * 1024) |
| /* default number of min and max wal segments */ |
| #define DEFAULT_MIN_WAL_SEGS 5 |
| /* |
| * gpdb: PG hardcodes this as 64. We don't expect to keep that many segment |
| * files given we use 64MB as default segment file size while PG uses 16MB, |
| * also note this value affects that checkpoint frequency. Let's just ensure |
| * the max wal segment size is same on gpdb and PG. |
| */ |
| #define DEFAULT_MAX_WAL_SEGS (64*(16*1024*1024)/DEFAULT_XLOG_SEG_SIZE) |
| |
| /* check that the given size is a valid wal_segment_size */ |
| #define IsPowerOf2(x) (x > 0 && ((x) & ((x)-1)) == 0) |
| #define IsValidWalSegSize(size) \ |
| (IsPowerOf2(size) && \ |
| ((size) >= WalSegMinSize && (size) <= WalSegMaxSize)) |
| |
| #define XLogSegmentsPerXLogId(wal_segsz_bytes) \ |
| (UINT64CONST(0x100000000) / (wal_segsz_bytes)) |
| |
| #define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest) \ |
| (dest) = (segno) * (wal_segsz_bytes) + (offset) |
| |
| #define XLogSegmentOffset(xlogptr, wal_segsz_bytes) \ |
| ((xlogptr) & ((wal_segsz_bytes) - 1)) |
| |
| /* |
| * Compute a segment number from an XLogRecPtr. |
| * |
| * For XLByteToSeg, do the computation at face value. For XLByteToPrevSeg, |
| * a boundary byte is taken to be in the previous segment. This is suitable |
| * for deciding which segment to write given a pointer to a record end, |
| * for example. |
| */ |
| #define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes) \ |
| logSegNo = (xlrp) / (wal_segsz_bytes) |
| |
| #define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes) \ |
| logSegNo = ((xlrp) - 1) / (wal_segsz_bytes) |
| |
| /* |
| * Convert values of GUCs measured in megabytes to equiv. segment count. |
| * Rounds down. |
| */ |
| #define XLogMBVarToSegs(mbvar, wal_segsz_bytes) \ |
| ((mbvar) / ((wal_segsz_bytes) / (1024 * 1024))) |
| |
| /* |
| * Is an XLogRecPtr within a particular XLOG segment? |
| * |
| * For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg, |
| * a boundary byte is taken to be in the previous segment. |
| */ |
| #define XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes) \ |
| (((xlrp) / (wal_segsz_bytes)) == (logSegNo)) |
| |
| #define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes) \ |
| ((((xlrp) - 1) / (wal_segsz_bytes)) == (logSegNo)) |
| |
| /* Check if an XLogRecPtr value is in a plausible range */ |
| #define XRecOffIsValid(xlrp) \ |
| ((xlrp) % XLOG_BLCKSZ >= SizeOfXLogShortPHD) |
| |
| /* |
| * The XLog directory and control file (relative to $PGDATA) |
| */ |
| #define XLOGDIR "pg_wal" |
| #define XLOG_CONTROL_FILE "global/pg_control" |
| |
| /* |
| * These macros encapsulate knowledge about the exact layout of XLog file |
| * names, timeline history file names, and archive-status file names. |
| */ |
| #define MAXFNAMELEN 64 |
| |
| /* Length of XLog file name */ |
| #define XLOG_FNAME_LEN 24 |
| |
| /* |
| * Generate a WAL segment file name. Do not use this macro in a helper |
| * function allocating the result generated. |
| */ |
| #define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes) \ |
| snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, \ |
| (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ |
| (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes))) |
| |
| #define XLogFileNameById(fname, tli, log, seg) \ |
| snprintf(fname, MAXFNAMELEN, "%08X%08X%08X", tli, log, seg) |
| |
| #define IsXLogFileName(fname) \ |
| (strlen(fname) == XLOG_FNAME_LEN && \ |
| strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN) |
| |
| /* |
| * XLOG segment with .partial suffix. Used by pg_receivewal and at end of |
| * archive recovery, when we want to archive a WAL segment but it might not |
| * be complete yet. |
| */ |
| #define IsPartialXLogFileName(fname) \ |
| (strlen(fname) == XLOG_FNAME_LEN + strlen(".partial") && \ |
| strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ |
| strcmp((fname) + XLOG_FNAME_LEN, ".partial") == 0) |
| |
| #define XLogFromFileName(fname, tli, logSegNo, wal_segsz_bytes) \ |
| do { \ |
| uint32 log; \ |
| uint32 seg; \ |
| sscanf(fname, "%08X%08X%08X", tli, &log, &seg); \ |
| *logSegNo = (uint64) log * XLogSegmentsPerXLogId(wal_segsz_bytes) + seg; \ |
| } while (0) |
| |
| #define XLogFilePath(path, tli, logSegNo, wal_segsz_bytes) \ |
| snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X", tli, \ |
| (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ |
| (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes))) |
| |
| #define TLHistoryFileName(fname, tli) \ |
| snprintf(fname, MAXFNAMELEN, "%08X.history", tli) |
| |
| #define IsTLHistoryFileName(fname) \ |
| (strlen(fname) == 8 + strlen(".history") && \ |
| strspn(fname, "0123456789ABCDEF") == 8 && \ |
| strcmp((fname) + 8, ".history") == 0) |
| |
| #define TLHistoryFilePath(path, tli) \ |
| snprintf(path, MAXPGPATH, XLOGDIR "/%08X.history", tli) |
| |
| #define StatusFilePath(path, xlog, suffix) \ |
| snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s%s", xlog, suffix) |
| |
| #define BackupHistoryFileName(fname, tli, logSegNo, startpoint, wal_segsz_bytes) \ |
| snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, \ |
| (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ |
| (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes)), \ |
| (uint32) (XLogSegmentOffset(startpoint, wal_segsz_bytes))) |
| |
| #define IsBackupHistoryFileName(fname) \ |
| (strlen(fname) > XLOG_FNAME_LEN && \ |
| strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ |
| strcmp((fname) + strlen(fname) - strlen(".backup"), ".backup") == 0) |
| |
| #define BackupHistoryFilePath(path, tli, logSegNo, startpoint, wal_segsz_bytes) \ |
| snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X.%08X.backup", tli, \ |
| (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \ |
| (uint32) ((logSegNo) % XLogSegmentsPerXLogId(wal_segsz_bytes)), \ |
| (uint32) (XLogSegmentOffset((startpoint), wal_segsz_bytes))) |
| |
| /* |
| * Information logged when we detect a change in one of the parameters |
| * important for Hot Standby. |
| */ |
| typedef struct xl_parameter_change |
| { |
| int MaxConnections; |
| int max_worker_processes; |
| int max_wal_senders; |
| int max_prepared_xacts; |
| int max_locks_per_xact; |
| int wal_level; |
| bool wal_log_hints; |
| bool track_commit_timestamp; |
| } xl_parameter_change; |
| |
| /* logs restore point */ |
| typedef struct xl_restore_point |
| { |
| TimestampTz rp_time; |
| char rp_name[MAXFNAMELEN]; |
| } xl_restore_point; |
| |
| /* Overwrite of prior contrecord */ |
| typedef struct xl_overwrite_contrecord |
| { |
| XLogRecPtr overwritten_lsn; |
| TimestampTz overwrite_time; |
| } xl_overwrite_contrecord; |
| |
| /* End of recovery mark, when we don't do an END_OF_RECOVERY checkpoint */ |
| typedef struct xl_end_of_recovery |
| { |
| TimestampTz end_time; |
| TimeLineID ThisTimeLineID; /* new TLI */ |
| TimeLineID PrevTimeLineID; /* previous TLI we forked off from */ |
| } xl_end_of_recovery; |
| |
| typedef struct CheckpointExtendedRecord |
| { |
| struct TMGXACT_CHECKPOINT *dtxCheckpoint; |
| uint32 dtxCheckpointLen; |
| } CheckpointExtendedRecord; |
| |
| /* |
| * The functions in xloginsert.c construct a chain of XLogRecData structs |
| * to represent the final WAL record. |
| */ |
| typedef struct XLogRecData |
| { |
| struct XLogRecData *next; /* next struct in chain, or NULL */ |
| char *data; /* start of rmgr data to include */ |
| uint32 len; /* length of rmgr data to include */ |
| } XLogRecData; |
| |
| /* |
| * Recovery target action. |
| */ |
| typedef enum |
| { |
| RECOVERY_TARGET_ACTION_PAUSE, |
| RECOVERY_TARGET_ACTION_PROMOTE, |
| RECOVERY_TARGET_ACTION_SHUTDOWN |
| } RecoveryTargetAction; |
| |
| struct LogicalDecodingContext; |
| struct XLogRecordBuffer; |
| |
| /* |
| * Method table for resource managers. |
| * |
| * This struct must be kept in sync with the PG_RMGR definition in |
| * rmgr.c. |
| * |
| * rm_identify must return a name for the record based on xl_info (without |
| * reference to the rmid). For example, XLOG_BTREE_VACUUM would be named |
| * "VACUUM". rm_desc can then be called to obtain additional detail for the |
| * record, if available (e.g. the last block). |
| * |
| * rm_mask takes as input a page modified by the resource manager and masks |
| * out bits that shouldn't be flagged by wal_consistency_checking. |
| * |
| * RmgrTable[] is indexed by RmgrId values (see rmgrlist.h). If rm_name is |
| * NULL, the corresponding RmgrTable entry is considered invalid. |
| */ |
| typedef struct RmgrData |
| { |
| const char *rm_name; |
| void (*rm_redo) (XLogReaderState *record); |
| void (*rm_desc) (StringInfo buf, XLogReaderState *record); |
| const char *(*rm_identify) (uint8 info); |
| void (*rm_startup) (void); |
| void (*rm_cleanup) (void); |
| void (*rm_mask) (char *pagedata, BlockNumber blkno); |
| void (*rm_decode) (struct LogicalDecodingContext *ctx, |
| struct XLogRecordBuffer *buf); |
| } RmgrData; |
| |
| extern RmgrData RmgrTable[]; |
| extern void RmgrStartup(void); |
| extern void RmgrCleanup(void); |
| extern void RmgrNotFound(RmgrId rmid); |
| extern void RegisterCustomRmgr(RmgrId rmid, RmgrData *rmgr); |
| |
| static inline bool |
| RmgrIdExists(RmgrId rmid) |
| { |
| return RmgrTable[rmid].rm_name != NULL; |
| } |
| |
| static inline RmgrData |
| GetRmgr(RmgrId rmid) |
| { |
| if (unlikely(!RmgrIdExists(rmid))) |
| RmgrNotFound(rmid); |
| return RmgrTable[rmid]; |
| } |
| |
| /* |
| * Exported to support xlog switching from checkpointer |
| */ |
| extern pg_time_t GetLastSegSwitchData(XLogRecPtr *lastSwitchLSN); |
| extern XLogRecPtr RequestXLogSwitch(bool mark_unimportant); |
| |
| extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli); |
| |
| /* |
| * Exported for the functions in timeline.c and xlogarchive.c. Only valid |
| * in the startup process. |
| */ |
| extern bool ArchiveRecoveryRequested; |
| extern bool InArchiveRecovery; |
| extern bool StandbyMode; |
| extern char *recoveryRestoreCommand; |
| |
| #endif /* XLOG_INTERNAL_H */ |