| /*------------------------------------------------------------------------- |
| * |
| * pg_control.h |
| * The system control file "pg_control" is not a heap relation. |
| * However, we define it here so that the format is documented. |
| * |
| * |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * src/include/catalog/pg_control.h |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #ifndef PG_CONTROL_H |
| #define PG_CONTROL_H |
| |
| #include "access/transam.h" |
| #include "access/xlogdefs.h" |
| #include "pgtime.h" /* for pg_time_t */ |
| #include "port/pg_crc32c.h" |
| |
| |
| /* |
| * Version identifier for this pg_control format. |
| * |
| * The first four digits are the PostgreSQL version number. The last |
| * four digits indicate the GPDB version. |
| * |
| * This must be bumped whenever the CheckPoint struct or the DBState enum |
| * declared in this file is changed, because both are stored in pg_control. |
| */ |
| #define PG_CONTROL_VERSION 13000700 |
| |
| /* Length, in bytes, of ControlFileData.mock_authentication_nonce (see below) */ |
| #define MOCK_AUTH_NONCE_LEN 32 |
| |
| /* |
| * Body of CheckPoint XLOG records. This is declared here because we keep |
| * a copy of the latest one in pg_control (ControlFileData.checkPointCopy) |
| * for possible disaster recovery. |
| * |
| * Changing this struct requires a PG_CONTROL_VERSION bump. |
| */ |
| typedef struct CheckPoint |
| { |
| XLogRecPtr redo; /* next RecPtr available when we began to |
| * create CheckPoint (i.e. REDO start point) */ |
| TimeLineID ThisTimeLineID; /* current timeline ID (TLI) */ |
| TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new |
| * timeline (equals ThisTimeLineID otherwise) */ |
| bool fullPageWrites; /* current full_page_writes setting */ |
| FullTransactionId nextXid; /* next free transaction ID */ |
| DistributedTransactionId nextGxid; /* next free distributed transaction |
| * ID (gxid) */ |
| Oid nextOid; /* next free OID */ |
| Oid nextRelfilenode; /* next free relfilenode */ |
| MultiXactId nextMulti; /* next free MultiXactId */ |
| MultiXactOffset nextMultiOffset; /* next free MultiXact offset */ |
| TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ |
| Oid oldestXidDB; /* database with minimum datfrozenxid */ |
| MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */ |
| Oid oldestMultiDB; /* database with minimum datminmxid */ |
| pg_time_t time; /* time stamp of checkpoint */ |
| TransactionId oldestCommitTsXid; /* oldest Xid with valid commit |
| * timestamp */ |
| TransactionId newestCommitTsXid; /* newest Xid with valid commit |
| * timestamp */ |
| |
| /* |
| * Oldest XID still running. This is only needed to initialize hot standby |
| * mode from an online checkpoint, so we only bother calculating this for |
| * online checkpoints and only when wal_level is replica. Otherwise it's |
| * set to InvalidTransactionId. |
| */ |
| TransactionId oldestActiveXid; |
| |
| /* |
| * NOTE: in the XLOG record itself, more data follows the end of this |
| * struct for a DTM checkpoint. |
| */ |
| } CheckPoint; |
| |
| /* XLOG info values for XLOG rmgr. All sixteen codes from 0x00 through 0xF0 (step 0x10) are assigned below. */ |
| #define XLOG_CHECKPOINT_SHUTDOWN 0x00 |
| #define XLOG_CHECKPOINT_ONLINE 0x10 |
| #define XLOG_NOOP 0x20 |
| #define XLOG_NEXTOID 0x30 |
| #define XLOG_SWITCH 0x40 |
| #define XLOG_BACKUP_END 0x50 |
| #define XLOG_PARAMETER_CHANGE 0x60 |
| #define XLOG_RESTORE_POINT 0x70 |
| #define XLOG_FPW_CHANGE 0x80 |
| #define XLOG_END_OF_RECOVERY 0x90 |
| #define XLOG_FPI_FOR_HINT 0xA0 |
| #define XLOG_FPI 0xB0 |
| #define XLOG_NEXTRELFILENODE 0xC0 |
| #define XLOG_NEXTGXID 0xD0 |
| /* GPDB_14_MERGE_FIXME: compatibility; figure out whether 0xC0 (assigned to XLOG_NEXTRELFILENODE above) is already used. */ |
| /* Note: 0xC0 is used in Postgres 9.5-11 */ |
| #define XLOG_OVERWRITE_CONTRECORD 0xE0 |
| #define XLOG_ENCRYPTION_LSN 0xF0 |
| |
| |
| /* |
| * System status indicator. Note this is stored in pg_control (in |
| * ControlFileData.state); if you change it, you must bump |
| * PG_CONTROL_VERSION. |
| * |
| * Only DB_STARTUP has an explicit value; the remaining enumerators take |
| * consecutive values implicitly, so inserting or reordering entries changes |
| * the numeric values of the entries that follow. |
| */ |
| typedef enum DBState |
| { |
| DB_STARTUP = 0, |
| DB_SHUTDOWNED, /* 1 */ |
| DB_SHUTDOWNED_IN_RECOVERY, /* 2 */ |
| DB_SHUTDOWNING, /* 3 */ |
| DB_IN_CRASH_RECOVERY, /* 4 */ |
| DB_IN_ARCHIVE_RECOVERY, /* 5 */ |
| DB_IN_PRODUCTION /* 6 */ |
| } DBState; |
| |
| /* |
| * Contents of pg_control. |
| * |
| * sizeof(ControlFileData) should not exceed PG_CONTROL_MAX_SAFE_SIZE, and |
| * the crc field must remain the last member. |
| */ |
| |
| typedef struct ControlFileData |
| { |
| /* |
| * Unique system identifier --- to ensure we match up xlog files with the |
| * installation that produced them. |
| */ |
| uint64 system_identifier; |
| |
| /* |
| * Version identifier information. Keep these fields at the same offset, |
| * especially pg_control_version; they won't be very useful if they move |
| * around. (For historical reasons they must be 8 bytes into the file |
| * rather than immediately at the front.) |
| * |
| * pg_control_version identifies the format of pg_control itself. |
| * catalog_version_no identifies the format of the system catalogs. |
| * |
| * There are additional version identifiers in individual files; for |
| * example, WAL logs contain per-page magic numbers that can serve as |
| * version cues for the WAL log. |
| */ |
| uint32 pg_control_version; /* PG_CONTROL_VERSION */ |
| uint32 catalog_version_no; /* see catversion.h */ |
| |
| /* |
| * System status data |
| */ |
| DBState state; /* see enum above */ |
| pg_time_t time; /* time stamp of last pg_control update */ |
| XLogRecPtr checkPoint; /* last check point record ptr */ |
| |
| CheckPoint checkPointCopy; /* copy of last check point record */ |
| |
| XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */ |
| |
| /* |
| * These values determine the minimum point we must recover up to |
| * before starting up: |
| * |
| * minRecoveryPoint use in GPDB is very limited. Currently, it is used |
| * simply to store the location of the end of backup in standby mode. |
| * That guards against starting standby, aborting it, and restarting with |
| * an earlier stop location. We can't get promoted unless we've at least |
| * replayed up to minRecoveryPoint. |
| * |
| * backupStartPoint is the redo pointer of the backup start checkpoint, if |
| * we are recovering from an online backup and haven't reached the end of |
| * backup yet. It is reset to zero when the end of backup is reached, and |
| * we mustn't start up before that. A boolean would suffice otherwise, but |
| * we use the redo pointer as a cross-check when we see an end-of-backup |
| * record, to make sure the end-of-backup record corresponds to the base |
| * backup we're recovering from. |
| * |
| * backupEndPoint is the backup end location, if we are recovering from an |
| * online backup which was taken from the standby and haven't reached the |
| * end of backup yet. It is initialized to the minimum recovery point in |
| * pg_control which was backed up last. It is reset to zero when the end |
| * of backup is reached, and we mustn't start up before that. |
| * |
| * If backupEndRequired is true, we know for sure that we're restoring |
| * from a backup, and must see a backup-end record before we can safely |
| * start up. If it's false, but backupStartPoint is set, a backup_label |
| * file was found at startup but it may have been a leftover from a stray |
| * pg_start_backup() call, not accompanied by pg_stop_backup(). |
| */ |
| XLogRecPtr minRecoveryPoint; |
| TimeLineID minRecoveryPointTLI; /* NOTE(review): presumably the timeline of minRecoveryPoint -- confirm */ |
| XLogRecPtr backupStartPoint; |
| XLogRecPtr backupEndPoint; |
| bool backupEndRequired; |
| |
| /* |
| * Parameter settings that determine if the WAL can be used for archival |
| * or hot standby. |
| */ |
| int wal_level; |
| bool wal_log_hints; |
| int MaxConnections; |
| int max_worker_processes; |
| int max_wal_senders; |
| int max_prepared_xacts; |
| int max_locks_per_xact; |
| bool track_commit_timestamp; |
| |
| /* |
| * This data is used to check for hardware-architecture compatibility of |
| * the database and the backend executable. We need not check endianness |
| * explicitly, since the pg_control version will surely look wrong to a |
| * machine of different endianness, but we do need to worry about MAXALIGN |
| * and floating-point format. (Note: storage layout nominally also |
| * depends on SHORTALIGN and INTALIGN, but in practice these are the same |
| * on all architectures of interest.) |
| * |
| * Testing just one double value is not a very bulletproof test for |
| * floating-point compatibility, but it will catch most cases. |
| */ |
| uint32 maxAlign; /* alignment requirement for tuples */ |
| double floatFormat; /* constant 1234567.0 (FLOATFORMAT_VALUE) */ |
| #define FLOATFORMAT_VALUE 1234567.0 |
| |
| /* |
| * This data is used to make sure that configuration of this database is |
| * compatible with the backend executable. |
| */ |
| uint32 blcksz; /* data block size for this DB */ |
| uint32 relseg_size; /* blocks per segment of large relation */ |
| |
| uint32 xlog_blcksz; /* block size within WAL files */ |
| uint32 xlog_seg_size; /* size of each WAL segment */ |
| |
| uint32 nameDataLen; /* catalog name field width */ |
| uint32 indexMaxKeys; /* max number of columns in an index */ |
| |
| uint32 toast_max_chunk_size; /* chunk size in TOAST tables */ |
| uint32 loblksize; /* chunk size in pg_largeobject */ |
| |
| bool float8ByVal; /* float8, int8, etc pass-by-value? */ |
| |
| /* Are data pages protected by checksums? Zero if no checksum version */ |
| uint32 data_checksum_version; |
| |
| /* |
| * Random nonce, used in authentication requests that need to proceed |
| * based on values that are cluster-unique, like a SASL exchange that |
| * failed at an early stage. The array is MOCK_AUTH_NONCE_LEN bytes long. |
| */ |
| char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]; |
| |
| /* File encryption method; index into encryption_methods[]. */ |
| int file_encryption_method; |
| |
| /* CRC of all above ... MUST BE LAST! */ |
| pg_crc32c crc; |
| } ControlFileData; |
| |
| /* |
| * Maximum safe value of sizeof(ControlFileData), in bytes. For |
| * reliability's sake, it's critical that pg_control updates be atomic |
| * writes. That generally means the active data can't be more than one disk |
| * sector, which is 512 bytes on common hardware. Be very careful about |
| * raising this limit. |
| */ |
| #define PG_CONTROL_MAX_SAFE_SIZE 512 |
| |
| /* |
| * Physical size of the pg_control file, in bytes. Note that this is |
| * considerably bigger than the actually used size (i.e., |
| * sizeof(ControlFileData)). The idea is to keep the physical size constant |
| * independent of format changes, so that ReadControlFile will deliver a |
| * suitable wrong-version message instead of a read error if it's looking |
| * at an incompatible file. |
| */ |
| #define PG_CONTROL_FILE_SIZE 8192 |
| |
| #endif /* PG_CONTROL_H */ |