| /*------------------------------------------------------------------------- |
| * |
| * proc.h |
| * per-process shared memory data structures |
| * |
| * |
| * Portions Copyright (c) 2006-2008, Greenplum inc |
| * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates. |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * src/include/storage/proc.h |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #ifndef _PROC_H_ |
| #define _PROC_H_ |
| |
| #include "access/clog.h" |
| #include "access/xlogdefs.h" |
| #include "lib/ilist.h" |
| #include "storage/latch.h" |
| #include "storage/lock.h" |
| #include "storage/spin.h" |
| #include "storage/pg_sema.h" |
| #include "storage/proclist_types.h" |
| |
| #include "cdb/cdblocaldistribxact.h" /* LocalDistribXactData */ |
| #include "cdb/cdbtm.h" /* TMGXACT */ |
| #include "dsm.h" |
| |
| /* |
| * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds |
| * for non-aborted subtransactions of its current top transaction. These |
| * have to be treated as running XIDs by other backends. |
| * |
| * We also keep track of whether the cache overflowed (ie, the transaction has |
| * generated at least one subtransaction that didn't fit in the cache). |
| * If none of the caches have overflowed, we can assume that an XID that's not |
| * listed anywhere in the PGPROC array is not a running transaction. Else we |
| * have to look at pg_subtrans. |
| * |
| * The value sizes XidCache.xids[] and bounds XidCacheStatus.count. Since |
| * that counter is a uint8, the value must not be raised above 255 without |
| * widening the counter as well. |
| */ |
| #define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */ |
| |
| typedef struct XidCacheStatus |
| { |
| /* |
| * Summary of one backend's subxid cache. Stored in PGPROC.subxidStatus |
| * and mirrored in the ProcGlobal->subxidStates[] array. |
| * |
| * count: number of cached subxids, never more than |
| * PGPROC_MAX_CACHED_SUBXIDS (which is why a uint8 is wide enough). |
| */ |
| uint8 count; |
| /* has PGPROC->subxids overflowed (see PGPROC_MAX_CACHED_SUBXIDS)? */ |
| bool overflowed; |
| } XidCacheStatus; |
| |
| struct XidCache |
| { |
| TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]; /* cached subxids; the |
| * number of valid entries |
| * is XidCacheStatus.count */ |
| }; |
| |
| /* |
| * Flags for PGPROC->statusFlags and PROC_HDR->statusFlags[] |
| * |
| * Both of those are declared uint8, so only the bits 0x01 .. 0x80 can be |
| * used. Bits 0x20, 0x40 and 0x80 are not assigned below. |
| */ |
| #define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */ |
| #define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */ |
| #define PROC_IN_SAFE_IC 0x04 /* currently running CREATE INDEX |
| * CONCURRENTLY or REINDEX |
| * CONCURRENTLY on non-expressional, |
| * non-partial index */ |
| #define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */ |
| #define PROC_IN_LOGICAL_DECODING 0x10 /* currently doing logical |
| * decoding outside xact */ |
| |
| /* |
| * flags reset at EOXact |
| * |
| * PROC_IN_VACUUM is commented out of the expression below, so the mask |
| * currently covers only PROC_IN_SAFE_IC and PROC_VACUUM_FOR_WRAPAROUND. |
| * NOTE(review): the reason for leaving PROC_IN_VACUUM out is not recorded |
| * here -- confirm with the code that sets and clears that flag before |
| * re-enabling it. |
| */ |
| #define PROC_VACUUM_STATE_MASK \ |
| (/* PROC_IN_VACUUM | */ PROC_IN_SAFE_IC | PROC_VACUUM_FOR_WRAPAROUND) |
| |
| /* |
| * Xmin-related flags. Make sure any flags that affect how the process' Xmin |
| * value is interpreted by VACUUM are included here. |
| * |
| * Unlike PROC_VACUUM_STATE_MASK, this set does include PROC_IN_VACUUM. |
| */ |
| #define PROC_XMIN_FLAGS (PROC_IN_VACUUM | PROC_IN_SAFE_IC) |
| |
| /* |
| * We allow a small number of "weak" relation locks (AccessShareLock, |
| * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure |
| * rather than the main lock table. This eases contention on the lock |
| * manager LWLocks. See storage/lmgr/README for additional details. |
| * |
| * This constant sizes PGPROC.fpRelId[]. The per-slot bits are kept in the |
| * uint64 fields PGPROC.fpLockBits and PGPROC.fpHoldTillEndXactBits. |
| * NOTE(review): presumably (slots * bits per slot) has to fit in 64 bits -- |
| * confirm against the lock manager before raising this value. |
| */ |
| #define FP_LOCK_SLOTS_PER_BACKEND 16 |
| |
| /* |
| * An invalid pgprocno. Must be larger than the maximum number of PGPROC |
| * structures we could possibly have. See comments for MAX_BACKENDS. |
| * |
| * A pgprocno is an index into ProcGlobal->allProcs (see GetPGProcByNumber), |
| * so this sentinel must never be used as such an index. |
| */ |
| #define INVALID_PGPROCNO PG_INT32_MAX |
| |
| /* |
| * Flags used only for type of internal functions |
| * GetVirtualXIDsDelayingChkptGuts and HaveVirtualXIDsDelayingChkptGuts. |
| * |
| * The two values are distinct single bits. |
| * NOTE(review): presumably DELAY_CHKPT_START selects PGPROC.delayChkpt |
| * ("delays checkpoint start") and DELAY_CHKPT_COMPLETE selects |
| * PGPROC.delayChkptEnd ("delays checkpoint end") -- confirm in the two |
| * functions named above, which are not declared in this header. |
| */ |
| #define DELAY_CHKPT_START (1<<0) |
| #define DELAY_CHKPT_COMPLETE (1<<1) |
| |
| typedef enum |
| { |
| PROC_WAIT_STATUS_OK, |
| PROC_WAIT_STATUS_WAITING, |
| PROC_WAIT_STATUS_ERROR, |
| } ProcWaitStatus; /* type of PGPROC.waitStatus; also the return type of |
| * ProcSleep() and a parameter of ProcWakeup() */ |
| |
| /* |
| * Each backend has a PGPROC struct in shared memory. There is also a list of |
| * currently-unused PGPROC structs that will be reallocated to new backends. |
| * |
| * links: list link for any list the PGPROC is in. When waiting for a lock, |
| * the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC |
| * is linked into ProcGlobal's freeProcs list. |
| * |
| * Note: twophase.c also sets up a dummy PGPROC struct for each currently |
| * prepared transaction. These PGPROCs appear in the ProcArray data structure |
| * so that the prepared transactions appear to be still running and are |
| * correctly shown as holding locks. A prepared transaction PGPROC can be |
| * distinguished from a real one at need by the fact that it has pid == 0. |
| * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused, |
| * but its myProcLocks[] lists are valid. |
| * |
| * We allow many fields of this struct to be accessed without locks, such as |
| * delayChkpt and isBackgroundWorker. However, keep in mind that writing |
| * mirrored ones (see below) requires holding ProcArrayLock or XidGenLock in |
| * at least shared mode, so that pgxactoff does not change concurrently. |
| * |
| * Mirrored fields: |
| * |
| * Some fields in PGPROC (see "mirrored in ..." comment) are mirrored into an |
| * element of more densely packed ProcGlobal arrays. These arrays are indexed |
| * by PGPROC->pgxactoff. Both copies need to be maintained coherently. |
| * |
| * NB: The pgxactoff indexed value can *never* be accessed without holding |
| * locks. |
| * |
| * See PROC_HDR for details. |
| */ |
| struct PGPROC |
| { |
| /* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */ |
| SHM_QUEUE links; /* list link if process is in a list */ |
| PGPROC **procgloballist; /* procglobal list that owns this PGPROC */ |
| |
| PGSemaphore sem; /* ONE semaphore to sleep on */ |
| ProcWaitStatus waitStatus; /* wait status, see ProcWaitStatus */ |
| |
| Latch procLatch; /* generic latch for process */ |
| |
| |
| TransactionId xid; /* id of top-level transaction currently being |
| * executed by this proc, if running and XID |
| * is assigned; else InvalidTransactionId. |
| * mirrored in ProcGlobal->xids[pgxactoff] */ |
| |
| TransactionId xmin; /* minimal running XID as it was when we were |
| * starting our xact, excluding LAZY VACUUM: |
| * vacuum must not remove tuples deleted by |
| * xid >= xmin ! */ |
| |
| LocalTransactionId lxid; /* local id of top-level transaction currently |
| * being executed by this proc, if running; |
| * else InvalidLocalTransactionId */ |
| |
| /* |
| * Distributed transaction information. This is only maintained on QE's |
| * and accessed by the backend itself, so this doesn't need to be |
| * protected by any lock. On QD MyTmGxact provides this info, hence |
| * redundant info is not maintained here for QD. In fact, it could be just |
| * a global variable in backend-private memory, but it seems useful to |
| * have this information available for debugging purposes. |
| */ |
| LocalDistribXactData localDistribXactData; |
| |
| int pid; /* Backend's process ID; 0 if prepared xact */ |
| |
| int pgxactoff; /* offset into various ProcGlobal->arrays with |
| * data mirrored from this PGPROC */ |
| int pgprocno; /* this PGPROC's pgprocno; presumably its own |
| * index in ProcGlobal->allProcs (see |
| * GetPGProcByNumber) -- TODO confirm */ |
| |
| /* These fields are zero while a backend is still starting up: */ |
| BackendId backendId; /* This backend's backend ID (if assigned) */ |
| Oid databaseId; /* OID of database this backend is using */ |
| Oid roleId; /* OID of role using this backend */ |
| int mppSessionId; /* serial num of the qDisp process */ |
| int mppLocalProcessSerial; /* this backend's PGPROC serial num */ |
| bool mppIsWriter; /* The writer gang member, holder of locks */ |
| |
| Oid tempNamespaceId; /* OID of temp schema this backend is |
| * using */ |
| |
| bool isBackgroundWorker; /* true if background worker. */ |
| |
| /* |
| * While in hot standby mode, shows that a conflict signal has been sent |
| * for the current transaction. Set/cleared while holding ProcArrayLock, |
| * though not required. Accessed without lock, if needed. |
| */ |
| bool recoveryConflictPending; |
| |
| /* Info about LWLock the process is currently waiting for, if any. */ |
| bool lwWaiting; /* true if waiting for an LW lock */ |
| uint8 lwWaitMode; /* lwlock mode being waited for */ |
| proclist_node lwWaitLink; /* position in LW lock wait list */ |
| |
| /* Support for condition variables. */ |
| proclist_node cvWaitLink; /* position in CV wait list */ |
| |
| /* Info about lock the process is currently waiting for, if any. */ |
| /* waitLock and waitProcLock are NULL if not currently waiting. */ |
| LOCK *waitLock; /* Lock object we're sleeping on ... */ |
| PROCLOCK *waitProcLock; /* Per-holder info for awaited lock */ |
| LOCKMODE waitLockMode; /* type of lock we're waiting for */ |
| LOCKMASK heldLocks; /* bitmask for lock types already held on this |
| * lock object by this backend */ |
| pg_atomic_uint64 waitStart; /* time at which wait for lock acquisition |
| * started */ |
| |
| bool delayChkpt; /* true if this proc delays checkpoint start */ |
| |
| uint8 statusFlags; /* this backend's status flags, see PROC_* |
| * above. mirrored in |
| * ProcGlobal->statusFlags[pgxactoff] */ |
| bool delayChkptEnd; /* true if this proc delays checkpoint end */ |
| |
| /* |
| * Info to allow us to wait for synchronous replication, if needed. |
| * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend. |
| * syncRepState must not be touched except by owning process or WALSender. |
| * syncRepLinks used only while holding SyncRepLock. |
| */ |
| XLogRecPtr waitLSN; /* waiting for this LSN or higher */ |
| int syncRepState; /* wait state for sync rep */ |
| SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */ |
| |
| /* |
| * All PROCLOCK objects for locks held or awaited by this backend are |
| * linked into one of these lists, according to the partition number of |
| * their lock. |
| */ |
| SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS]; |
| |
| XidCacheStatus subxidStatus; /* mirrored in |
| * ProcGlobal->subxidStates[pgxactoff] */ |
| struct XidCache subxids; /* cache for subtransaction XIDs */ |
| |
| /* |
| * Info for Resource Scheduling: which portal (i.e. statement) we might |
| * be waiting on. |
| */ |
| uint32 waitPortalId; /* portal id we are waiting on */ |
| |
| /* |
| * Handle for our shared comboCids array (populated in writer/dispatcher |
| * backends only) |
| */ |
| dsm_handle comboCidsHandle; |
| |
| /* |
| * Current command_id for the running query. |
| * This counter is not dead code although there is no consumer in the gpdb |
| * code tree; it is required by external monitoring infrastructure. |
| * As a monitoring approach, each query execution is assigned a unique |
| * ID. The queryCommandId is part of that ID. A monitoring extension with |
| * shared memory access can use queryCommandId to map a query execution to |
| * a backend entity and access related metrics information. |
| */ |
| int queryCommandId; |
| |
| /* |
| * Information for resource group |
| */ |
| void *resSlot; /* the resource group slot granted. |
| * NULL indicates the resource group is |
| * locked for drop. */ |
| slock_t movetoMutex; /* spinlock to protect moveto* fields below */ |
| void *movetoResSlot; /* the resource group slot to move to, valid |
| * only on QD; when the slot becomes NULL, it |
| * means the target process got control of it */ |
| Oid movetoGroupId; /* the resource group id to move to; valid on |
| * both QE and QD; when the id becomes |
| * InvalidOid on QD, it means the target process |
| * attempted to move to this group and the |
| * result of the attempt is in movetoResSlot */ |
| pid_t movetoCallerPid; /* pid of the move initiator; valid only on QD; |
| * guards the current move command against |
| * other move commands */ |
| |
| /* Support for group XID clearing. */ |
| /* true, if member of ProcArray group waiting for XID clear */ |
| bool procArrayGroupMember; |
| /* next ProcArray group member waiting for XID clear */ |
| pg_atomic_uint32 procArrayGroupNext; |
| |
| /* |
| * latest transaction id among the transaction's main XID and |
| * subtransactions |
| */ |
| TransactionId procArrayGroupMemberXid; |
| |
| uint32 wait_event_info; /* proc's wait information */ |
| |
| /* Support for group transaction status update. */ |
| bool clogGroupMember; /* true, if member of clog group */ |
| pg_atomic_uint32 clogGroupNext; /* next clog group member */ |
| TransactionId clogGroupMemberXid; /* transaction id of clog group member */ |
| XidStatus clogGroupMemberXidStatus; /* transaction status of clog |
| * group member */ |
| int clogGroupMemberPage; /* clog page corresponding to |
| * transaction id of clog group member */ |
| XLogRecPtr clogGroupMemberLsn; /* WAL location of commit record for clog |
| * group member */ |
| |
| /* Lock manager data, recording fast-path locks taken by this backend. */ |
| LWLock fpInfoLock; /* protects per-backend fast-path state */ |
| uint64 fpLockBits; /* lock modes held for each fast-path slot */ |
| uint64 fpHoldTillEndXactBits; /* HoldTillEndXactBits for each slot */ |
| Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */ |
| bool fpVXIDLock; /* are we holding a fast-path VXID lock? */ |
| LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID |
| * lock */ |
| |
| /* |
| * Support for lock groups. Use LockHashPartitionLockByProc on the group |
| * leader to get the LWLock protecting these fields. |
| */ |
| PGPROC *lockGroupLeader; /* lock group leader, if I'm a member */ |
| dlist_head lockGroupMembers; /* list of members, if I'm a leader */ |
| dlist_node lockGroupLink; /* my member link, if I'm a member */ |
| }; |
| |
| /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */ |
| |
| |
| extern PGDLLIMPORT PGPROC *MyProc; /* NOTE(review): presumably the calling |
| * process's own PGPROC; defined outside |
| * this header -- confirm */ |
| extern PGDLLIMPORT struct TMGXACT *MyTmGxact; /* on QD, provides the |
| * distributed transaction info (see |
| * PGPROC.localDistribXactData) */ |
| extern PGDLLIMPORT struct TMGXACTLOCAL *MyTmGxactLocal; |
| |
| /* |
| * Special for MPP reader gangs. |
| * NOTE(review): presumably points at the PGPROC that holds the locks for |
| * the gang (cf. PGPROC.mppIsWriter) -- confirm at the point of assignment. |
| */ |
| extern PGDLLIMPORT PGPROC *lockHolderProcPtr; |
| |
| /* |
| * There is one ProcGlobal struct for the whole database cluster. |
| * |
| * Adding/Removing an entry into the procarray requires holding *both* |
| * ProcArrayLock and XidGenLock in exclusive mode (in that order). Both are |
| * needed because the dense arrays (see below) are accessed from |
| * GetNewTransactionId() and GetSnapshotData(), and we don't want to add |
| * further contention by both using the same lock. Adding/Removing a procarray |
| * entry is much less frequent. |
| * |
| * Some fields in PGPROC are mirrored into more densely packed arrays (e.g. |
| * xids), with one entry for each backend. These arrays only contain entries |
| * for PGPROCs that have been added to the shared array with ProcArrayAdd() |
| * (in contrast to PGPROC array which has unused PGPROCs interspersed). |
| * |
| * The dense arrays are indexed by PGPROC->pgxactoff. Any concurrent |
| * ProcArrayAdd() / ProcArrayRemove() can lead to pgxactoff of a procarray |
| * member to change. Therefore it is only safe to use PGPROC->pgxactoff to |
| * access the dense array while holding either ProcArrayLock or XidGenLock. |
| * |
| * As long as a PGPROC is in the procarray, the mirrored values need to be |
| * maintained in both places in a coherent manner. |
| * |
| * The denser separate arrays are beneficial for three main reasons: First, to |
| * allow for as tight loops accessing the data as possible. Second, to prevent |
| * updates of frequently changing data (e.g. xmin) from invalidating |
| * cachelines also containing less frequently changing data (e.g. xid, |
| * statusFlags). Third to condense frequently accessed data into as few |
| * cachelines as possible. |
| * |
| * There are two main reasons to have the data mirrored between these dense |
| * arrays and PGPROC. First, as explained above, a PGPROC's array entries can |
| * only be accessed with either ProcArrayLock or XidGenLock held, whereas the |
| * PGPROC entries do not require that (obviously there may still be locking |
| * requirements around the individual field, separate from the concerns |
| * here). That is particularly important for a backend to efficiently check |
| * its own values, which it often can safely do without locking. Second, the |
| * PGPROC fields allow to avoid unnecessary accesses and modification to the |
| * dense arrays. A backend's own PGPROC is more likely to be in a local cache, |
| * whereas the cachelines for the dense array will be modified by other |
| * backends (often removing it from the cache for other cores/sockets). At |
| * commit/abort time a check of the PGPROC value can avoid accessing/dirtying |
| * the corresponding array value. |
| * |
| * Basically it makes sense to access the PGPROC variable when checking a |
| * single backend's data, especially when already looking at the PGPROC for |
| * other reasons. It makes sense to look at the "dense" arrays if we |
| * need to look at many / most entries, because we then benefit from the |
| * reduced indirection and better cross-process cache-ability. |
| * |
| * When entering a PGPROC for 2PC transactions with ProcArrayAdd(), the data |
| * in the dense arrays is initialized from the PGPROC while it already holds |
| * ProcArrayLock. |
| */ |
| typedef struct PROC_HDR |
| { |
| /* Array of PGPROC structures (not including dummies for prepared txns) */ |
| PGPROC *allProcs; |
| /* Array of TMGXACT structures (not including dummies for prepared txns) */ |
| TMGXACT *allTmGxact; |
| |
| /* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */ |
| TransactionId *xids; |
| |
| /* |
| * Array mirroring PGPROC.subxidStatus for each PGPROC currently in the |
| * procarray. |
| */ |
| XidCacheStatus *subxidStates; |
| |
| /* |
| * Array mirroring PGPROC.statusFlags for each PGPROC currently in the |
| * procarray. |
| */ |
| uint8 *statusFlags; |
| |
| /* Length of allProcs array (upper bound for GetPGProcByNumber indexes) */ |
| uint32 allProcCount; |
| /* Head of list of free PGPROC structures */ |
| PGPROC *freeProcs; |
| /* Head of list of autovacuum's free PGPROC structures */ |
| PGPROC *autovacFreeProcs; |
| /* Head of list of login monitor free PGPROC structures */ |
| PGPROC *lmFreeProcs; |
| /* Head of list of bgworker free PGPROC structures */ |
| PGPROC *bgworkerFreeProcs; |
| /* Head of list of walsender free PGPROC structures */ |
| PGPROC *walsenderFreeProcs; |
| /* |
| * First pgproc waiting for group XID clear; the following members are |
| * chained through PGPROC.procArrayGroupNext. |
| */ |
| pg_atomic_uint32 procArrayGroupFirst; |
| /* |
| * First pgproc waiting for group transaction status update; the following |
| * members are chained through PGPROC.clogGroupNext. |
| */ |
| pg_atomic_uint32 clogGroupFirst; |
| /* WALWriter process's latch */ |
| Latch *walwriterLatch; |
| /* Checkpointer process's latch */ |
| Latch *checkpointerLatch; |
| /* Current shared estimate of appropriate spins_per_delay value */ |
| int spins_per_delay; |
| /* The proc of the Startup process, since not in ProcArray */ |
| PGPROC *startupProc; |
| int startupProcPid; /* NOTE(review): presumably the Startup |
| * process's PID, kept next to startupProc |
| * -- confirm */ |
| /* Buffer id of the buffer that Startup process waits for pin on, or -1 */ |
| int startupBufferPinWaitBufId; |
| |
| /* |
| * Counter for assigning serial numbers to processes. |
| * NOTE(review): presumably the source of PGPROC.mppLocalProcessSerial -- |
| * confirm where it is incremented. |
| */ |
| int mppLocalProcessCounter; |
| } PROC_HDR; |
| |
| extern PGDLLIMPORT PROC_HDR *ProcGlobal; /* the one cluster-wide PROC_HDR */ |
| |
| extern PGPROC *PreparedXactProcs; /* NOTE(review): presumably the dummy |
| * PGPROCs for prepared transactions, which |
| * ProcGlobal->allProcs does not include |
| * -- confirm */ |
| |
| /* |
| * Accessor for PGPROC given a pgprocno. |
| * |
| * Expands to the address of ProcGlobal->allProcs[n]. No range check is |
| * performed, so n must be a valid index into that array (its length is |
| * ProcGlobal->allProcCount); in particular INVALID_PGPROCNO must never be |
| * passed. The argument is evaluated exactly once. |
| */ |
| #define GetPGProcByNumber(n) (&ProcGlobal->allProcs[(n)]) |
| |
| /* |
| * We set aside some extra PGPROC structures for auxiliary processes, |
| * ie things that aren't full-fledged backends but need shmem access. |
| * |
| * Background writer, checkpointer, WAL writer and archiver run during normal |
| * operation. Startup process and WAL receiver also consume 2 slots, but WAL |
| * writer is launched only after startup has exited, so we only need 5 slots. |
| * |
| * In other words: four processes in normal operation, plus two more during |
| * recovery, minus one because the startup process and the WAL writer never |
| * hold a slot at the same time. |
| */ |
| #define NUM_AUXILIARY_PROCS 5 |
| |
| /* |
| * configurable options |
| * |
| * Declarations only; the variables themselves are defined elsewhere. |
| * NOTE(review): the *Timeout values are presumably in milliseconds -- |
| * confirm against the GUC definitions. log_lock_waits is the only one of |
| * these declared without PGDLLIMPORT. |
| */ |
| extern PGDLLIMPORT int DeadlockTimeout; |
| extern PGDLLIMPORT int StatementTimeout; |
| extern PGDLLIMPORT int LockTimeout; |
| extern PGDLLIMPORT int IdleInTransactionSessionTimeout; |
| extern PGDLLIMPORT int IdleSessionTimeout; |
| extern bool log_lock_waits; |
| |
| |
| /* |
| * Function Prototypes |
| * |
| * The definitions live outside this header, so the grouping below is only a |
| * reading aid inferred from names and signatures (TODO confirm each against |
| * its definition): |
| * |
| * - ProcGlobalSemas, ProcGlobalShmemSize, InitProcGlobal, InitProcess, |
| * InitProcessPhase2, InitAuxiliaryProcess: sizing and set-up of the |
| * shared PROC_HDR / PGPROC state. |
| * - PublishStartupProcessInformation, Set/GetStartupBufferPinWaitBufId: |
| * presumably maintain the startup* fields of PROC_HDR. |
| * - ProcQueueInit, ProcSleep, ProcWakeup, ProcLockWakeup, |
| * CheckDeadLockAlert, IsWaitingForLock, LockErrorCleanup: lock waiting; |
| * ProcSleep returns and ProcWakeup receives a ProcWaitStatus. |
| * - ProcWaitForSignal, ProcSendSignal: wait for / deliver a wakeup; the |
| * target of ProcSendSignal is identified by pid. |
| * - BecomeLockGroupLeader, BecomeLockGroupMember: presumably maintain the |
| * lockGroup* fields of PGPROC. |
| * - ResProcSleep, ResLockWaitCancel, ProcCanSetMppSessionId, |
| * ProcNewMppSessionId: resource-lock and MPP-session helpers. |
| */ |
| extern int ProcGlobalSemas(void); |
| extern Size ProcGlobalShmemSize(void); |
| extern void InitProcGlobal(void); |
| extern void InitProcess(void); |
| extern void InitProcessPhase2(void); |
| extern void InitAuxiliaryProcess(void); |
| |
| extern void PublishStartupProcessInformation(void); |
| extern void SetStartupBufferPinWaitBufId(int bufid); |
| extern int GetStartupBufferPinWaitBufId(void); |
| |
| extern bool HaveNFreeProcs(int n); |
| extern void ProcReleaseLocks(bool isCommit); |
| |
| extern void ProcQueueInit(PROC_QUEUE *queue); |
| extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable); |
| extern PGPROC *ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus); |
| extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock); |
| extern void CheckDeadLockAlert(void); |
| extern bool IsWaitingForLock(void); |
| extern void LockErrorCleanup(void); |
| |
| extern void ProcWaitForSignal(uint32 wait_event_info); |
| extern void ProcSendSignal(int pid); |
| |
| extern PGPROC *AuxiliaryPidGetProc(int pid); |
| |
| extern void BecomeLockGroupLeader(void); |
| extern bool BecomeLockGroupMember(PGPROC *leader, int pid); |
| |
| extern int ResProcSleep(LOCKMODE lockmode, LOCALLOCK *locallock, void *incrementSet); |
| |
| extern void ResLockWaitCancel(void); |
| extern bool ProcCanSetMppSessionId(void); |
| extern void ProcNewMppSessionId(int *newSessionId); |
| |
| /* |
| * session related hook types |
| * |
| * Each hook is a global function pointer that is declared here and defined |
| * elsewhere. |
| * NOTE(review): presumably NULL unless an extension installs a function -- |
| * confirm that every call site checks before invoking. |
| * |
| * AllocSessionId_hook: takes a "reset" flag and returns an int. |
| * NoticeSessionDB_hook: takes a database OID and returns nothing. |
| * CountDBSession_hook: takes a database OID and returns a bool. |
| */ |
| typedef int (*AllocSessionId_hook_type) (bool reset); |
| extern PGDLLIMPORT AllocSessionId_hook_type AllocSessionId_hook; |
| |
| typedef void (*NoticeSessionDB_hook_type) (Oid databaseid); |
| extern PGDLLIMPORT NoticeSessionDB_hook_type NoticeSessionDB_hook; |
| |
| typedef bool (*CountDBSession_hook_type) (Oid databaseid); |
| extern PGDLLIMPORT CountDBSession_hook_type CountDBSession_hook; |
| |
| |
| typedef void (*AuxProcCallbackFunction) (volatile PGPROC *proc, void *args); /* callback |
| * type accepted by LoopAuxProc(); receives a PGPROC and an opaque |
| * argument pointer */ |
| extern void LoopAuxProc(AuxProcCallbackFunction func, void *args); /* NOTE(review): |
| * presumably calls func once per auxiliary-process PGPROC, passing |
| * args through unchanged -- confirm against the definition */ |
| |
| #endif /* _PROC_H_ */ |