src/include/storage/predicate_internals.h - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * predicate_internals.h
  *	  POSTGRES internal predicate locking definitions.
  *
  *
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * src/include/storage/predicate_internals.h
  *
  *-------------------------------------------------------------------------
  */
 #ifndef PREDICATE_INTERNALS_H
 #define PREDICATE_INTERNALS_H

 #include "storage/lock.h"
 #include "storage/lwlock.h"

 /*
  * Commit number.
  */
 typedef uint64 SerCommitSeqNo;

 /*
  * Reserved commit sequence numbers:
  *	- 0 is reserved to indicate a non-existent SLRU entry; it cannot be
  *	  used as a SerCommitSeqNo, even an invalid one
  *	- InvalidSerCommitSeqNo is used to indicate a transaction that
  *	  hasn't committed yet, so use a number greater than all valid
  *	  ones to make comparison do the expected thing
  *	- RecoverySerCommitSeqNo is used to refer to transactions that
  *	  happened before a crash/recovery, since we restart the sequence
  *	  at that point.  It's earlier than all normal sequence numbers,
  *	  and is only used by recovered prepared transactions
  */
 #define InvalidSerCommitSeqNo		((SerCommitSeqNo) PG_UINT64_MAX)
 #define RecoverySerCommitSeqNo		((SerCommitSeqNo) 1)
 #define FirstNormalSerCommitSeqNo	((SerCommitSeqNo) 2)

 /*
  * The SERIALIZABLEXACT struct contains information needed for each
  * serializable database transaction to support SSI techniques.
  *
  * A home-grown list is maintained in shared memory to manage these.
  * An entry is used when the serializable transaction acquires a snapshot.
  * Unless the transaction is rolled back, this entry must generally remain
  * until all concurrent transactions have completed.  (There are special
  * optimizations for READ ONLY transactions which often allow them to be
  * cleaned up earlier.)  A transaction which is rolled back is cleaned up
  * as soon as possible.
  *
  * Eligibility for cleanup of committed transactions is generally determined
  * by comparing the transaction's finishedBefore field to
  * SxactGlobalXmin.
  */
 typedef struct SERIALIZABLEXACT
 {
 	VirtualTransactionId vxid;	/* The executing process always has one of
 								 * these. */

 	/*
 	 * We use two numbers to track the order that transactions commit. Before
 	 * commit, a transaction is marked as prepared, and prepareSeqNo is set.
 	 * Shortly after commit, it's marked as committed, and commitSeqNo is set.
 	 * This doesn't give a strict commit order, but these two values together
 	 * are good enough for us, as we can always err on the safe side and
 	 * assume that there's a conflict, if we can't be sure of the exact
 	 * ordering of two commits.
 	 *
 	 * Note that a transaction is marked as prepared for a short period during
 	 * commit processing, even if two-phase commit is not used. But with
 	 * two-phase commit, a transaction can stay in prepared state for some
 	 * time.
 	 */
 	SerCommitSeqNo prepareSeqNo;
 	SerCommitSeqNo commitSeqNo;

 	/* these values are not both interesting at the same time */
 	union
 	{
 		SerCommitSeqNo earliestOutConflictCommit;	/* when committed with
 													 * conflict out */
 		SerCommitSeqNo lastCommitBeforeSnapshot;	/* when not committed or
 													 * no conflict out */
 	}			SeqNo;
 	SHM_QUEUE	outConflicts;	/* list of write transactions whose data we
 								 * couldn't read. */
 	SHM_QUEUE	inConflicts;	/* list of read transactions which couldn't
 								 * see our write. */
 	SHM_QUEUE	predicateLocks; /* list of associated PREDICATELOCK objects */
 	SHM_QUEUE	finishedLink;	/* list link in
 								 * FinishedSerializableTransactions */

 	/*
 	 * perXactPredicateListLock is only used in parallel queries: it protects
 	 * this SERIALIZABLEXACT's predicate lock list against other workers of
 	 * the same session.
 	 */
 	LWLock		perXactPredicateListLock;

 	/*
 	 * for r/o transactions: list of concurrent r/w transactions that we could
 	 * potentially have conflicts with, and vice versa for r/w transactions
 	 */
 	SHM_QUEUE	possibleUnsafeConflicts;

 	TransactionId topXid;		/* top level xid for the transaction, if one
 								 * exists; else invalid */
 	TransactionId finishedBefore;	/* invalid means still running; else the
 									 * struct expires when no serializable
 									 * xids are before this. */
 	TransactionId xmin;			/* the transaction's snapshot xmin */
 	uint32		flags;			/* OR'd combination of values defined below */
 	int			pid;			/* pid of associated process */
 } SERIALIZABLEXACT;

 #define SXACT_FLAG_COMMITTED			0x00000001	/* already committed */
 #define SXACT_FLAG_PREPARED				0x00000002	/* about to commit */
 #define SXACT_FLAG_ROLLED_BACK			0x00000004	/* already rolled back */
 #define SXACT_FLAG_DOOMED				0x00000008	/* will roll back */
 /*
  * The following flag actually means that the flagged transaction has a
  * conflict out *to a transaction which committed ahead of it*.  It's hard
  * to get that into a name of a reasonable length.
  */
 #define SXACT_FLAG_CONFLICT_OUT			0x00000010
 #define SXACT_FLAG_READ_ONLY			0x00000020
 #define SXACT_FLAG_DEFERRABLE_WAITING	0x00000040
 #define SXACT_FLAG_RO_SAFE				0x00000080
 #define SXACT_FLAG_RO_UNSAFE			0x00000100
 #define SXACT_FLAG_SUMMARY_CONFLICT_IN	0x00000200
 #define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
 /*
  * The following flag means the transaction has been partially released
  * already, but is being preserved because parallel workers might have a
  * reference to it.  It'll be recycled by the leader at end-of-transaction.
  */
 #define SXACT_FLAG_PARTIALLY_RELEASED	0x00000800

 /*
  * The following types are used to provide an ad hoc list for holding
  * SERIALIZABLEXACT objects.  An HTAB is overkill, since there is no need to
  * access these by key -- there are direct pointers to these objects where
  * needed.  If a shared memory list is created, these types can probably be
  * eliminated in favor of using the general solution.
  */
 typedef struct PredXactListElementData
 {
 	SHM_QUEUE	link;
 	SERIALIZABLEXACT sxact;
 }			PredXactListElementData;

 typedef struct PredXactListElementData *PredXactListElement;

 #define PredXactListElementDataSize \
 		((Size)MAXALIGN(sizeof(PredXactListElementData)))

 typedef struct PredXactListData
 {
 	SHM_QUEUE	availableList;
 	SHM_QUEUE	activeList;

 	/*
 	 * These global variables are maintained when registering and cleaning up
 	 * serializable transactions.  They must be global across all backends,
 	 * but are not needed outside the predicate.c source file. Protected by
 	 * SerializableXactHashLock.
 	 */
 	TransactionId SxactGlobalXmin;	/* global xmin for active serializable
 									 * transactions */
 	int			SxactGlobalXminCount;	/* how many active serializable
 										 * transactions have this xmin */
 	int			WritableSxactCount; /* how many non-read-only serializable
 									 * transactions are active */
 	SerCommitSeqNo LastSxactCommitSeqNo;	/* a strictly monotonically
 											 * increasing number for commits
 											 * of serializable transactions */
 	/* Protected by SerializableXactHashLock. */
 	SerCommitSeqNo CanPartialClearThrough;	/* can clear predicate locks and
 											 * inConflicts for committed
 											 * transactions through this seq
 											 * no */
 	/* Protected by SerializableFinishedListLock. */
 	SerCommitSeqNo HavePartialClearedThrough;	/* have cleared through this
 												 * seq no */
 	SERIALIZABLEXACT *OldCommittedSxact;	/* shared copy of dummy sxact */

 	PredXactListElement element;
 }			PredXactListData;

 typedef struct PredXactListData *PredXactList;

 #define PredXactListDataSize \
 		((Size)MAXALIGN(sizeof(PredXactListData)))


 /*
  * The following types are used to provide lists of rw-conflicts between
  * pairs of transactions.  Since exactly the same information is needed,
  * they are also used to record possible unsafe transaction relationships
  * for purposes of identifying safe snapshots for read-only transactions.
  *
  * When a RWConflictData is not in use to record either type of relationship
  * between a pair of transactions, it is kept on an "available" list.  The
  * outLink field is used for maintaining that list.
  */
 typedef struct RWConflictData
 {
 	SHM_QUEUE	outLink;		/* link for list of conflicts out from a sxact */
 	SHM_QUEUE	inLink;			/* link for list of conflicts in to a sxact */
 	SERIALIZABLEXACT *sxactOut;
 	SERIALIZABLEXACT *sxactIn;
 }			RWConflictData;

 typedef struct RWConflictData *RWConflict;

 #define RWConflictDataSize \
 		((Size)MAXALIGN(sizeof(RWConflictData)))

 typedef struct RWConflictPoolHeaderData
 {
 	SHM_QUEUE	availableList;
 	RWConflict	element;
 }			RWConflictPoolHeaderData;

 typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;

 #define RWConflictPoolHeaderDataSize \
 		((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))


 /*
  * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
  * transaction or any of its subtransactions.
  */
 typedef struct SERIALIZABLEXIDTAG
 {
 	TransactionId xid;
 } SERIALIZABLEXIDTAG;

 /*
  * The SERIALIZABLEXID struct provides a link from a TransactionId for a
  * serializable transaction to the related SERIALIZABLEXACT record, even if
  * the transaction has completed and its connection has been closed.
  *
  * These are created as new top level transaction IDs are first assigned to
  * transactions which are participating in predicate locking.  This may
  * never happen for a particular transaction if it doesn't write anything.
  * They are removed with their related serializable transaction objects.
  *
  * The SubTransGetTopmostTransaction method is used where necessary to get
  * from an XID which might be from a subtransaction to the top level XID.
  */
 typedef struct SERIALIZABLEXID
 {
 	/* hash key */
 	SERIALIZABLEXIDTAG tag;

 	/* data */
 	SERIALIZABLEXACT *myXact;	/* pointer to the top level transaction data */
 } SERIALIZABLEXID;


 /*
  * The PREDICATELOCKTARGETTAG struct identifies a database object which can
  * be the target of predicate locks.
  *
  * Note that the hash function being used doesn't properly respect tag
  * length -- if the length of the structure isn't a multiple of four bytes it
  * will go to a four byte boundary past the end of the tag.  If you change
  * this struct, make sure any slack space is initialized, so that any random
  * bytes in the middle or at the end are not included in the hash.
  *
  * TODO SSI: If we always use the same fields for the same type of value, we
  * should rename these.  Holding off until it's clear there are no exceptions.
  * Since indexes are relations with blocks and tuples, it's looking likely that
  * the rename will be possible.  If not, we may need to divide the last field
  * and use part of it for a target type, so that we know how to interpret the
  * data..
  */
 typedef struct PREDICATELOCKTARGETTAG
 {
 	uint32		locktag_field1; /* a 32-bit ID field */
 	uint32		locktag_field2; /* a 32-bit ID field */
 	uint32		locktag_field3; /* a 32-bit ID field */
 	uint32		locktag_field4; /* a 32-bit ID field */
 } PREDICATELOCKTARGETTAG;

 /*
  * The PREDICATELOCKTARGET struct represents a database object on which there
  * are predicate locks.
  *
  * A hash list of these objects is maintained in shared memory.  An entry is
  * added when a predicate lock is requested on an object which doesn't
  * already have one.  An entry is removed when the last lock is removed from
  * its list.
  */
 typedef struct PREDICATELOCKTARGET
 {
 	/* hash key */
 	PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */

 	/* data */
 	SHM_QUEUE	predicateLocks; /* list of PREDICATELOCK objects assoc. with
 								 * predicate lock target */
 } PREDICATELOCKTARGET;


 /*
  * The PREDICATELOCKTAG struct identifies an individual predicate lock.
  *
  * It is the combination of predicate lock target (which is a lockable
  * object) and a serializable transaction which has acquired a lock on that
  * target.
  */
 typedef struct PREDICATELOCKTAG
 {
 	PREDICATELOCKTARGET *myTarget;
 	SERIALIZABLEXACT *myXact;
 } PREDICATELOCKTAG;

 /*
  * The PREDICATELOCK struct represents an individual lock.
  *
  * An entry can be created here when the related database object is read, or
  * by promotion of multiple finer-grained targets.  All entries related to a
  * serializable transaction are removed when that serializable transaction is
  * cleaned up.  Entries can also be removed when they are combined into a
  * single coarser-grained lock entry.
  */
 typedef struct PREDICATELOCK
 {
 	/* hash key */
 	PREDICATELOCKTAG tag;		/* unique identifier of lock */

 	/* data */
 	SHM_QUEUE	targetLink;		/* list link in PREDICATELOCKTARGET's list of
 								 * predicate locks */
 	SHM_QUEUE	xactLink;		/* list link in SERIALIZABLEXACT's list of
 								 * predicate locks */
 	SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
 } PREDICATELOCK;


 /*
  * The LOCALPREDICATELOCK struct represents a local copy of data which is
  * also present in the PREDICATELOCK table, organized for fast access without
  * needing to acquire a LWLock.  It is strictly for optimization.
  *
  * Each serializable transaction creates its own local hash table to hold a
  * collection of these.  This information is used to determine when a number
  * of fine-grained locks should be promoted to a single coarser-grained lock.
  * The information is maintained more-or-less in parallel to the
  * PREDICATELOCK data, but because this data is not protected by locks and is
  * only used in an optimization heuristic, it is allowed to drift in a few
  * corner cases where maintaining exact data would be expensive.
  *
  * The hash table is created when the serializable transaction acquires its
  * snapshot, and its memory is released upon completion of the transaction.
  */
 typedef struct LOCALPREDICATELOCK
 {
 	/* hash key */
 	PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */

 	/* data */
 	bool		held;			/* is lock held, or just its children?	*/
 	int			childLocks;		/* number of child locks currently held */
 } LOCALPREDICATELOCK;


 /*
  * The types of predicate locks which can be acquired.
  */
 typedef enum PredicateLockTargetType
 {
 	PREDLOCKTAG_RELATION,
 	PREDLOCKTAG_PAGE,
 	PREDLOCKTAG_TUPLE
 	/* TODO SSI: Other types may be needed for index locking */
 } PredicateLockTargetType;


 /*
  * This structure is used to quickly capture a copy of all predicate
  * locks.  This is currently used only by the pg_lock_status function,
  * which in turn is used by the pg_locks view.
  */
 typedef struct PredicateLockData
 {
 	int			nelements;
 	PREDICATELOCKTARGETTAG *locktags;
 	SERIALIZABLEXACT *xacts;
 } PredicateLockData;


 /*
  * These macros define how we map logical IDs of lockable objects into the
  * physical fields of PREDICATELOCKTARGETTAG.   Use these to set up values,
  * rather than accessing the fields directly.  Note multiple eval of target!
  */
 #define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
 	((locktag).locktag_field1 = (dboid), \
 	 (locktag).locktag_field2 = (reloid), \
 	 (locktag).locktag_field3 = InvalidBlockNumber, \
 	 (locktag).locktag_field4 = InvalidOffsetNumber)

 #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
 	((locktag).locktag_field1 = (dboid), \
 	 (locktag).locktag_field2 = (reloid), \
 	 (locktag).locktag_field3 = (blocknum), \
 	 (locktag).locktag_field4 = InvalidOffsetNumber)

 #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
 	((locktag).locktag_field1 = (dboid), \
 	 (locktag).locktag_field2 = (reloid), \
 	 (locktag).locktag_field3 = (blocknum), \
 	 (locktag).locktag_field4 = (offnum))

 #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
 	((Oid) (locktag).locktag_field1)
 #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
 	((Oid) (locktag).locktag_field2)
 #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
 	((BlockNumber) (locktag).locktag_field3)
 #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
 	((OffsetNumber) (locktag).locktag_field4)
 #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag)							 \
 	(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
 	 (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE :   \
 	  PREDLOCKTAG_RELATION))

 /*
  * Two-phase commit statefile records. There are two types: for each
  * transaction, we generate one per-transaction record and a variable
  * number of per-predicate-lock records.
  */
 typedef enum TwoPhasePredicateRecordType
 {
 	TWOPHASEPREDICATERECORD_XACT,
 	TWOPHASEPREDICATERECORD_LOCK
 } TwoPhasePredicateRecordType;

 /*
  * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
  * much is needed because most of it not meaningful for a recovered
  * prepared transaction.
  *
  * In particular, we do not record the in and out conflict lists for a
  * prepared transaction because the associated SERIALIZABLEXACTs will
  * not be available after recovery. Instead, we simply record the
  * existence of each type of conflict by setting the transaction's
  * summary conflict in/out flag.
  */
 typedef struct TwoPhasePredicateXactRecord
 {
 	TransactionId xmin;
 	uint32		flags;
 } TwoPhasePredicateXactRecord;

 /* Per-lock state */
 typedef struct TwoPhasePredicateLockRecord
 {
 	PREDICATELOCKTARGETTAG target;
 	uint32		filler;			/* to avoid length change in back-patched fix */
 } TwoPhasePredicateLockRecord;

 typedef struct TwoPhasePredicateRecord
 {
 	TwoPhasePredicateRecordType type;
 	union
 	{
 		TwoPhasePredicateXactRecord xactRecord;
 		TwoPhasePredicateLockRecord lockRecord;
 	}			data;
 } TwoPhasePredicateRecord;

 /*
  * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
  */
 #define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)


 /*
  * Function definitions for functions needing awareness of predicate
  * locking internals.
  */
 extern PredicateLockData *GetPredicateLockStatusData(void);
 extern int	GetSafeSnapshotBlockingPids(int blocked_pid,
 										int *output, int output_size);

 #endif							/* PREDICATE_INTERNALS_H */
	/*-------------------------------------------------------------------------
	*
	* predicate_internals.h
	* POSTGRES internal predicate locking definitions.
	*
	*
	* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
	* Portions Copyright (c) 1994, Regents of the University of California
	*
	* src/include/storage/predicate_internals.h
	*
	*-------------------------------------------------------------------------
	*/
	#ifndef PREDICATE_INTERNALS_H
	#define PREDICATE_INTERNALS_H

	#include "storage/lock.h"
	#include "storage/lwlock.h"

	/*
	* Commit number.
	*/
	typedef uint64 SerCommitSeqNo;

	/*
	* Reserved commit sequence numbers:
	* - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
	* used as a SerCommitSeqNo, even an invalid one
	* - InvalidSerCommitSeqNo is used to indicate a transaction that
	* hasn't committed yet, so use a number greater than all valid
	* ones to make comparison do the expected thing
	* - RecoverySerCommitSeqNo is used to refer to transactions that
	* happened before a crash/recovery, since we restart the sequence
	* at that point. It's earlier than all normal sequence numbers,
	* and is only used by recovered prepared transactions
	*/
	#define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
	#define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
	#define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)

	/*
	* The SERIALIZABLEXACT struct contains information needed for each
	* serializable database transaction to support SSI techniques.
	*
	* A home-grown list is maintained in shared memory to manage these.
	* An entry is used when the serializable transaction acquires a snapshot.
	* Unless the transaction is rolled back, this entry must generally remain
	* until all concurrent transactions have completed. (There are special
	* optimizations for READ ONLY transactions which often allow them to be
	* cleaned up earlier.) A transaction which is rolled back is cleaned up
	* as soon as possible.
	*
	* Eligibility for cleanup of committed transactions is generally determined
	* by comparing the transaction's finishedBefore field to
	* SxactGlobalXmin.
	*/
	typedef struct SERIALIZABLEXACT
	{
	VirtualTransactionId vxid; /* The executing process always has one of
	* these. */

	/*
	* We use two numbers to track the order that transactions commit. Before
	* commit, a transaction is marked as prepared, and prepareSeqNo is set.
	* Shortly after commit, it's marked as committed, and commitSeqNo is set.
	* This doesn't give a strict commit order, but these two values together
	* are good enough for us, as we can always err on the safe side and
	* assume that there's a conflict, if we can't be sure of the exact
	* ordering of two commits.
	*
	* Note that a transaction is marked as prepared for a short period during
	* commit processing, even if two-phase commit is not used. But with
	* two-phase commit, a transaction can stay in prepared state for some
	* time.
	*/
	SerCommitSeqNo prepareSeqNo;
	SerCommitSeqNo commitSeqNo;

	/* these values are not both interesting at the same time */
	union
	{
	SerCommitSeqNo earliestOutConflictCommit; /* when committed with
	* conflict out */
	SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
	* no conflict out */
	} SeqNo;
	SHM_QUEUE outConflicts; /* list of write transactions whose data we
	* couldn't read. */
	SHM_QUEUE inConflicts; /* list of read transactions which couldn't
	* see our write. */
	SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects */
	SHM_QUEUE finishedLink; /* list link in
	* FinishedSerializableTransactions */

	/*
	* perXactPredicateListLock is only used in parallel queries: it protects
	* this SERIALIZABLEXACT's predicate lock list against other workers of
	* the same session.
	*/
	LWLock perXactPredicateListLock;

	/*
	* for r/o transactions: list of concurrent r/w transactions that we could
	* potentially have conflicts with, and vice versa for r/w transactions
	*/
	SHM_QUEUE possibleUnsafeConflicts;

	TransactionId topXid; /* top level xid for the transaction, if one
	* exists; else invalid */
	TransactionId finishedBefore; /* invalid means still running; else the
	* struct expires when no serializable
	* xids are before this. */
	TransactionId xmin; /* the transaction's snapshot xmin */
	uint32 flags; /* OR'd combination of values defined below */
	int pid; /* pid of associated process */
	} SERIALIZABLEXACT;

	#define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
	#define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
	#define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
	#define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
	/*
	* The following flag actually means that the flagged transaction has a
	* conflict out to a transaction which committed ahead of it. It's hard
	* to get that into a name of a reasonable length.
	*/
	#define SXACT_FLAG_CONFLICT_OUT 0x00000010
	#define SXACT_FLAG_READ_ONLY 0x00000020
	#define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
	#define SXACT_FLAG_RO_SAFE 0x00000080
	#define SXACT_FLAG_RO_UNSAFE 0x00000100
	#define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
	#define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
	/*
	* The following flag means the transaction has been partially released
	* already, but is being preserved because parallel workers might have a
	* reference to it. It'll be recycled by the leader at end-of-transaction.
	*/
	#define SXACT_FLAG_PARTIALLY_RELEASED 0x00000800

	/*
	* The following types are used to provide an ad hoc list for holding
	* SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
	* access these by key -- there are direct pointers to these objects where
	* needed. If a shared memory list is created, these types can probably be
	* eliminated in favor of using the general solution.
	*/
	typedef struct PredXactListElementData
	{
	SHM_QUEUE link;
	SERIALIZABLEXACT sxact;
	} PredXactListElementData;

	typedef struct PredXactListElementData *PredXactListElement;

	#define PredXactListElementDataSize \
	((Size)MAXALIGN(sizeof(PredXactListElementData)))

	typedef struct PredXactListData
	{
	SHM_QUEUE availableList;
	SHM_QUEUE activeList;

	/*
	* These global variables are maintained when registering and cleaning up
	* serializable transactions. They must be global across all backends,
	* but are not needed outside the predicate.c source file. Protected by
	* SerializableXactHashLock.
	*/
	TransactionId SxactGlobalXmin; /* global xmin for active serializable
	* transactions */
	int SxactGlobalXminCount; /* how many active serializable
	* transactions have this xmin */
	int WritableSxactCount; /* how many non-read-only serializable
	* transactions are active */
	SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
	* increasing number for commits
	* of serializable transactions */
	/* Protected by SerializableXactHashLock. */
	SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks and
	* inConflicts for committed
	* transactions through this seq
	* no */
	/* Protected by SerializableFinishedListLock. */
	SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
	* seq no */
	SERIALIZABLEXACT OldCommittedSxact; / shared copy of dummy sxact */

	PredXactListElement element;
	} PredXactListData;

	typedef struct PredXactListData *PredXactList;

	#define PredXactListDataSize \
	((Size)MAXALIGN(sizeof(PredXactListData)))


	/*
	* The following types are used to provide lists of rw-conflicts between
	* pairs of transactions. Since exactly the same information is needed,
	* they are also used to record possible unsafe transaction relationships
	* for purposes of identifying safe snapshots for read-only transactions.
	*
	* When a RWConflictData is not in use to record either type of relationship
	* between a pair of transactions, it is kept on an "available" list. The
	* outLink field is used for maintaining that list.
	*/
	typedef struct RWConflictData
	{
	SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */
	SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */
	SERIALIZABLEXACT *sxactOut;
	SERIALIZABLEXACT *sxactIn;
	} RWConflictData;

	typedef struct RWConflictData *RWConflict;

	#define RWConflictDataSize \
	((Size)MAXALIGN(sizeof(RWConflictData)))

	typedef struct RWConflictPoolHeaderData
	{
	SHM_QUEUE availableList;
	RWConflict element;
	} RWConflictPoolHeaderData;

	typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;

	#define RWConflictPoolHeaderDataSize \
	((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))


	/*
	* The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
	* transaction or any of its subtransactions.
	*/
	typedef struct SERIALIZABLEXIDTAG
	{
	TransactionId xid;
	} SERIALIZABLEXIDTAG;

	/*
	* The SERIALIZABLEXID struct provides a link from a TransactionId for a
	* serializable transaction to the related SERIALIZABLEXACT record, even if
	* the transaction has completed and its connection has been closed.
	*
	* These are created as new top level transaction IDs are first assigned to
	* transactions which are participating in predicate locking. This may
	* never happen for a particular transaction if it doesn't write anything.
	* They are removed with their related serializable transaction objects.
	*
	* The SubTransGetTopmostTransaction method is used where necessary to get
	* from an XID which might be from a subtransaction to the top level XID.
	*/
	typedef struct SERIALIZABLEXID
	{
	/* hash key */
	SERIALIZABLEXIDTAG tag;

	/* data */
	SERIALIZABLEXACT myXact; / pointer to the top level transaction data */
	} SERIALIZABLEXID;


	/*
	* The PREDICATELOCKTARGETTAG struct identifies a database object which can
	* be the target of predicate locks.
	*
	* Note that the hash function being used doesn't properly respect tag
	* length -- if the length of the structure isn't a multiple of four bytes it
	* will go to a four byte boundary past the end of the tag. If you change
	* this struct, make sure any slack space is initialized, so that any random
	* bytes in the middle or at the end are not included in the hash.
	*
	* TODO SSI: If we always use the same fields for the same type of value, we
	* should rename these. Holding off until it's clear there are no exceptions.
	* Since indexes are relations with blocks and tuples, it's looking likely that
	* the rename will be possible. If not, we may need to divide the last field
	* and use part of it for a target type, so that we know how to interpret the
	* data..
	*/
	typedef struct PREDICATELOCKTARGETTAG
	{
	uint32 locktag_field1; /* a 32-bit ID field */
	uint32 locktag_field2; /* a 32-bit ID field */
	uint32 locktag_field3; /* a 32-bit ID field */
	uint32 locktag_field4; /* a 32-bit ID field */
	} PREDICATELOCKTARGETTAG;

	/*
	* The PREDICATELOCKTARGET struct represents a database object on which there
	* are predicate locks.
	*
	* A hash list of these objects is maintained in shared memory. An entry is
	* added when a predicate lock is requested on an object which doesn't
	* already have one. An entry is removed when the last lock is removed from
	* its list.
	*/
	typedef struct PREDICATELOCKTARGET
	{
	/* hash key */
	PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */

	/* data */
	SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
	* predicate lock target */
	} PREDICATELOCKTARGET;


	/*
	* The PREDICATELOCKTAG struct identifies an individual predicate lock.
	*
	* It is the combination of predicate lock target (which is a lockable
	* object) and a serializable transaction which has acquired a lock on that
	* target.
	*/
	typedef struct PREDICATELOCKTAG
	{
	PREDICATELOCKTARGET *myTarget;
	SERIALIZABLEXACT *myXact;
	} PREDICATELOCKTAG;

	/*
	* The PREDICATELOCK struct represents an individual lock.
	*
	* An entry can be created here when the related database object is read, or
	* by promotion of multiple finer-grained targets. All entries related to a
	* serializable transaction are removed when that serializable transaction is
	* cleaned up. Entries can also be removed when they are combined into a
	* single coarser-grained lock entry.
	*/
	typedef struct PREDICATELOCK
	{
	/* hash key */
	PREDICATELOCKTAG tag; /* unique identifier of lock */

	/* data */
	SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of
	* predicate locks */
	SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of
	* predicate locks */
	SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
	} PREDICATELOCK;


	/*
	* The LOCALPREDICATELOCK struct represents a local copy of data which is
	* also present in the PREDICATELOCK table, organized for fast access without
	* needing to acquire a LWLock. It is strictly for optimization.
	*
	* Each serializable transaction creates its own local hash table to hold a
	* collection of these. This information is used to determine when a number
	* of fine-grained locks should be promoted to a single coarser-grained lock.
	* The information is maintained more-or-less in parallel to the
	* PREDICATELOCK data, but because this data is not protected by locks and is
	* only used in an optimization heuristic, it is allowed to drift in a few
	* corner cases where maintaining exact data would be expensive.
	*
	* The hash table is created when the serializable transaction acquires its
	* snapshot, and its memory is released upon completion of the transaction.
	*/
	typedef struct LOCALPREDICATELOCK
	{
	/* hash key */
	PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */

	/* data */
	bool held; /* is lock held, or just its children? */
	int childLocks; /* number of child locks currently held */
	} LOCALPREDICATELOCK;


	/*
	* The types of predicate locks which can be acquired.
	*/
	typedef enum PredicateLockTargetType
	{
	PREDLOCKTAG_RELATION,
	PREDLOCKTAG_PAGE,
	PREDLOCKTAG_TUPLE
	/* TODO SSI: Other types may be needed for index locking */
	} PredicateLockTargetType;


	/*
	* This structure is used to quickly capture a copy of all predicate
	* locks. This is currently used only by the pg_lock_status function,
	* which in turn is used by the pg_locks view.
	*/
	typedef struct PredicateLockData
	{
	int nelements;
	PREDICATELOCKTARGETTAG *locktags;
	SERIALIZABLEXACT *xacts;
	} PredicateLockData;


	/*
	* These macros define how we map logical IDs of lockable objects into the
	* physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
	* rather than accessing the fields directly. Note multiple eval of target!
	*/
	#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
	((locktag).locktag_field1 = (dboid), \
	(locktag).locktag_field2 = (reloid), \
	(locktag).locktag_field3 = InvalidBlockNumber, \
	(locktag).locktag_field4 = InvalidOffsetNumber)

	#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
	((locktag).locktag_field1 = (dboid), \
	(locktag).locktag_field2 = (reloid), \
	(locktag).locktag_field3 = (blocknum), \
	(locktag).locktag_field4 = InvalidOffsetNumber)

	#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
	((locktag).locktag_field1 = (dboid), \
	(locktag).locktag_field2 = (reloid), \
	(locktag).locktag_field3 = (blocknum), \
	(locktag).locktag_field4 = (offnum))

	#define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
	((Oid) (locktag).locktag_field1)
	#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
	((Oid) (locktag).locktag_field2)
	#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
	((BlockNumber) (locktag).locktag_field3)
	#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
	((OffsetNumber) (locktag).locktag_field4)
	#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
	(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
	(((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
	PREDLOCKTAG_RELATION))

	/*
	* Two-phase commit statefile records. There are two types: for each
	* transaction, we generate one per-transaction record and a variable
	* number of per-predicate-lock records.
	*/
	typedef enum TwoPhasePredicateRecordType
	{
	TWOPHASEPREDICATERECORD_XACT,
	TWOPHASEPREDICATERECORD_LOCK
	} TwoPhasePredicateRecordType;

	/*
	* Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
	* much is needed because most of it not meaningful for a recovered
	* prepared transaction.
	*
	* In particular, we do not record the in and out conflict lists for a
	* prepared transaction because the associated SERIALIZABLEXACTs will
	* not be available after recovery. Instead, we simply record the
	* existence of each type of conflict by setting the transaction's
	* summary conflict in/out flag.
	*/
	typedef struct TwoPhasePredicateXactRecord
	{
	TransactionId xmin;
	uint32 flags;
	} TwoPhasePredicateXactRecord;

	/* Per-lock state */
	typedef struct TwoPhasePredicateLockRecord
	{
	PREDICATELOCKTARGETTAG target;
	uint32 filler; /* to avoid length change in back-patched fix */
	} TwoPhasePredicateLockRecord;

	typedef struct TwoPhasePredicateRecord
	{
	TwoPhasePredicateRecordType type;
	union
	{
	TwoPhasePredicateXactRecord xactRecord;
	TwoPhasePredicateLockRecord lockRecord;
	} data;
	} TwoPhasePredicateRecord;

	/*
	* Define a macro to use for an "empty" SERIALIZABLEXACT reference.
	*/
	#define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)


	/*
	* Function definitions for functions needing awareness of predicate
	* locking internals.
	*/
	extern PredicateLockData *GetPredicateLockStatusData(void);
	extern int GetSafeSnapshotBlockingPids(int blocked_pid,
	int *output, int output_size);

	#endif /* PREDICATE_INTERNALS_H */