| /*------------------------------------------------------------------------- |
| * |
| * shmem.c |
| * create shared memory and initialize shared memory data structures. |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/storage/ipc/shmem.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| /* |
| * POSTGRES processes share one or more regions of shared memory. |
| * The shared memory is created by a postmaster and is inherited |
| * by each backend via fork() (or, in some ports, via other OS-specific |
| * methods). The routines in this file are used for allocating and |
| * binding to shared memory data structures. |
| * |
| * NOTES: |
| * (a) There are three kinds of shared memory data structures |
| * available to POSTGRES: fixed-size structures, queues and hash |
| * tables. Fixed-size structures contain things like global variables |
| * for a module and should never be allocated after the shared memory |
| * initialization phase. Hash tables have a fixed maximum size, but |
| * their actual size can vary dynamically. When entries are added |
| * to the table, more space is allocated. Queues link data structures |
| * that have been allocated either within fixed-size structures or as hash |
| * buckets. Each shared data structure has a string name to identify |
| * it (assigned in the module that declares it). |
| * |
| * (b) During initialization, each module looks for its |
| * shared data structures in a hash table called the "Shmem Index". |
| * If the data structure is not present, the caller can allocate |
| * a new one and initialize it. If the data structure is present, |
| * the caller "attaches" to the structure by initializing a pointer |
| * in the local address space. |
| * The shmem index has two purposes: first, it gives us |
| * a simple model of how the world looks when a backend process |
| * initializes. If something is present in the shmem index, |
| * it is initialized. If it is not, it is uninitialized. Second, |
| * the shmem index allows us to allocate shared memory on demand |
| * instead of trying to preallocate structures and hard-wire the |
| * sizes and locations in header files. If you are using a lot |
| * of shared memory in a lot of different places (and changing |
| * things during development), this is important. |
| * |
| * (c) In standard Unix-ish environments, individual backends do not |
| * need to re-establish their local pointers into shared memory, because |
| * they inherit correct values of those variables via fork() from the |
| * postmaster. However, this does not work in the EXEC_BACKEND case. |
| * In ports using EXEC_BACKEND, new backends have to set up their local |
| * pointers using the method described in (b) above. |
| * |
| * (d) memory allocation model: shared memory can never be |
| * freed, once allocated. Each hash table has its own free list, |
| * so hash buckets can be reused when an item is deleted. However, |
| * if one hash table grows very large and then shrinks, its space |
| * cannot be redistributed to other tables. We could build a simple |
| * hash bucket garbage collector if need be. Right now, it seems |
| * unnecessary. |
| */ |
| |
| #include "postgres.h" |
| |
| #include <unistd.h> |
| |
| #include "access/transam.h" |
| #include "fmgr.h" |
| #include "funcapi.h" |
| #include "miscadmin.h" |
| #include "storage/lwlock.h" |
| #include "storage/pg_shmem.h" |
| #include "storage/shmem.h" |
| #include "storage/spin.h" |
| #include "utils/builtins.h" |
| |
| static void *ShmemAllocRaw(Size size, Size *allocated_size); |
| |
| /* shared memory global variables */ |
| |
| static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */ |
| |
| static void *ShmemBase; /* start address of shared memory */ |
| |
| static void *ShmemEnd; /* end+1 address of shared memory */ |
| |
| slock_t *ShmemLock; /* spinlock for shared memory and LWLock |
| * allocation */ |
| |
| static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */ |
| |
| static int ShmemSystemPageSize = 0; /* system's page size */ |
| |
| /* |
| * InitShmemAccess() --- set up basic pointers to shared memory. |
| * |
| * Note: the argument should be declared "PGShmemHeader *seghdr", |
| * but we use void to avoid having to include ipc.h in shmem.h. |
| */ |
| void |
| InitShmemAccess(void *seghdr) |
| { |
| PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr; |
| |
| ShmemSegHdr = shmhdr; |
| ShmemBase = (void *) shmhdr; |
| ShmemEnd = (char *) ShmemBase + shmhdr->totalsize; |
| } |
| |
| /* |
| * InitShmemAllocation() --- set up shared-memory space allocation. |
| * |
| * This should be called only in the postmaster or a standalone backend. |
| */ |
| void |
| InitShmemAllocation(void) |
| { |
| PGShmemHeader *shmhdr = ShmemSegHdr; |
| char *aligned; |
| |
| Assert(shmhdr != NULL); |
| |
| #ifdef WIN32 |
| ShmemSystemPageSize = 4096; /* Need a way to get this on Win32 */ |
| #else |
| ShmemSystemPageSize = sysconf(_SC_PAGESIZE); |
| #endif |
| if ( ShmemSystemPageSize <= 1 || |
| (ShmemSystemPageSize & ( ShmemSystemPageSize - 1))) // checks for power of 2 |
| { |
| ereport(ERROR, |
| (errcode(ERRCODE_INTERNAL_ERROR), |
| errmsg("invalid page size %d; must be a power of two and not an error", ShmemSystemPageSize))); |
| } |
| /* |
| * Initialize the spinlock used by ShmemAlloc. We must use |
| * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet. |
| */ |
| ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t)); |
| |
| SpinLockInit(ShmemLock); |
| |
| /* |
| * Allocations after this point should go through ShmemAlloc, which |
| * expects to allocate everything on cache line boundaries. Make sure the |
| * first allocation begins on a cache line boundary. |
| */ |
| aligned = (char *) |
| (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset))); |
| shmhdr->freeoffset = aligned - (char *) shmhdr; |
| |
| /* ShmemIndex can't be set up yet (need LWLocks first) */ |
| shmhdr->index = NULL; |
| ShmemIndex = (HTAB *) NULL; |
| |
| /* |
| * Initialize ShmemVariableCache for transaction manager. (This doesn't |
| * really belong here, but not worth moving.) |
| */ |
| ShmemVariableCache = (VariableCache) |
| ShmemAlloc(sizeof(*ShmemVariableCache)); |
| memset(ShmemVariableCache, 0, sizeof(*ShmemVariableCache)); |
| } |
| |
| /* |
| * ShmemAlloc -- allocate max-aligned chunk from shared memory |
| * |
| * Throws error if request cannot be satisfied. |
| * |
| * Assumes ShmemLock and ShmemSegHdr are initialized. |
| */ |
| void * |
| ShmemAlloc(Size size) |
| { |
| void *newSpace; |
| Size allocated_size; |
| |
| newSpace = ShmemAllocRaw(size, &allocated_size); |
| if (!newSpace) |
| ereport(ERROR, |
| (errcode(ERRCODE_OUT_OF_MEMORY), |
| errmsg("out of shared memory (%zu bytes requested)", |
| size))); |
| return newSpace; |
| } |
| |
| /* |
| * ShmemAllocNoError -- allocate max-aligned chunk from shared memory |
| * |
| * As ShmemAlloc, but returns NULL if out of space, rather than erroring. |
| */ |
| void * |
| ShmemAllocNoError(Size size) |
| { |
| Size allocated_size; |
| |
| return ShmemAllocRaw(size, &allocated_size); |
| } |
| |
| /* |
| * ShmemAllocRaw -- allocate align chunk and return allocated size |
| * |
| * Also sets *allocated_size to the number of bytes allocated, which will |
| * be equal to the number requested plus any padding we choose to add. |
| */ |
| static void * |
| ShmemAllocRaw(Size size, Size *allocated_size) |
| { |
| Size newStart; |
| Size newFree; |
| void *newSpace; |
| |
| /* |
| * Better to return NULL for this else caller could still use memory that |
| * does not belong to it. |
| */ |
| if (size == 0) |
| return NULL; |
| |
| /* |
| * Ensure all space is adequately aligned. We used to only MAXALIGN this |
| * space but experience has proved that on modern systems that is not good |
| * enough. Many parts of the system are very sensitive to critical data |
| * structures getting split across cache line boundaries. To avoid that, |
| * attempt to align the beginning of the allocation to a cache line |
| * boundary. The calling code will still need to be careful about how it |
| * uses the allocated space - e.g. by padding each element in an array of |
| * structures out to a power-of-two size - but without this, even that |
| * won't be sufficient. |
| */ |
| size = CACHELINEALIGN(size); |
| *allocated_size = size; |
| |
| Assert(ShmemSegHdr != NULL); |
| |
| SpinLockAcquire(ShmemLock); |
| |
| newStart = ShmemSegHdr->freeoffset; |
| |
| /* |
| * Extra alignment for large requests, since they are probably buffers. |
| * This is also needed for mprotect based shared buffer debugging |
| * (-DMPROTECT_BUFFERS). |
| */ |
| if (size >= BLCKSZ) |
| { |
| newStart = TYPEALIGN(ShmemSystemPageSize, newStart); |
| } |
| |
| newFree = newStart + size; |
| if (newFree <= ShmemSegHdr->totalsize) |
| { |
| newSpace = (void *) ((char *) ShmemBase + newStart); |
| ShmemSegHdr->freeoffset = newFree; |
| } |
| else |
| newSpace = NULL; |
| |
| SpinLockRelease(ShmemLock); |
| |
| /* note this assert is okay with newSpace == NULL */ |
| Assert(newSpace == (void *) CACHELINEALIGN(newSpace)); |
| |
| return newSpace; |
| } |
| |
| /* |
| * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory |
| * |
| * Allocate space without locking ShmemLock. This should be used for, |
| * and only for, allocations that must happen before ShmemLock is ready. |
| * |
| * We consider maxalign, rather than cachealign, sufficient here. |
| */ |
| void * |
| ShmemAllocUnlocked(Size size) |
| { |
| Size newStart; |
| Size newFree; |
| void *newSpace; |
| |
| /* |
| * Ensure allocated space is adequately aligned. |
| */ |
| size = MAXALIGN(size); |
| |
| Assert(ShmemSegHdr != NULL); |
| |
| newStart = ShmemSegHdr->freeoffset; |
| |
| newFree = newStart + size; |
| if (newFree > ShmemSegHdr->totalsize) |
| ereport(ERROR, |
| (errcode(ERRCODE_OUT_OF_MEMORY), |
| errmsg("out of shared memory (%zu bytes requested)", |
| size))); |
| ShmemSegHdr->freeoffset = newFree; |
| |
| newSpace = (void *) ((char *) ShmemBase + newStart); |
| |
| Assert(newSpace == (void *) MAXALIGN(newSpace)); |
| |
| return newSpace; |
| } |
| |
| /* |
| * ShmemAddrIsValid -- test if an address refers to shared memory |
| * |
| * Returns true if the pointer points within the shared memory segment. |
| */ |
| bool |
| ShmemAddrIsValid(const void *addr) |
| { |
| return (addr >= ShmemBase) && (addr < ShmemEnd); |
| } |
| |
| /* |
| * InitShmemIndex() --- set up or attach to shmem index table. |
| */ |
| void |
| InitShmemIndex(void) |
| { |
| HASHCTL info; |
| |
| /* |
| * Create the shared memory shmem index. |
| * |
| * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex |
| * hashtable to exist already, we have a bit of a circularity problem in |
| * initializing the ShmemIndex itself. The special "ShmemIndex" hash |
| * table name will tell ShmemInitStruct to fake it. |
| */ |
| info.keysize = SHMEM_INDEX_KEYSIZE; |
| info.entrysize = sizeof(ShmemIndexEnt); |
| |
| ShmemIndex = ShmemInitHash("ShmemIndex", |
| SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE, |
| &info, |
| HASH_ELEM | HASH_STRINGS); |
| } |
| |
| /* |
| * ShmemInitHash -- Create and initialize, or attach to, a |
| * shared memory hash table. |
| * |
| * We assume caller is doing some kind of synchronization |
| * so that two processes don't try to create/initialize the same |
| * table at once. (In practice, all creations are done in the postmaster |
| * process; child processes should always be attaching to existing tables.) |
| * |
| * max_size is the estimated maximum number of hashtable entries. This is |
| * not a hard limit, but the access efficiency will degrade if it is |
| * exceeded substantially (since it's used to compute directory size and |
| * the hash table buckets will get overfull). |
| * |
| * init_size is the number of hashtable entries to preallocate. For a table |
| * whose maximum size is certain, this should be equal to max_size; that |
| * ensures that no run-time out-of-shared-memory failures can occur. |
| * |
| * *infoP and hash_flags must specify at least the entry sizes and key |
| * comparison semantics (see hash_create()). Flag bits and values specific |
| * to shared-memory hash tables are added here, except that callers may |
| * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE. |
| * |
| * Note: before Postgres 9.0, this function returned NULL for some failure |
| * cases. Now, it always throws error instead, so callers need not check |
| * for NULL. |
| */ |
| HTAB * |
| ShmemInitHash(const char *name, /* table string name for shmem index */ |
| long init_size, /* initial table size */ |
| long max_size, /* max size of the table */ |
| HASHCTL *infoP, /* info about key and bucket size */ |
| int hash_flags) /* info about infoP */ |
| { |
| bool found; |
| void *location; |
| |
| /* |
| * Hash tables allocated in shared memory have a fixed directory; it can't |
| * grow or other backends wouldn't be able to find it. So, make sure we |
| * make it big enough to start with. |
| * |
| * The shared memory allocator must be specified too. |
| */ |
| infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size); |
| infoP->alloc = ShmemAllocNoError; |
| hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE; |
| |
| /* look it up in the shmem index */ |
| location = ShmemInitStruct(name, |
| hash_get_shared_size(infoP, hash_flags), |
| &found); |
| |
| /* |
| * if it already exists, attach to it rather than allocate and initialize |
| * new space |
| */ |
| if (found) |
| hash_flags |= HASH_ATTACH; |
| |
| /* Pass location of hashtable header to hash_create */ |
| infoP->hctl = (HASHHDR *) location; |
| |
| return hash_create(name, init_size, infoP, hash_flags); |
| } |
| |
| /* |
| * ShmemInitStruct -- Create/attach to a structure in shared memory. |
| * |
| * This is called during initialization to find or allocate |
| * a data structure in shared memory. If no other process |
| * has created the structure, this routine allocates space |
| * for it. If it exists already, a pointer to the existing |
| * structure is returned. |
| * |
| * Returns: pointer to the object. *foundPtr is set true if the object was |
| * already in the shmem index (hence, already initialized). |
| * |
| * Note: before Postgres 9.0, this function returned NULL for some failure |
| * cases. Now, it always throws error instead, so callers need not check |
| * for NULL. |
| */ |
| void * |
| ShmemInitStruct(const char *name, Size size, bool *foundPtr) |
| { |
| ShmemIndexEnt *result; |
| void *structPtr; |
| |
| LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE); |
| |
| if (!ShmemIndex) |
| { |
| PGShmemHeader *shmemseghdr = ShmemSegHdr; |
| |
| /* Must be trying to create/attach to ShmemIndex itself */ |
| Assert(strcmp(name, "ShmemIndex") == 0); |
| |
| if (IsUnderPostmaster) |
| { |
| /* Must be initializing a (non-standalone) backend */ |
| Assert(shmemseghdr->index != NULL); |
| structPtr = shmemseghdr->index; |
| *foundPtr = true; |
| } |
| else |
| { |
| /* |
| * If the shmem index doesn't exist, we are bootstrapping: we must |
| * be trying to init the shmem index itself. |
| * |
| * Notice that the ShmemIndexLock is released before the shmem |
| * index has been initialized. This should be OK because no other |
| * process can be accessing shared memory yet. |
| */ |
| Assert(shmemseghdr->index == NULL); |
| structPtr = ShmemAlloc(size); |
| shmemseghdr->index = structPtr; |
| *foundPtr = false; |
| } |
| LWLockRelease(ShmemIndexLock); |
| return structPtr; |
| } |
| |
| Assert(strlen(name) < SHMEM_INDEX_KEYSIZE); |
| /* look it up in the shmem index */ |
| result = (ShmemIndexEnt *) |
| hash_search(ShmemIndex, name, HASH_ENTER_NULL, foundPtr); |
| |
| if (!result) |
| { |
| LWLockRelease(ShmemIndexLock); |
| ereport(ERROR, |
| (errcode(ERRCODE_OUT_OF_MEMORY), |
| errmsg("could not create ShmemIndex entry for data structure \"%s\"", |
| name))); |
| } |
| |
| if (*foundPtr) |
| { |
| /* |
| * Structure is in the shmem index so someone else has allocated it |
| * already. The size better be the same as the size we are trying to |
| * initialize to, or there is a name conflict (or worse). |
| */ |
| if (result->size != size) |
| { |
| LWLockRelease(ShmemIndexLock); |
| ereport(ERROR, |
| (errmsg("ShmemIndex entry size is wrong for data structure" |
| " \"%s\": expected %zu, actual %zu", |
| name, size, result->size))); |
| } |
| structPtr = result->location; |
| } |
| else |
| { |
| Size allocated_size; |
| |
| /* It isn't in the table yet. allocate and initialize it */ |
| structPtr = ShmemAllocRaw(size, &allocated_size); |
| if (structPtr == NULL) |
| { |
| /* out of memory; remove the failed ShmemIndex entry */ |
| hash_search(ShmemIndex, name, HASH_REMOVE, NULL); |
| LWLockRelease(ShmemIndexLock); |
| ereport(ERROR, |
| (errcode(ERRCODE_OUT_OF_MEMORY), |
| errmsg("not enough shared memory for data structure" |
| " \"%s\" (%zu bytes requested)", |
| name, size))); |
| } |
| result->size = size; |
| result->allocated_size = allocated_size; |
| result->location = structPtr; |
| } |
| |
| LWLockRelease(ShmemIndexLock); |
| |
| Assert(ShmemAddrIsValid(structPtr)); |
| |
| Assert(structPtr == (void *) CACHELINEALIGN(structPtr)); |
| |
| return structPtr; |
| } |
| |
| |
| /* |
| * Add two Size values, checking for overflow |
| */ |
| Size |
| add_size(Size s1, Size s2) |
| { |
| Size result; |
| |
| result = s1 + s2; |
| /* We are assuming Size is an unsigned type here... */ |
| if (result < s1 || result < s2) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("requested shared memory size overflows size_t"))); |
| return result; |
| } |
| |
| /* |
| * Multiply two Size values, checking for overflow |
| */ |
| Size |
| mul_size(Size s1, Size s2) |
| { |
| Size result; |
| |
| if (s1 == 0 || s2 == 0) |
| return 0; |
| result = s1 * s2; |
| /* We are assuming Size is an unsigned type here... */ |
| if (result / s2 != s1) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("requested shared memory size overflows size_t"))); |
| return result; |
| } |
| |
| /* SQL SRF showing allocated shared memory */ |
| Datum |
| pg_get_shmem_allocations(PG_FUNCTION_ARGS) |
| { |
| #define PG_GET_SHMEM_SIZES_COLS 4 |
| ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; |
| HASH_SEQ_STATUS hstat; |
| ShmemIndexEnt *ent; |
| Size named_allocated = 0; |
| Datum values[PG_GET_SHMEM_SIZES_COLS]; |
| bool nulls[PG_GET_SHMEM_SIZES_COLS]; |
| |
| InitMaterializedSRF(fcinfo, 0); |
| |
| LWLockAcquire(ShmemIndexLock, LW_SHARED); |
| |
| hash_seq_init(&hstat, ShmemIndex); |
| |
| /* output all allocated entries */ |
| memset(nulls, 0, sizeof(nulls)); |
| while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL) |
| { |
| values[0] = CStringGetTextDatum(ent->key); |
| values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr); |
| values[2] = Int64GetDatum(ent->size); |
| values[3] = Int64GetDatum(ent->allocated_size); |
| named_allocated += ent->allocated_size; |
| |
| tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, |
| values, nulls); |
| } |
| |
| /* output shared memory allocated but not counted via the shmem index */ |
| values[0] = CStringGetTextDatum("<anonymous>"); |
| nulls[1] = true; |
| values[2] = Int64GetDatum(ShmemSegHdr->freeoffset - named_allocated); |
| values[3] = values[2]; |
| tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); |
| |
| /* output as-of-yet unused shared memory */ |
| nulls[0] = true; |
| values[1] = Int64GetDatum(ShmemSegHdr->freeoffset); |
| nulls[1] = false; |
| values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemSegHdr->freeoffset); |
| values[3] = values[2]; |
| tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); |
| |
| LWLockRelease(ShmemIndexLock); |
| |
| return (Datum) 0; |
| } |