| /*------------------------------------------------------------------------- |
| * |
| * shm_toc.c |
| * shared memory segment table of contents |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * src/backend/storage/ipc/shm_toc.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "port/atomics.h" |
| #include "storage/shm_toc.h" |
| #include "storage/spin.h" |
| |
/*
 * One entry in the table of contents: maps a well-known 64-bit key to the
 * location of a chunk within the segment.  Offsets are stored relative to
 * the TOC start because the segment may be mapped at different addresses
 * in different backends.
 */
typedef struct shm_toc_entry
{
	uint64		key;			/* Arbitrary identifier */
	Size		offset;			/* Offset, in bytes, from TOC start */
} shm_toc_entry;
| |
/*
 * Table-of-contents header placed at the very start of the shared memory
 * segment.  The entry array grows forward from here, while chunk space is
 * allocated backward from the segment's end (see shm_toc_allocate).
 *
 * toc_mutex protects all mutable fields; readers of the entry array may
 * instead rely on the barrier protocol in shm_toc_insert/shm_toc_lookup.
 */
struct shm_toc
{
	uint64		toc_magic;		/* Magic number identifying this TOC */
	slock_t		toc_mutex;		/* Spinlock for mutual exclusion */
	Size		toc_total_bytes;	/* Bytes managed by this TOC */
	Size		toc_allocated_bytes;	/* Bytes allocated of those managed */
	uint32		toc_nentry;		/* Number of entries in TOC */
	shm_toc_entry toc_entry[FLEXIBLE_ARRAY_MEMBER];
};
| |
| /* |
| * Initialize a region of shared memory with a table of contents. |
| */ |
| shm_toc * |
| shm_toc_create(uint64 magic, void *address, Size nbytes) |
| { |
| shm_toc *toc = (shm_toc *) address; |
| |
| Assert(nbytes > offsetof(shm_toc, toc_entry)); |
| toc->toc_magic = magic; |
| SpinLockInit(&toc->toc_mutex); |
| |
| /* |
| * The alignment code in shm_toc_allocate() assumes that the starting |
| * value is buffer-aligned. |
| */ |
| toc->toc_total_bytes = BUFFERALIGN_DOWN(nbytes); |
| toc->toc_allocated_bytes = 0; |
| toc->toc_nentry = 0; |
| |
| return toc; |
| } |
| |
| /* |
| * Attach to an existing table of contents. If the magic number found at |
| * the target address doesn't match our expectations, return NULL. |
| */ |
| shm_toc * |
| shm_toc_attach(uint64 magic, void *address) |
| { |
| shm_toc *toc = (shm_toc *) address; |
| |
| if (toc->toc_magic != magic) |
| return NULL; |
| |
| Assert(toc->toc_total_bytes >= toc->toc_allocated_bytes); |
| Assert(toc->toc_total_bytes > offsetof(shm_toc, toc_entry)); |
| |
| return toc; |
| } |
| |
| /* |
| * Allocate shared memory from a segment managed by a table of contents. |
| * |
| * This is not a full-blown allocator; there's no way to free memory. It's |
| * just a way of dividing a single physical shared memory segment into logical |
| * chunks that may be used for different purposes. |
| * |
| * We allocate backwards from the end of the segment, so that the TOC entries |
| * can grow forward from the start of the segment. |
| */ |
/*
 * Allocate shared memory from a segment managed by a table of contents.
 *
 * This is not a full-blown allocator; there's no way to free memory.  It's
 * just a way of dividing a single physical shared memory segment into logical
 * chunks that may be used for different purposes.
 *
 * We allocate backwards from the end of the segment, so that the TOC entries
 * can grow forward from the start of the segment.
 *
 * Throws ERROR (out of shared memory) if the request cannot be satisfied.
 * The returned pointer is BUFFERALIGN'd within the segment.
 */
void *
shm_toc_allocate(shm_toc *toc, Size nbytes)
{
	/* Use a volatile pointer for fields read/written under the spinlock. */
	volatile shm_toc *vtoc = toc;
	Size		total_bytes;
	Size		allocated_bytes;
	Size		nentry;
	Size		toc_bytes;

	/*
	 * Make sure request is well-aligned.  XXX: MAXALIGN is not enough,
	 * because atomic ops might need a wider alignment.  We don't have a
	 * proper definition for the minimum to make atomic ops safe, but
	 * BUFFERALIGN ought to be enough.
	 */
	nbytes = BUFFERALIGN(nbytes);

	SpinLockAcquire(&toc->toc_mutex);

	/* Snapshot the bookkeeping fields while holding the lock. */
	total_bytes = vtoc->toc_total_bytes;
	allocated_bytes = vtoc->toc_allocated_bytes;
	nentry = vtoc->toc_nentry;
	/* Bytes consumed so far: header + entry array + chunk allocations. */
	toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
		+ allocated_bytes;

	/*
	 * Check for memory exhaustion and overflow.  The second condition
	 * detects wraparound of toc_bytes + nbytes, which would otherwise let
	 * an oversized request slip past the first test.
	 */
	if (toc_bytes + nbytes > total_bytes || toc_bytes + nbytes < toc_bytes)
	{
		SpinLockRelease(&toc->toc_mutex);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
	}
	vtoc->toc_allocated_bytes += nbytes;

	SpinLockRelease(&toc->toc_mutex);

	/*
	 * The new chunk occupies the nbytes just below the previously allocated
	 * region at the segment's end.
	 */
	return ((char *) toc) + (total_bytes - allocated_bytes - nbytes);
}
| |
| /* |
| * Return the number of bytes that can still be allocated. |
| */ |
| Size |
| shm_toc_freespace(shm_toc *toc) |
| { |
| volatile shm_toc *vtoc = toc; |
| Size total_bytes; |
| Size allocated_bytes; |
| Size nentry; |
| Size toc_bytes; |
| |
| SpinLockAcquire(&toc->toc_mutex); |
| total_bytes = vtoc->toc_total_bytes; |
| allocated_bytes = vtoc->toc_allocated_bytes; |
| nentry = vtoc->toc_nentry; |
| SpinLockRelease(&toc->toc_mutex); |
| |
| toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry); |
| Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes); |
| return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes)); |
| } |
| |
| /* |
| * Insert a TOC entry. |
| * |
| * The idea here is that the process setting up the shared memory segment will |
| * register the addresses of data structures within the segment using this |
| * function. Each data structure will be identified using a 64-bit key, which |
| * is assumed to be a well-known or discoverable integer. Other processes |
| * accessing the shared memory segment can pass the same key to |
| * shm_toc_lookup() to discover the addresses of those data structures. |
| * |
| * Since the shared memory segment may be mapped at different addresses within |
| * different backends, we store relative rather than absolute pointers. |
| * |
| * This won't scale well to a large number of keys. Hopefully, that isn't |
| * necessary; if it proves to be, we might need to provide a more sophisticated |
| * data structure here. But the real idea here is just to give someone mapping |
| * a dynamic shared memory the ability to find the bare minimum number of |
| * pointers that they need to bootstrap. If you're storing a lot of stuff in |
| * the TOC, you're doing it wrong. |
| */ |
/*
 * Insert a TOC entry.
 *
 * The idea here is that the process setting up the shared memory segment will
 * register the addresses of data structures within the segment using this
 * function.  Each data structure will be identified using a 64-bit key, which
 * is assumed to be a well-known or discoverable integer.  Other processes
 * accessing the shared memory segment can pass the same key to
 * shm_toc_lookup() to discover the addresses of those data structures.
 *
 * Since the shared memory segment may be mapped at different addresses within
 * different backends, we store relative rather than absolute pointers.
 *
 * This won't scale well to a large number of keys.  Hopefully, that isn't
 * necessary; if it proves to be, we might need to provide a more sophisticated
 * data structure here.  But the real idea here is just to give someone mapping
 * a dynamic shared memory the ability to find the bare minimum number of
 * pointers that they need to bootstrap.  If you're storing a lot of stuff in
 * the TOC, you're doing it wrong.
 *
 * Throws ERROR (out of shared memory) if there is no room for another entry.
 */
void
shm_toc_insert(shm_toc *toc, uint64 key, void *address)
{
	/* Use a volatile pointer for fields read/written under the spinlock. */
	volatile shm_toc *vtoc = toc;
	Size		total_bytes;
	Size		allocated_bytes;
	Size		nentry;
	Size		toc_bytes;
	Size		offset;

	/* Relativize pointer: address must lie within the segment, past us. */
	Assert(address > (void *) toc);
	offset = ((char *) address) - (char *) toc;

	SpinLockAcquire(&toc->toc_mutex);

	/* Snapshot the bookkeeping fields while holding the lock. */
	total_bytes = vtoc->toc_total_bytes;
	allocated_bytes = vtoc->toc_allocated_bytes;
	nentry = vtoc->toc_nentry;
	toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry)
		+ allocated_bytes;

	/*
	 * Check for memory exhaustion and overflow.  A new entry must fit
	 * between the existing entry array and the allocated chunks, and
	 * toc_nentry must remain representable as a uint32.
	 */
	if (toc_bytes + sizeof(shm_toc_entry) > total_bytes ||
		toc_bytes + sizeof(shm_toc_entry) < toc_bytes ||
		nentry >= PG_UINT32_MAX)
	{
		SpinLockRelease(&toc->toc_mutex);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory")));
	}

	Assert(offset < total_bytes);
	vtoc->toc_entry[nentry].key = key;
	vtoc->toc_entry[nentry].offset = offset;

	/*
	 * By placing a write barrier after filling in the entry and before
	 * updating the number of entries, we make it safe to read the TOC
	 * unlocked.
	 */
	pg_write_barrier();

	vtoc->toc_nentry++;

	SpinLockRelease(&toc->toc_mutex);
}
| |
| /* |
| * Look up a TOC entry. |
| * |
| * If the key is not found, returns NULL if noError is true, otherwise |
| * throws elog(ERROR). |
| * |
| * Unlike the other functions in this file, this operation acquires no lock; |
| * it uses only barriers. It probably wouldn't hurt concurrency very much even |
| * if it did get a lock, but since it's reasonably likely that a group of |
| * worker processes could each read a series of entries from the same TOC |
| * right around the same time, there seems to be some value in avoiding it. |
| */ |
| void * |
| shm_toc_lookup(shm_toc *toc, uint64 key, bool noError) |
| { |
| uint32 nentry; |
| uint32 i; |
| |
| /* |
| * Read the number of entries before we examine any entry. We assume that |
| * reading a uint32 is atomic. |
| */ |
| nentry = toc->toc_nentry; |
| pg_read_barrier(); |
| |
| /* Now search for a matching entry. */ |
| for (i = 0; i < nentry; ++i) |
| { |
| if (toc->toc_entry[i].key == key) |
| return ((char *) toc) + toc->toc_entry[i].offset; |
| } |
| |
| /* No matching entry was found. */ |
| if (!noError) |
| elog(ERROR, "could not find key " UINT64_FORMAT " in shm TOC at %p", |
| key, toc); |
| |
| return NULL; |
| } |
| |
| /* |
| * Estimate how much shared memory will be required to store a TOC and its |
| * dependent data structures. |
| */ |
| Size |
| shm_toc_estimate(shm_toc_estimator *e) |
| { |
| Size sz; |
| |
| sz = offsetof(shm_toc, toc_entry); |
| sz = add_size(sz, mul_size(e->number_of_keys, sizeof(shm_toc_entry))); |
| sz = add_size(sz, e->space_for_chunks); |
| |
| return BUFFERALIGN(sz); |
| } |