/*-------------------------------------------------------------------------
 *
 * posix_sema.c
 *	  Implement PGSemaphores using POSIX semaphore facilities
 *
 * We prefer the unnamed style of POSIX semaphore (the kind made with
 * sem_init).  We can cope with the kind made with sem_open, however.
 *
 * In either implementation, typedef PGSemaphore is equivalent to "sem_t *".
 * With unnamed semaphores, the sem_t structs live in an array in shared
 * memory.  With named semaphores, that's not true because we cannot persuade
 * sem_open to do its allocation there.  Therefore, the named-semaphore code
 * *does not cope with EXEC_BACKEND*.  The sem_t structs will just be in the
 * postmaster's private memory, where they are successfully inherited by
 * forked backends, but they could not be accessed by exec'd backends.
 *
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/port/posix_sema.c
 *
 *-------------------------------------------------------------------------
 */
| #include "postgres.h" |
| |
| #include <fcntl.h> |
| #include <semaphore.h> |
| #include <signal.h> |
| #include <unistd.h> |
| #include <sys/stat.h> |
| |
| #include "miscadmin.h" |
| #include "storage/ipc.h" |
| #include "storage/pg_sema.h" |
| #include "storage/shmem.h" |
| |
| |
/*
 * Named POSIX semaphores are kept in postmaster-private memory (see the
 * file header comment), so an exec'd backend could not reach them.  Refuse
 * to build that combination.
 */
#ifdef USE_NAMED_POSIX_SEMAPHORES
#ifdef EXEC_BACKEND
#error cannot use named POSIX semaphores with EXEC_BACKEND
#endif
#endif
| |
| typedef union SemTPadded |
| { |
| sem_t pgsem; |
| char pad[PG_CACHE_LINE_SIZE]; |
| } SemTPadded; |
| |
| /* typedef PGSemaphore is equivalent to pointer to sem_t */ |
| typedef struct PGSemaphoreData |
| { |
| SemTPadded sem_padded; |
| } PGSemaphoreData; |
| |
/* Address of the sem_t embedded in the PGSemaphoreData that x points to */
#define PG_SEM_REF(x)	(&((x)->sem_padded.pgsem))

/* Mode bits handed to sem_open(): access/modify by user only */
#define IPCProtection	(0600)
| |
| #ifdef USE_NAMED_POSIX_SEMAPHORES |
| static sem_t **mySemPointers; /* keep track of created semaphores */ |
| #else |
| static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */ |
| #endif |
| static int numSems; /* number of semas acquired so far */ |
| static int maxSems; /* allocated size of above arrays */ |
| static int nextSemKey; /* next name to try */ |
| |
| |
| static void ReleaseSemaphores(int status, Datum arg); |
| |
| |
| #ifdef USE_NAMED_POSIX_SEMAPHORES |
| |
| /* |
| * PosixSemaphoreCreate |
| * |
| * Attempt to create a new named semaphore. |
| * |
| * If we fail with a failure code other than collision-with-existing-sema, |
| * print out an error and abort. Other types of errors suggest nonrecoverable |
| * problems. |
| */ |
| static sem_t * |
| PosixSemaphoreCreate(void) |
| { |
| int semKey; |
| char semname[64]; |
| sem_t *mySem; |
| |
| for (;;) |
| { |
| semKey = nextSemKey++; |
| |
| snprintf(semname, sizeof(semname), "/pgsql-%d", semKey); |
| |
| mySem = sem_open(semname, O_CREAT | O_EXCL, |
| (mode_t) IPCProtection, (unsigned) 1); |
| |
| #ifdef SEM_FAILED |
| if (mySem != (sem_t *) SEM_FAILED) |
| break; |
| #else |
| if (mySem != (sem_t *) (-1)) |
| break; |
| #endif |
| |
| /* Loop if error indicates a collision */ |
| if (errno == EEXIST || errno == EACCES || errno == EINTR) |
| continue; |
| |
| /* |
| * Else complain and abort |
| */ |
| elog(FATAL, "sem_open(\"%s\") failed: %m", semname); |
| } |
| |
| /* |
| * Unlink the semaphore immediately, so it can't be accessed externally. |
| * This also ensures that it will go away if we crash. |
| */ |
| sem_unlink(semname); |
| |
| return mySem; |
| } |
| #else /* !USE_NAMED_POSIX_SEMAPHORES */ |
| |
| /* |
| * PosixSemaphoreCreate |
| * |
| * Attempt to create a new unnamed semaphore. |
| */ |
| static void |
| PosixSemaphoreCreate(sem_t *sem) |
| { |
| if (sem_init(sem, 1, 1) < 0) |
| elog(FATAL, "sem_init failed: %m"); |
| } |
| #endif /* USE_NAMED_POSIX_SEMAPHORES */ |
| |
| |
| /* |
| * PosixSemaphoreKill - removes a semaphore |
| */ |
| static void |
| PosixSemaphoreKill(sem_t *sem) |
| { |
| #ifdef USE_NAMED_POSIX_SEMAPHORES |
| /* Got to use sem_close for named semaphores */ |
| if (sem_close(sem) < 0) |
| elog(LOG, "sem_close failed: %m"); |
| #else |
| /* Got to use sem_destroy for unnamed semaphores */ |
| if (sem_destroy(sem) < 0) |
| elog(LOG, "sem_destroy failed: %m"); |
| #endif |
| } |
| |
| |
| /* |
| * Report amount of shared memory needed for semaphores |
| */ |
| Size |
| PGSemaphoreShmemSize(int maxSemas) |
| { |
| #ifdef USE_NAMED_POSIX_SEMAPHORES |
| /* No shared memory needed in this case */ |
| return 0; |
| #else |
| /* Need a PGSemaphoreData per semaphore */ |
| return mul_size(maxSemas, sizeof(PGSemaphoreData)); |
| #endif |
| } |
| |
| /* |
| * PGReserveSemaphores --- initialize semaphore support |
| * |
| * This is called during postmaster start or shared memory reinitialization. |
| * It should do whatever is needed to be able to support up to maxSemas |
| * subsequent PGSemaphoreCreate calls. Also, if any system resources |
| * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit |
| * callback to release them. |
| * |
| * In the Posix implementation, we acquire semaphores on-demand; the |
| * maxSemas parameter is just used to size the arrays. For unnamed |
| * semaphores, there is an array of PGSemaphoreData structs in shared memory. |
| * For named semaphores, we keep a postmaster-local array of sem_t pointers, |
| * which we use for releasing the semaphores when done. |
| * (This design minimizes the dependency of postmaster shutdown on the |
| * contents of shared memory, which a failed backend might have clobbered. |
| * We can't do much about the possibility of sem_destroy() crashing, but |
| * we don't have to expose the counters to other processes.) |
| */ |
| void |
| PGReserveSemaphores(int maxSemas) |
| { |
| struct stat statbuf; |
| |
| /* |
| * We use the data directory's inode number to seed the search for free |
| * semaphore keys. This minimizes the odds of collision with other |
| * postmasters, while maximizing the odds that we will detect and clean up |
| * semaphores left over from a crashed postmaster in our own directory. |
| */ |
| if (stat(DataDir, &statbuf) < 0) |
| ereport(FATAL, |
| (errcode_for_file_access(), |
| errmsg("could not stat data directory \"%s\": %m", |
| DataDir))); |
| |
| #ifdef USE_NAMED_POSIX_SEMAPHORES |
| mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *)); |
| if (mySemPointers == NULL) |
| elog(PANIC, "out of memory"); |
| #else |
| |
| /* |
| * We must use ShmemAllocUnlocked(), since the spinlock protecting |
| * ShmemAlloc() won't be ready yet. (This ordering is necessary when we |
| * are emulating spinlocks with semaphores.) |
| */ |
| sharedSemas = (PGSemaphore) |
| ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas)); |
| #endif |
| |
| numSems = 0; |
| maxSems = maxSemas; |
| nextSemKey = statbuf.st_ino; |
| |
| on_shmem_exit(ReleaseSemaphores, 0); |
| } |
| |
| /* |
| * Release semaphores at shutdown or shmem reinitialization |
| * |
| * (called as an on_shmem_exit callback, hence funny argument list) |
| */ |
| static void |
| ReleaseSemaphores(int status, Datum arg) |
| { |
| int i; |
| |
| #ifdef USE_NAMED_POSIX_SEMAPHORES |
| for (i = 0; i < numSems; i++) |
| PosixSemaphoreKill(mySemPointers[i]); |
| free(mySemPointers); |
| #endif |
| |
| #ifdef USE_UNNAMED_POSIX_SEMAPHORES |
| for (i = 0; i < numSems; i++) |
| PosixSemaphoreKill(PG_SEM_REF(sharedSemas + i)); |
| #endif |
| } |
| |
| /* |
| * PGSemaphoreCreate |
| * |
| * Allocate a PGSemaphore structure with initial count 1 |
| */ |
| PGSemaphore |
| PGSemaphoreCreate(void) |
| { |
| PGSemaphore sema; |
| sem_t *newsem; |
| |
| /* Can't do this in a backend, because static state is postmaster's */ |
| Assert(!IsUnderPostmaster); |
| |
| if (numSems >= maxSems) |
| elog(PANIC, "too many semaphores created"); |
| |
| #ifdef USE_NAMED_POSIX_SEMAPHORES |
| newsem = PosixSemaphoreCreate(); |
| /* Remember new sema for ReleaseSemaphores */ |
| mySemPointers[numSems] = newsem; |
| sema = (PGSemaphore) newsem; |
| #else |
| sema = &sharedSemas[numSems]; |
| newsem = PG_SEM_REF(sema); |
| PosixSemaphoreCreate(newsem); |
| #endif |
| |
| numSems++; |
| |
| return sema; |
| } |
| |
| /* |
| * PGSemaphoreReset |
| * |
| * Reset a previously-initialized PGSemaphore to have count 0 |
| */ |
| void |
| PGSemaphoreReset(PGSemaphore sema) |
| { |
| /* |
| * There's no direct API for this in POSIX, so we have to ratchet the |
| * semaphore down to 0 with repeated trywait's. |
| */ |
| for (;;) |
| { |
| if (sem_trywait(PG_SEM_REF(sema)) < 0) |
| { |
| if (errno == EAGAIN || errno == EDEADLK) |
| break; /* got it down to 0 */ |
| if (errno == EINTR) |
| continue; /* can this happen? */ |
| elog(FATAL, "sem_trywait failed: %m"); |
| } |
| } |
| } |
| |
| /* |
| * PGSemaphoreLock |
| * |
| * Lock a semaphore (decrement count), blocking if count would be < 0 |
| */ |
| void |
| PGSemaphoreLock(PGSemaphore sema) |
| { |
| int errStatus; |
| |
| /* See notes in sysv_sema.c's implementation of PGSemaphoreLock. */ |
| do |
| { |
| errStatus = sem_wait(PG_SEM_REF(sema)); |
| } while (errStatus < 0 && errno == EINTR); |
| |
| if (errStatus < 0) |
| elog(FATAL, "sem_wait failed: %m"); |
| } |
| |
| /* |
| * PGSemaphoreLockInterruptable |
| * |
| * Lock a semaphore (decrement count), blocking if count would be < 0. |
| * Return true if the lock obtained or false if an interrupt occurred. |
| */ |
| bool |
| PGSemaphoreLockInterruptable(PGSemaphore sema) |
| { |
| int errStatus; |
| |
| errStatus = sem_wait(PG_SEM_REF(sema)); |
| if (errStatus < 0) |
| { |
| if (errno == EINTR) |
| return false; |
| elog(FATAL, "sem_wait failed: %m"); |
| } |
| |
| return true; |
| } |
| |
| /* |
| * PGSemaphoreUnlock |
| * |
| * Unlock a semaphore (increment count) |
| */ |
| void |
| PGSemaphoreUnlock(PGSemaphore sema) |
| { |
| int errStatus; |
| |
| /* |
| * Note: if errStatus is -1 and errno == EINTR then it means we returned |
| * from the operation prematurely because we were sent a signal. So we |
| * try and unlock the semaphore again. Not clear this can really happen, |
| * but might as well cope. |
| */ |
| do |
| { |
| errStatus = sem_post(PG_SEM_REF(sema)); |
| } while (errStatus < 0 && errno == EINTR); |
| |
| if (errStatus < 0) |
| elog(FATAL, "sem_post failed: %m"); |
| } |
| |
| /* |
| * PGSemaphoreTryLock |
| * |
| * Lock a semaphore only if able to do so without blocking |
| */ |
| bool |
| PGSemaphoreTryLock(PGSemaphore sema) |
| { |
| int errStatus; |
| |
| /* |
| * Note: if errStatus is -1 and errno == EINTR then it means we returned |
| * from the operation prematurely because we were sent a signal. So we |
| * try and lock the semaphore again. |
| */ |
| do |
| { |
| errStatus = sem_trywait(PG_SEM_REF(sema)); |
| } while (errStatus < 0 && errno == EINTR); |
| |
| if (errStatus < 0) |
| { |
| if (errno == EAGAIN || errno == EDEADLK) |
| return false; /* failed to lock it */ |
| /* Otherwise we got trouble */ |
| elog(FATAL, "sem_trywait failed: %m"); |
| } |
| |
| return true; |
| } |