| /*------------------------------------------------------------------------- |
| * |
| * postmaster.c |
| * This program acts as a clearing house for requests to the |
| * POSTGRES system. Frontend programs send a startup message |
| * to the Postmaster and the postmaster uses the info in the |
| * message to setup a backend process. |
| * |
| * The postmaster also manages system-wide operations such as |
| * startup and shutdown. The postmaster itself doesn't do those |
| * operations, mind you --- it just forks off a subprocess to do them |
| * at the right times. It also takes care of resetting the system |
| * if a backend crashes. |
| * |
| * The postmaster process creates the shared memory and semaphore |
| * pools during startup, but as a rule does not touch them itself. |
| * In particular, it is not a member of the PGPROC array of backends |
| * and so it cannot participate in lock-manager operations. Keeping |
| * the postmaster away from shared memory operations makes it simpler |
| * and more reliable. The postmaster is almost always able to recover |
| * from crashes of individual backends by resetting shared memory; |
| * if it did much with shared memory then it would be prone to crashing |
| * along with the backends. |
| * |
| * When a request message is received, we now fork() immediately. |
| * The child process performs authentication of the request, and |
| * then becomes a backend if successful. This allows the auth code |
| * to be written in a simple single-threaded style (as opposed to the |
| * crufty "poor man's multitasking" code that used to be needed). |
| * More importantly, it ensures that blockages in non-multithreaded |
| * libraries like SSL or PAM cannot cause denial of service to other |
| * clients. |
| * |
| * |
| * Portions Copyright (c) 2005-2009, Greenplum inc |
| * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates. |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/postmaster/postmaster.c |
| * |
| * NOTES |
| * |
| * Initialization: |
| * The Postmaster sets up shared memory data structures |
| * for the backends. |
| * |
| * Synchronization: |
| * The Postmaster shares memory with the backends but should avoid |
| * touching shared memory, so as not to become stuck if a crashing |
| * backend screws up locks or shared memory. Likewise, the Postmaster |
| * should never block on messages from frontend clients. |
| * |
| * Garbage Collection: |
| * The Postmaster cleans up after backends if they have an emergency |
| * exit and/or core dump. |
| * |
| * Error Reporting: |
| * Use write_stderr() only for reporting "interactive" errors |
| * (essentially, bogus arguments on the command line). Once the |
| * postmaster is launched, use ereport(). |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include <unistd.h> |
| #include <signal.h> |
| #include <time.h> |
| #include <sys/wait.h> |
| #include <ctype.h> |
| #include <sys/stat.h> |
| #include <sys/socket.h> |
| #include <fcntl.h> |
| #include <sys/param.h> |
| #include <netdb.h> |
| #include <limits.h> |
| #include "access/xlog.h" |
| /* headers required for process affinity bindings */ |
| #ifdef HAVE_NUMA_H |
| #define NUMA_VERSION1_COMPATIBILITY 1 |
| #include <numa.h> |
| #endif |
| |
| #ifdef HAVE_SYS_SELECT_H |
| #include <sys/select.h> |
| #endif |
| |
| #ifdef USE_BONJOUR |
| #include <dns_sd.h> |
| #endif |
| |
| #ifdef USE_SYSTEMD |
| #include <systemd/sd-daemon.h> |
| #endif |
| |
| #ifdef HAVE_PTHREAD_IS_THREADED_NP |
| #include <pthread.h> |
| #endif |
| |
| #include "access/transam.h" |
| #include "access/xlog.h" |
| #include "bootstrap/bootstrap.h" |
| #include "catalog/pg_control.h" |
| #include "common/file_perm.h" |
| #include "common/ip.h" |
| #include "common/string.h" |
| #include "crypto/bufenc.h" |
| #include "crypto/kmgr.h" |
| #include "lib/ilist.h" |
| #include "libpq/auth.h" |
| #include "libpq/libpq.h" |
| #include "libpq/pqformat.h" |
| #include "libpq/pqsignal.h" |
| #include "pg_getopt.h" |
| #include "pgstat.h" |
| #include "port/pg_bswap.h" |
| #include "postmaster/autovacuum.h" |
| #include "postmaster/bgworker_internals.h" |
| #include "postmaster/bgwriter.h" |
| #include "postmaster/fork_process.h" |
| #include "postmaster/interrupt.h" |
| #include "postmaster/pgarch.h" |
| #include "postmaster/postmaster.h" |
| #include "postmaster/fts.h" |
| #include "postmaster/syslogger.h" |
| #include "postmaster/backoff.h" |
| #include "postmaster/bgworker.h" |
| #include "postmaster/loginmonitor.h" |
| #include "replication/logicallauncher.h" |
| #include "replication/walsender.h" |
| #include "storage/fd.h" |
| #include "storage/ipc.h" |
| #include "storage/pg_shmem.h" |
| #include "storage/pmsignal.h" |
| #include "storage/proc.h" |
| #include "storage/procsignal.h" |
| #include "task/pg_cron.h" |
| #include "tcop/tcopprot.h" |
| #include "utils/builtins.h" |
| #include "utils/datetime.h" |
| #include "utils/faultinjector.h" |
| #include "utils/gdd.h" |
| #include "utils/memutils.h" |
| #include "utils/pidfile.h" |
| #include "utils/ps_status.h" |
| #include "utils/queryjumble.h" |
| #include "utils/timeout.h" |
| #include "utils/timestamp.h" |
| #include "utils/varlena.h" |
| |
| #include "cdb/cdbgang.h" /* cdbgang_parse_gpqeid_params */ |
| #include "cdb/cdbtm.h" |
| #include "cdb/cdbvars.h" |
| #include "cdb/cdbendpoint.h" |
| #include "cdb/ic_proxy_bgworker.h" |
| #include "cdb/ml_ipc.h" |
| #include "utils/metrics_utils.h" |
| #include "utils/resgroup.h" |
| #include "utils/resource_manager.h" |
| |
| /* |
| * This is set in backends that are handling a GPDB specific message (FTS or |
| * fault injector) on mirror. |
| */ |
| bool am_mirror = false; |
| /* GPDB specific flag to handle deadlocks during parallel segment start. */ |
| volatile bool *pm_launch_walreceiver = NULL; |
| |
| /* |
| * Possible types of a backend. Beyond being the possible bkend_type values in |
| * struct bkend, these are OR-able request flag bits for SignalSomeChildren() |
| * and CountChildren(). |
| */ |
| #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */ |
| #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */ |
| #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */ |
| #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */ |
| #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */ |
| |
| /* |
| * List of active backends (or child processes anyway; we don't actually |
| * know whether a given child has become a backend or is still in the |
| * authorization phase). This is used mainly to keep track of how many |
| * children we have and send them appropriate signals when necessary. |
| * |
| * As shown in the above set of backend types, this list includes not only |
| * "normal" client sessions, but also autovacuum workers, walsenders, and |
| * background workers. (Note that at the time of launch, walsenders are |
| * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND |
| * upon noticing they've changed their PMChildFlags entry. Hence that check |
| * must be done before any operation that needs to distinguish walsenders |
| * from normal backends.) |
| * |
| * Also, "dead_end" children are in it: these are children launched just for |
| * the purpose of sending a friendly rejection message to a would-be client. |
| * We must track them because they are attached to shared memory, but we know |
| * they will never become live backends. dead_end children are not assigned a |
| * PMChildSlot. dead_end children have bkend_type NORMAL. |
| * |
| * "Special" children such as the startup, bgwriter and autovacuum launcher |
| * tasks are not in this list. They are tracked via StartupPID and other |
| * pid_t variables below. (Thus, there can't be more than one of any given |
| * "special" child process type. We use BackendList entries for any child |
| * process there can be more than one of.) |
| */ |
| typedef struct bkend |
| { |
| pid_t pid; /* process id of backend */ |
| int32 cancel_key; /* cancel key for cancels for this backend */ |
| int child_slot; /* PMChildSlot for this backend, if any */ |
| int bkend_type; /* child process flavor, see above */ |
| bool dead_end; /* is it going to send an error and quit? */ |
| bool bgworker_notify; /* gets bgworker start/stop notifications */ |
| dlist_node elem; /* list link in BackendList */ |
| } Backend; |
| |
| static dlist_head BackendList = DLIST_STATIC_INIT(BackendList); |
| |
| #ifdef EXEC_BACKEND |
| static Backend *ShmemBackendArray; |
| #endif |
| |
| BackgroundWorker *MyBgworkerEntry = NULL; |
| |
| /* The socket number we are listening for connections on */ |
| int PostPortNumber; |
| |
| /* The directory names for Unix socket(s) */ |
| char *Unix_socket_directories; |
| |
| /* The TCP listen address(es) */ |
| char *ListenAddresses; |
| |
| /* |
| * The interconnect address. We assume the interconnect is the address |
| * in gp_segment_configuration. And it's never changed at runtime. |
| */ |
| char *interconnect_address = NULL; |
| |
| /* |
| * ReservedBackends is the number of backends reserved for superuser use. |
| * This number is taken out of the pool size given by MaxConnections so |
| * number of backend slots available to non-superusers is |
| * (MaxConnections - ReservedBackends). Note what this really means is |
| * "if there are <= ReservedBackends connections available, only superusers |
| * can make new connections" --- pre-existing superuser connections don't |
| * count against the limit. |
| */ |
| int ReservedBackends; |
| |
| /* The socket(s) we're listening to. */ |
| #define MAXLISTEN 64 |
| static pgsocket ListenSocket[MAXLISTEN]; |
| |
| /* |
| * These globals control the behavior of the postmaster in case some |
| * backend dumps core. Normally, it kills all peers of the dead backend |
| * and reinitializes shared memory. By specifying -s or -n, we can have |
| * the postmaster stop (rather than kill) peers and not reinitialize |
| * shared data structures. (Reinit is currently dead code, though.) |
| */ |
| static bool Reinit = true; |
| static int SendStop = false; |
| |
| /* still more option variables */ |
| bool EnableSSL = false; |
| int terminal_fd = -1; |
| |
| int PreAuthDelay = 0; |
| int AuthenticationTimeout = 60; |
| |
| bool log_hostname; /* for ps display and logging */ |
| bool Log_connections = false; |
| bool Db_user_namespace = false; |
| |
| bool enable_bonjour = false; |
| char *bonjour_name; |
| bool restart_after_crash = true; |
| bool remove_temp_files_after_crash = true; |
| bool enable_password_profile = true; |
| |
| /* Hook for plugins to start background workers */ |
| start_bgworkers_hook_type start_bgworkers_hook = NULL; |
| |
| /* |
| * PIDs of special child processes; 0 when not running. When adding a new PID |
| * to the list, remember to add the process title to GetServerProcessTitle() |
| * as well. |
| */ |
| static pid_t StartupPID = 0, |
| BgWriterPID = 0, |
| CheckpointerPID = 0, |
| WalWriterPID = 0, |
| WalReceiverPID = 0, |
| AutoVacPID = 0, |
| PgArchPID = 0, |
| PgStatPID = 0, |
| SysLoggerPID = 0, |
| LoginMonitorPID = 0; |
| |
| /* Startup process's status */ |
| typedef enum |
| { |
| STARTUP_NOT_RUNNING, |
| STARTUP_RUNNING, |
| STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */ |
| STARTUP_CRASHED |
| } StartupStatusEnum; |
| |
| static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING; |
| |
| /* Startup/shutdown state */ |
| #define NoShutdown 0 |
| #define SmartShutdown 1 |
| #define FastShutdown 2 |
| #define ImmediateShutdown 3 |
| |
| static int Shutdown = NoShutdown; |
| |
| static bool FatalError = false; /* T if recovering from backend crash */ |
| |
| /* |
| * We use a simple state machine to control startup, shutdown, and |
| * crash recovery (which is rather like shutdown followed by startup). |
| * |
| * After doing all the postmaster initialization work, we enter PM_STARTUP |
| * state and the startup process is launched. The startup process begins by |
| * reading the control file and other preliminary initialization steps. |
| * In a normal startup, or after crash recovery, the startup process exits |
| * with exit code 0 and we switch to PM_RUN state. However, archive recovery |
| * is handled specially since it takes much longer and we would like to support |
| * hot standby during archive recovery. |
| * |
| * When the startup process is ready to start archive recovery, it signals the |
| * postmaster, and we switch to PM_RECOVERY state. The background writer and |
| * checkpointer are launched, while the startup process continues applying WAL. |
| * If Hot Standby is enabled, then, after reaching a consistent point in WAL |
| * redo, startup process signals us again, and we switch to PM_HOT_STANDBY |
| * state and begin accepting connections to perform read-only queries. When |
| * archive recovery is finished, the startup process exits with exit code 0 |
| * and we switch to PM_RUN state. |
| * |
| * Normal child backends can only be launched when we are in PM_RUN or |
| * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.) |
| * In other states we handle connection requests by launching "dead_end" |
| * child processes, which will simply send the client an error message and |
| * quit. (We track these in the BackendList so that we can know when they |
| * are all gone; this is important because they're still connected to shared |
| * memory, and would interfere with an attempt to destroy the shmem segment, |
| * possibly leading to SHMALL failure when we try to make a new one.) |
| * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children |
| * to drain out of the system, and therefore stop accepting connection |
| * requests at all until the last existing child has quit (which hopefully |
| * will not be very long). |
| * |
| * Notice that this state variable does not distinguish *why* we entered |
| * states later than PM_RUN --- Shutdown and FatalError must be consulted |
| * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY, |
| * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those |
| * states when trying to recover from a crash). It can be true in PM_STARTUP |
| * state, because we don't clear it until we've successfully started WAL redo. |
| */ |
| typedef enum |
| { |
| PM_INIT, /* postmaster starting */ |
| PM_STARTUP, /* waiting for startup subprocess */ |
| PM_RECOVERY, /* in archive recovery mode */ |
| PM_HOT_STANDBY, /* in hot standby mode */ |
| PM_RUN, /* normal "database is alive" state */ |
| PM_STOP_BACKENDS, /* need to stop remaining backends */ |
| PM_WAIT_BACKENDS, /* waiting for live backends to exit */ |
| PM_SHUTDOWN, /* waiting for checkpointer to do shutdown |
| * ckpt */ |
| PM_SHUTDOWN_2, /* waiting for archiver and walsenders to |
| * finish */ |
| PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */ |
| PM_NO_CHILDREN /* all important children have exited */ |
| } PMState; |
| |
| static PMState pmState = PM_INIT; |
| |
| /* |
| * While performing a "smart shutdown", we restrict new connections but stay |
| * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone. |
| * connsAllowed is a sub-state indicator showing the active restriction. |
| * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY. |
| */ |
| typedef enum |
| { |
| ALLOW_ALL_CONNS, /* normal not-shutting-down state */ |
| ALLOW_SUPERUSER_CONNS, /* only superusers can connect */ |
| ALLOW_NO_CONNS /* no new connections allowed, period */ |
| } ConnsAllowedState; |
| |
| static ConnsAllowedState connsAllowed = ALLOW_ALL_CONNS; |
| |
| /* Start time of SIGKILL timeout during immediate shutdown or child crash */ |
| /* Zero means timeout is not running */ |
| static time_t AbortStartTime = 0; |
| |
| /* Length of said timeout */ |
| #define SIGKILL_CHILDREN_AFTER_SECS 5 |
| |
| /* Set at database system is ready to accept connections */ |
| pg_time_t PMAcceptingConnectionsStartTime = 0; |
| |
| static BackgroundWorker PMAuxProcList[MaxPMAuxProc] = |
| { |
| #ifdef USE_INTERNAL_FTS |
| {"ftsprobe process", "ftsprobe process", |
| BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION, |
| BgWorkerStart_DtxRecovering, /* no need to wait dtx recovery */ |
| 0, /* restart immediately if ftsprobe exits with non-zero code */ |
| "postgres", "FtsProbeMain", 0, {0}, 0, |
| FtsProbeStartRule |
| }, |
| #endif |
| {"global deadlock detector process", "global deadlock detector process", |
| BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION, |
| BgWorkerStart_RecoveryFinished, |
| 0, /* restart immediately if gdd exits with non-zero code */ |
| "postgres", "GlobalDeadLockDetectorMain", 0, {0}, 0, |
| GlobalDeadLockDetectorStartRule}, |
| |
| {"dtx recovery process", "dtx recovery process", |
| BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION, |
| BgWorkerStart_DtxRecovering, /* no need to wait dtx recovery */ |
| 0, /* restart immediately if dtx recovery process exits with non-zero code */ |
| "postgres", "DtxRecoveryMain", 0, {0}, 0, |
| DtxRecoveryStartRule}, |
| |
| {"sweeper process", "sweeper process", |
| BGWORKER_SHMEM_ACCESS, |
| BgWorkerStart_RecoveryFinished, |
| 0, /* restart immediately if sweeper process exits with non-zero code */ |
| "postgres", "BackoffSweeperMain", 0, {0}, 0, |
| BackoffSweeperStartRule}, |
| |
| {"pg_cron launcher", "pg_cron launcher", |
| BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION, |
| BgWorkerStart_RecoveryFinished, |
| 1, |
| "postgres", "PgCronLauncherMain", 0, {0}, 0, |
| PgCronStartRule}, |
| |
| #ifdef ENABLE_IC_PROXY |
| {"ic proxy process", "ic proxy process", |
| 0, |
| BgWorkerStart_RecoveryFinished, |
| 0, /* restart immediately if ic proxy process exits with non-zero code */ |
| "postgres", "ICProxyMain", 0, {0}, 0, |
| ICProxyStartRule}, |
| #endif /* ENABLE_IC_PROXY */ |
| |
| /* |
| * Remember to set the MaxPMAuxProc to the number of items in this list |
| * |
| * It's used as a number at other places, so end-of-list marker doesn't |
| * work here. |
| */ |
| }; |
| |
| static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */ |
| |
| bool ClientAuthInProgress = false; /* T during new-client |
| * authentication */ |
| |
| bool redirection_done = false; /* stderr redirected for syslogger? */ |
| |
| /* received START_AUTOVAC_LAUNCHER signal */ |
| static volatile sig_atomic_t start_autovac_launcher = false; |
| |
| /* the launcher needs to be signaled to communicate some condition */ |
| static volatile bool avlauncher_needs_signal = false; |
| |
| /* received START_WALRECEIVER signal */ |
| static volatile sig_atomic_t WalReceiverRequested = false; |
| |
| /* set when there's a worker that needs to be started up */ |
| static volatile bool StartWorkerNeeded = true; |
| static volatile bool HaveCrashedWorker = false; |
| |
| #ifdef USE_SSL |
| /* Set when and if SSL has been initialized properly */ |
| static bool LoadedSSL = false; |
| #endif |
| |
| /* some GUC values used in fetching status from status transition */ |
| extern char *locale_monetary; |
| extern char *locale_numeric; |
| |
| #ifdef USE_BONJOUR |
| static DNSServiceRef bonjour_sdref = NULL; |
| #endif |
| |
| /* |
| * postmaster.c - function prototypes |
| */ |
| static void CloseServerPorts(int status, Datum arg); |
| static void unlink_external_pid_file(int status, Datum arg); |
| static void getInstallationPaths(const char *argv0); |
| static void checkControlFile(void); |
| static void checkPgDir(const char *dir); |
| static Port *ConnCreate(int serverFd); |
| static void ConnFree(Port *port); |
| static void reset_shared(void); |
| static void SIGHUP_handler(SIGNAL_ARGS); |
| static void pmdie(SIGNAL_ARGS); |
| static void reaper(SIGNAL_ARGS); |
| static void sigusr1_handler(SIGNAL_ARGS); |
| static void process_startup_packet_die(SIGNAL_ARGS); |
| static void dummy_handler(SIGNAL_ARGS); |
| static void StartupPacketTimeoutHandler(void); |
| static void CleanupBackend(int pid, int exitstatus); |
| static bool CleanupBackgroundWorker(int pid, int exitstatus); |
| static void HandleChildCrash(int pid, int exitstatus, const char *procname); |
| static void LogChildExit(int lev, const char *procname, |
| int pid, int exitstatus); |
| static void PostmasterStateMachine(void); |
| static void BackendInitialize(Port *port); |
| static void BackendRun(Port *port) pg_attribute_noreturn(); |
| static void ExitPostmaster(int status) pg_attribute_noreturn(); |
| static int ServerLoop(void); |
| static int BackendStartup(Port *port); |
| static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done); |
| static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options); |
| static void processCancelRequest(Port *port, void *pkt, MsgType code); |
| static int initMasks(fd_set *rmask); |
| static void report_fork_failure_to_client(Port *port, int errnum); |
| static CAC_state canAcceptConnections(int backend_type); |
| static bool RandomCancelKey(int32 *cancel_key); |
| static void signal_child(pid_t pid, int signal); |
| static bool SignalSomeChildren(int signal, int targets); |
| static void TerminateChildren(int signal); |
| |
| #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL) |
| |
| static int CountChildren(int target); |
| static bool assign_backendlist_entry(RegisteredBgWorker *rw); |
| static void maybe_start_bgworkers(void); |
| static bool CreateOptsFile(int argc, char *argv[], char *fullprogname); |
| static bool do_start_bgworker(RegisteredBgWorker *rw); |
| static pid_t StartChildProcess(AuxProcType type); |
| static void StartAutovacuumWorker(void); |
| static void StartLMWorker(void); |
| static void MaybeStartWalReceiver(void); |
| static void InitPostmasterDeathWatchHandle(void); |
| |
| static void setProcAffinity(int id); |
| |
| /* |
| * Archiver is allowed to start up at the current postmaster state? |
| * |
| * If WAL archiving is enabled always, we are allowed to start archiver |
| * even during recovery. |
| */ |
| #define PgArchStartupAllowed() \ |
| (((XLogArchivingActive() && pmState == PM_RUN) || \ |
| (XLogArchivingAlways() && \ |
| (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \ |
| PgArchCanRestart()) |
| |
| bool isAuxiliaryBgWorker(BackgroundWorker *worker); |
| |
| #ifdef EXEC_BACKEND |
| |
| #ifdef WIN32 |
| #define WNOHANG 0 /* ignored, so any integer value will do */ |
| |
| static pid_t waitpid(pid_t pid, int *exitstatus, int options); |
| static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired); |
| |
| static HANDLE win32ChildQueue; |
| |
| typedef struct |
| { |
| HANDLE waitHandle; |
| HANDLE procHandle; |
| DWORD procId; |
| } win32_deadchild_waitinfo; |
| #endif /* WIN32 */ |
| |
| static pid_t backend_forkexec(Port *port); |
| static pid_t internal_forkexec(int argc, char *argv[], Port *port); |
| |
| /* Type for a socket that can be inherited to a client process */ |
| #ifdef WIN32 |
| typedef struct |
| { |
| SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET |
| * if not a socket */ |
| WSAPROTOCOL_INFO wsainfo; |
| } InheritableSocket; |
| #else |
| typedef int InheritableSocket; |
| #endif |
| |
| /* |
| * Structure contains all variables passed to exec:ed backends |
| */ |
| typedef struct |
| { |
| Port port; |
| InheritableSocket portsocket; |
| char DataDir[MAXPGPATH]; |
| pgsocket ListenSocket[MAXLISTEN]; |
| int32 MyCancelKey; |
| int MyPMChildSlot; |
| #ifndef WIN32 |
| unsigned long UsedShmemSegID; |
| #else |
| void *ShmemProtectiveRegion; |
| HANDLE UsedShmemSegID; |
| #endif |
| void *UsedShmemSegAddr; |
| slock_t *ShmemLock; |
| VariableCache ShmemVariableCache; |
| Backend *ShmemBackendArray; |
| #ifndef HAVE_SPINLOCKS |
| PGSemaphore *SpinlockSemaArray; |
| #endif |
| int NamedLWLockTrancheRequests; |
| NamedLWLockTranche *NamedLWLockTrancheArray; |
| LWLockPadded *MainLWLockArray; |
| slock_t *ProcStructLock; |
| PROC_HDR *ProcGlobal; |
| PGPROC *AuxiliaryProcs; |
| PGPROC *PreparedXactProcs; |
| PMSignalData *PMSignalState; |
| InheritableSocket pgStatSock; |
| pid_t PostmasterPid; |
| TimestampTz PgStartTime; |
| TimestampTz PgReloadTime; |
| pg_time_t first_syslogger_file_time; |
| bool redirection_done; |
| bool IsBinaryUpgrade; |
| bool ConvertMasterDataDirToSegment; |
| bool query_id_enabled; |
| int max_safe_fds; |
| int MaxBackends; |
| #ifdef WIN32 |
| HANDLE PostmasterHandle; |
| HANDLE initial_signal_pipe; |
| HANDLE syslogPipe[2]; |
| #else |
| int postmaster_alive_fds[2]; |
| int syslogPipe[2]; |
| #endif |
| char my_exec_path[MAXPGPATH]; |
| char pkglib_path[MAXPGPATH]; |
| } BackendParameters; |
| |
| static void read_backend_variables(char *id, Port *port); |
| static void restore_backend_variables(BackendParameters *param, Port *port); |
| |
| #ifndef WIN32 |
| static bool save_backend_variables(BackendParameters *param, Port *port); |
| #else |
| static bool save_backend_variables(BackendParameters *param, Port *port, |
| HANDLE childProcess, pid_t childPid); |
| #endif |
| |
| static void ShmemBackendArrayAdd(Backend *bn); |
| static void ShmemBackendArrayRemove(Backend *bn); |
| #endif /* EXEC_BACKEND */ |
| |
| #define StartupDataBase() StartChildProcess(StartupProcess) |
| #define StartArchiver() StartChildProcess(ArchiverProcess) |
| #define StartBackgroundWriter() StartChildProcess(BgWriterProcess) |
| #define StartCheckpointer() StartChildProcess(CheckpointerProcess) |
| #define StartWalWriter() StartChildProcess(WalWriterProcess) |
| #define StartWalReceiver() StartChildProcess(WalReceiverProcess) |
| |
| /* Macros to check exit status of a child process */ |
| #define EXIT_STATUS_0(st) ((st) == 0) |
| #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) |
| #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3) |
| |
| #ifndef WIN32 |
| /* |
| * File descriptors for pipe used to monitor if postmaster is alive. |
| * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN. |
| */ |
| int postmaster_alive_fds[2] = {-1, -1}; |
| #else |
| /* Process handle of postmaster used for the same purpose on Windows */ |
| HANDLE PostmasterHandle; |
| #endif |
| |
| /* |
| * query info collector hook |
| * Use this hook to collect real-time query information and status data. |
| */ |
| query_info_collect_hook_type query_info_collect_hook = NULL; |
| |
| /* |
| * Postmaster main entry point |
| */ |
| void |
| PostmasterMain(int argc, char *argv[]) |
| { |
| int opt; |
| int status; |
| char *userDoption = NULL; |
| bool listen_addr_saved = false; |
| int i; |
| char *output_config_variable = NULL; |
| |
| InitProcessGlobals(); |
| |
| PostmasterPid = MyProcPid; |
| |
| IsPostmasterEnvironment = true; |
| |
| /* |
| * We should not be creating any files or directories before we check the |
| * data directory (see checkDataDir()), but just in case set the umask to |
| * the most restrictive (owner-only) permissions. |
| * |
| * checkDataDir() will reset the umask based on the data directory |
| * permissions. |
| */ |
| umask(PG_MODE_MASK_OWNER); |
| |
| /* |
| * Initialize random(3) so we don't get the same values in every run. |
| * |
| * Note: the seed is pretty predictable from externally-visible facts such |
| * as postmaster start time, so avoid using random() for security-critical |
| * random values during postmaster startup. At the time of first |
| * connection, PostmasterRandom will select a hopefully-more-random seed. |
| */ |
| srandom((unsigned int) (MyProcPid ^ MyStartTime)); |
| |
| /* |
| * By default, palloc() requests in the postmaster will be allocated in |
| * the PostmasterContext, which is space that can be recycled by backends. |
| * Allocated data that needs to be available to backends should be |
| * allocated in TopMemoryContext. |
| */ |
| PostmasterContext = AllocSetContextCreate(TopMemoryContext, |
| "Postmaster", |
| ALLOCSET_DEFAULT_SIZES); |
| MemoryContextSwitchTo(PostmasterContext); |
| |
| /* Initialize paths to installation files */ |
| getInstallationPaths(argv[0]); |
| |
| /* |
| * Set up signal handlers for the postmaster process. |
| * |
| * In the postmaster, we use pqsignal_pm() rather than pqsignal() (which |
| * is used by all child processes and client processes). That has a |
| * couple of special behaviors: |
| * |
| * 1. We tell sigaction() to block all signals for the duration of the |
| * signal handler. This is faster than our old approach of |
| * blocking/unblocking explicitly in the signal handler, and it should also |
| * prevent excessive stack consumption if signals arrive quickly. |
| * |
| * 2. We do not set the SA_RESTART flag. This is because signals will be |
| * blocked at all times except when ServerLoop is waiting for something to |
| * happen, and during that window, we want signals to exit the select(2) |
| * wait so that ServerLoop can respond if anything interesting happened. |
| * On some platforms, signals marked SA_RESTART would not cause the |
| * select() wait to end. |
| * |
| * Child processes will generally want SA_RESTART, so pqsignal() sets that |
| * flag. We expect children to set up their own handlers before |
| * unblocking signals. |
| * |
| * CAUTION: when changing this list, check for side-effects on the signal |
| * handling setup of child processes. See tcop/postgres.c, |
| * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c, |
| * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c, |
| * postmaster/syslogger.c, postmaster/bgworker.c and |
| * postmaster/checkpointer.c. |
| */ |
| pqinitmask(); |
| PG_SETMASK(&BlockSig); |
| |
| pqsignal_pm(SIGHUP, SIGHUP_handler); /* reread config file and have |
| * children do same */ |
| pqsignal_pm(SIGINT, pmdie); /* send SIGTERM and shut down */ |
| pqsignal_pm(SIGQUIT, pmdie); /* send SIGQUIT and die */ |
| pqsignal_pm(SIGTERM, pmdie); /* wait for children and shut down */ |
| pqsignal_pm(SIGALRM, SIG_IGN); /* ignored */ |
| pqsignal_pm(SIGPIPE, SIG_IGN); /* ignored */ |
| pqsignal_pm(SIGUSR1, sigusr1_handler); /* message from child process */ |
| pqsignal_pm(SIGUSR2, dummy_handler); /* unused, reserve for children */ |
| pqsignal_pm(SIGCHLD, reaper); /* handle child termination */ |
| |
| #ifdef SIGURG |
| |
| /* |
| * Ignore SIGURG for now. Child processes may change this (see |
| * InitializeLatchSupport), but they will not receive any such signals |
| * until they wait on a latch. |
| */ |
| pqsignal_pm(SIGURG, SIG_IGN); /* ignored */ |
| #endif |
| |
| /* |
| * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We |
| * ignore those signals in a postmaster environment, so that there is no |
| * risk of a child process freezing up due to writing to stderr. But for |
| * a standalone backend, their default handling is reasonable. Hence, all |
| * child processes should just allow the inherited settings to stand. |
| */ |
| #ifdef SIGTTIN |
| pqsignal_pm(SIGTTIN, SIG_IGN); /* ignored */ |
| #endif |
| #ifdef SIGTTOU |
| pqsignal_pm(SIGTTOU, SIG_IGN); /* ignored */ |
| #endif |
| |
| /* ignore SIGXFSZ, so that ulimit violations work like disk full */ |
| #ifdef SIGXFSZ |
| pqsignal_pm(SIGXFSZ, SIG_IGN); /* ignored */ |
| #endif |
| |
| /* |
| * Options setup |
| */ |
| InitializeGUCOptions(); |
| |
| opterr = 1; |
| |
| /* |
| * Parse command-line options. CAUTION: keep this in sync with |
| * tcop/postgres.c (the option sets should not conflict) and with the |
| * common help() function in main/main.c. |
| */ |
| while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lMmN:nOo:Pp:r:R:S:sTt:W:-:")) != -1) |
| { |
| switch (opt) |
| { |
| case 'B': |
| SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'b': |
| /* Undocumented flag used for binary upgrades */ |
| IsBinaryUpgrade = true; |
| break; |
| |
| case 'C': |
| output_config_variable = strdup(optarg); |
| break; |
| |
| case 'D': |
| userDoption = strdup(optarg); |
| break; |
| |
| case 'd': |
| set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'E': |
| SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'e': |
| SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'F': |
| SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'f': |
| if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV)) |
| { |
| write_stderr("%s: invalid argument for option -f: \"%s\"\n", |
| progname, optarg); |
| ExitPostmaster(1); |
| } |
| break; |
| |
| case 'h': |
| SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'i': |
| SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'j': |
| /* only used by interactive backend */ |
| break; |
| |
| case 'k': |
| SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'l': |
| SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'M': |
| /* Undocumented flag used for mutating a directory that was a copy of a |
| * master data directory and needs to now be a segment directory. Only |
| * use on the first time the segment is started, and only use in |
| * utility mode, as changes will be destructive, and will assume that |
| * the segment has never participated in a distributed |
| * transaction.*/ |
| ConvertMasterDataDirToSegment = true; |
| break; |
| |
| case 'm': |
| /* |
| * In maintenance mode: |
| * 1. allow DML on catalog table |
| * 2. allow DML on segments |
| */ |
| SetConfigOption("maintenance_mode", "true", PGC_POSTMASTER, PGC_S_ARGV); |
| SetConfigOption("allow_segment_DML", "true", PGC_POSTMASTER, PGC_S_ARGV); |
| SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'N': |
| SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'n': |
| /* Don't reinit shared mem after abnormal exit */ |
| Reinit = false; |
| break; |
| |
| case 'O': |
| /* Only use in single user mode */ |
| SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'P': |
| SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'p': |
| SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'r': |
| /* only used by single-user backend */ |
| break; |
| |
| case 'R': |
| terminal_fd = atoi(optarg); |
| break; |
| |
| case 'S': |
| SetConfigOption("work_mem", optarg, PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 's': |
| SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'T': |
| |
| /* |
| * In the event that some backend dumps core, send SIGSTOP, |
| * rather than SIGQUIT, to all its peers. This lets the wily |
| * post_hacker collect core dumps from everyone. |
| */ |
| SendStop = true; |
| break; |
| |
| case 't': |
| { |
| const char *tmp = get_stats_option_name(optarg); |
| |
| if (tmp) |
| { |
| SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV); |
| } |
| else |
| { |
| write_stderr("%s: invalid argument for option -t: \"%s\"\n", |
| progname, optarg); |
| ExitPostmaster(1); |
| } |
| break; |
| } |
| |
| case 'W': |
| SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV); |
| break; |
| |
| case 'c': |
| case '-': |
| { |
| char *name, |
| *value; |
| |
| ParseLongOption(optarg, &name, &value); |
| if (!value) |
| { |
| if (opt == '-') |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| errmsg("--%s requires a value", |
| optarg))); |
| else |
| ereport(ERROR, |
| (errcode(ERRCODE_SYNTAX_ERROR), |
| errmsg("-c %s requires a value", |
| optarg))); |
| } |
| |
| SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV); |
| free(name); |
| if (value) |
| free(value); |
| break; |
| } |
| |
| default: |
| write_stderr("Try \"%s --help\" for more information.\n", |
| progname); |
| ExitPostmaster(1); |
| } |
| } |
| |
| /* |
| * Postmaster accepts no non-option switch arguments. |
| */ |
| if (optind < argc) |
| { |
| write_stderr("%s: invalid argument: \"%s\"\n", |
| progname, argv[optind]); |
| write_stderr("Try \"%s --help\" for more information.\n", |
| progname); |
| ExitPostmaster(1); |
| } |
| |
| /* |
| * If gp_role is not set, use utility role instead. |
| * |
| * A single node coordinator or segment is set to utility, which is not |
| * before Greenplum 7. |
| */ |
| if (Gp_role == GP_ROLE_UNDEFINED) |
| SetConfigOption("gp_role", "utility", PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT); |
| |
| /* |
| * Locate the proper configuration files and data directory, and read |
| * postgresql.conf for the first time. |
| */ |
| if (!SelectConfigFiles(userDoption, progname)) |
| ExitPostmaster(2); |
| |
| /* |
| * CDB/MPP/GPDB: Set the processor affinity (may be a no-op on |
| * some platforms). The port number is nice to use because we know |
| * that different segments on a single host will not have the same |
| * port numbers. |
| * |
| * We want to do this as early as we can -- so that the OS knows |
| * about our binding, and all of our child processes will inherit |
| * the same binding. |
| */ |
| if (gp_set_proc_affinity) |
| setProcAffinity(PostPortNumber); |
| |
| if (output_config_variable != NULL) |
| { |
| /* |
| * "-C guc" was specified, so print GUC's value and exit. No extra |
| * permission check is needed because the user is reading inside the |
| * data dir. |
| */ |
| const char *config_val = GetConfigOption(output_config_variable, |
| false, false); |
| |
| puts(config_val ? config_val : ""); |
| ExitPostmaster(0); |
| } |
| |
| /* Verify that DataDir looks reasonable */ |
| checkDataDir(); |
| |
| /* Check that pg_control exists */ |
| checkControlFile(); |
| |
| /* And switch working directory into it */ |
| ChangeToDataDir(); |
| |
| /* |
| * CDB: Decouple NBuffers from MaxBackends. The entry db doesn't benefit |
| * from buffers in excess of the global catalog size; this is typically |
| * small and unrelated to the number of clients. Segment dbs need enough |
| * buffers to accommodate the QEs concurrently accessing the database; but |
| * non-leaf QEs don't necessarily access the database (some are used only |
| * for sorting, hashing, etc); so again the number of buffers need not be |
| * in proportion to the number of connections. |
| */ |
| if (NBuffers < 16) |
| { |
| /* |
| * Do not accept -B so small that backends are likely to starve for |
| * lack of buffers. The specific choices here are somewhat arbitrary. |
| */ |
| write_stderr("%s: the number of buffers (-B) must be at least 16\n", progname); |
| ExitPostmaster(1); |
| } |
| |
| /* |
| * Check for invalid combinations of GUC settings. |
| * |
| * In GPDB, restricted mode gpstart uses superuser_reserved_connections == |
| * max_connections. Hence, in gpdb below check is modified from upstream to |
| * allow equal setting. |
| */ |
| if (ReservedBackends > MaxConnections) |
| { |
| write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n", |
| progname, |
| ReservedBackends, MaxConnections); |
| ExitPostmaster(1); |
| } |
| if (XLogArchiveMode > ARCHIVE_MODE_OFF && wal_level == WAL_LEVEL_MINIMAL) |
| ereport(ERROR, |
| (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\""))); |
| if (max_wal_senders > 0 && wal_level == WAL_LEVEL_MINIMAL) |
| ereport(ERROR, |
| (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\""))); |
| |
| if ( GpIdentity.dbid == -1 && Gp_role == GP_ROLE_UTILITY) |
| { |
| /** |
| * okay in utility mode! -- when starting the master in utility mode to fetch the configuration contents, |
| * we don't actually know the dbid. |
| */ |
| } |
| else if ( GpIdentity.dbid < 0 ) |
| { |
| ereport(FATAL, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("dbid (from -b option) is not specified or is invalid. This value must be >= 0, or >= -1 in utility mode. " |
| "The dbid value to pass can be determined from this server's entry in the segment configuration; it may be -1 if running in utility mode."))); |
| } |
| |
| if ( GpIdentity.segindex < -1 ) /* -1 is okay -- that means the master */ |
| { |
| ereport(FATAL, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("contentid (from -C option) is not specified or is invalid. This value must be >= -1. " |
| "The contentid value to pass can be determined this server's entry in the segment configuration; it may be -1 for a master, or in utility mode." |
| ))); |
| } |
| |
| /* |
| * Other one-time internal sanity checks can go here, if they are fast. |
| * (Put any slow processing further down, after postmaster.pid creation.) |
| */ |
| if (!CheckDateTokenTables()) |
| { |
| write_stderr("%s: invalid datetoken tables, please fix\n", progname); |
| ExitPostmaster(1); |
| } |
| |
| /* |
| * Now that we are done processing the postmaster arguments, reset |
| * getopt(3) library so that it will work correctly in subprocesses. |
| */ |
| optind = 1; |
| #if defined(HAVE_INT_OPTRESET) || !defined(HAVE_GETOPT) |
| optreset = 1; /* some systems need this too */ |
| #endif |
| |
| /* For debugging: display postmaster environment */ |
| { |
| extern char **environ; |
| char **p; |
| |
| ereport(DEBUG3, |
| (errmsg_internal("%s: PostmasterMain: initial environment dump:", |
| progname))); |
| ereport(DEBUG3, |
| (errmsg_internal("-----------------------------------------"))); |
| for (p = environ; *p; ++p) |
| ereport(DEBUG3, |
| (errmsg_internal("\t%s", *p))); |
| ereport(DEBUG3, |
| (errmsg_internal("-----------------------------------------"))); |
| } |
| |
| /* |
| * Create lockfile for data directory. |
| * |
| * We want to do this before we try to grab the input sockets, because the |
| * data directory interlock is more reliable than the socket-file |
| * interlock (thanks to whoever decided to put socket files in /tmp :-(). |
| * For the same reason, it's best to grab the TCP socket(s) before the |
| * Unix socket(s). |
| * |
| * Also note that this internally sets up the on_proc_exit function that |
| * is responsible for removing both data directory and socket lockfiles; |
| * so it must happen before opening sockets so that at exit, the socket |
| * lockfiles go away after CloseServerPorts runs. |
| */ |
| CreateDataDirLockFile(true); |
| |
| /* |
| * Remember postmaster startup time |
| * CDB: Moved this code up from below for use in error message headers. |
| */ |
| PgStartTime = GetCurrentTimestamp(); |
| |
| /* |
| * Read the control file (for error checking and config info). |
| * |
| * Since we verify the control file's CRC, this has a useful side effect |
| * on machines where we need a run-time test for CRC support instructions. |
| * The postmaster will do the test once at startup, and then its child |
| * processes will inherit the correct function pointer and not need to |
| * repeat the test. |
| */ |
| LocalProcessControlFile(false); |
| |
| /* |
| * CDB: gpdb auxilary process like fts probe, dtx recovery process is |
| * essential, we need to load them ahead of custom shared preload libraries |
| * to avoid exceeding max_worker_processes. |
| */ |
| load_auxiliary_libraries(); |
| |
| /* |
| * Register the apply launcher. Since it registers a background worker, |
| * it needs to be called before InitializeMaxBackends(), and it's probably |
| * a good idea to call it before any modules had chance to take the |
| * background worker slots. |
| */ |
| ApplyLauncherRegister(); |
| |
| /* |
| * process any libraries that should be preloaded at postmaster start |
| */ |
| process_shared_preload_libraries(); |
| |
| /* |
| * Set CurrentMotionIPCLayer after interconnect.so is loaded. |
| */ |
| InitializeCurrentMotionIPCLayer(); |
| |
| /* |
| * Initialize SSL library, if specified. |
| */ |
| #ifdef USE_SSL |
| if (EnableSSL) |
| { |
| (void) secure_initialize(true); |
| LoadedSSL = true; |
| } |
| #endif |
| |
| /* |
| * Now that loadable modules have had their chance to register background |
| * workers, calculate MaxBackends. |
| */ |
| InitializeMaxBackends(); |
| |
| /* |
| * Now that modules have been loaded, we can process any custom resource |
| * managers specified in the wal_consistency_checking GUC. |
| */ |
| InitializeWalConsistencyChecking(); |
| |
| /* |
| * Set up shared memory and semaphores. |
| */ |
| reset_shared(); |
| |
| /* |
| * Estimate number of openable files. This must happen after setting up |
| * semaphores, because on some platforms semaphores count as open files. |
| */ |
| set_max_safe_fds(); |
| |
| /* |
| * Set reference point for stack-depth checking. |
| */ |
| (void) set_stack_base(); |
| |
| /* |
| * Initialize pipe (or process handle on Windows) that allows children to |
| * wake up from sleep on postmaster death. |
| */ |
| InitPostmasterDeathWatchHandle(); |
| |
| #ifdef WIN32 |
| |
| /* |
| * Initialize I/O completion port used to deliver list of dead children. |
| */ |
| win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1); |
| if (win32ChildQueue == NULL) |
| ereport(FATAL, |
| (errmsg("could not create I/O completion port for child queue"))); |
| #endif |
| |
| #ifdef EXEC_BACKEND |
| /* Write out nondefault GUC settings for child processes to use */ |
| write_nondefault_variables(PGC_POSTMASTER); |
| |
| /* |
| * Clean out the temp directory used to transmit parameters to child |
| * processes (see internal_forkexec, below). We must do this before |
| * launching any child processes, else we have a race condition: we could |
| * remove a parameter file before the child can read it. It should be |
| * safe to do so now, because we verified earlier that there are no |
| * conflicting Postgres processes in this data directory. |
| */ |
| RemovePgTempFilesInDir(PG_TEMP_FILES_DIR, true, false); |
| #endif |
| |
| /* |
| * Forcibly remove the files signaling a standby promotion request. |
| * Otherwise, the existence of those files triggers a promotion too early, |
| * whether a user wants that or not. |
| * |
| * This removal of files is usually unnecessary because they can exist |
| * only during a few moments during a standby promotion. However there is |
| * a race condition: if pg_ctl promote is executed and creates the files |
| * during a promotion, the files can stay around even after the server is |
| * brought up to be the primary. Then, if a new standby starts by using |
| * the backup taken from the new primary, the files can exist at server |
| * startup and must be removed in order to avoid an unexpected promotion. |
| * |
| * Note that promotion signal files need to be removed before the startup |
| * process is invoked. Because, after that, they can be used by |
| * postmaster's SIGUSR1 signal handler. |
| */ |
| RemovePromoteSignalFiles(); |
| |
| /* Do the same for logrotate signal file */ |
| RemoveLogrotateSignalFiles(); |
| |
| /* Remove any outdated file holding the current log filenames. */ |
| if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT) |
| ereport(LOG, |
| (errcode_for_file_access(), |
| errmsg("could not remove file \"%s\": %m", |
| LOG_METAINFO_DATAFILE))); |
| |
| /* |
| * If enabled, start up syslogger collection subprocess |
| */ |
| SysLoggerPID = SysLogger_Start(); |
| |
| /* |
| * Reset whereToSendOutput from DestDebug (its starting state) to |
| * DestNone. This stops ereport from sending log messages to stderr unless |
| * Log_destination permits. We don't do this until the postmaster is |
| * fully launched, since startup failures may as well be reported to |
| * stderr. |
| * |
| * If we are in fact disabling logging to stderr, first emit a log message |
| * saying so, to provide a breadcrumb trail for users who may not remember |
| * that their logging is configured to go somewhere else. |
| */ |
| if (!(Log_destination & LOG_DESTINATION_STDERR)) |
| ereport(LOG, |
| (errmsg("ending log output to stderr"), |
| errhint("Future log output will go to log destination \"%s\".", |
| Log_destination_string))); |
| |
| whereToSendOutput = DestNone; |
| |
| /* |
| * Report server startup in log. While we could emit this much earlier, |
| * it seems best to do so after starting the log collector, if we intend |
| * to use one. |
| */ |
| ereport(LOG, |
| (errmsg("starting %s", PG_VERSION_STR))); |
| |
| /* |
| * Establish input sockets. |
| * |
| * First, mark them all closed, and set up an on_proc_exit function that's |
| * charged with closing the sockets again at postmaster shutdown. |
| */ |
| for (i = 0; i < MAXLISTEN; i++) |
| ListenSocket[i] = PGINVALID_SOCKET; |
| |
| on_proc_exit(CloseServerPorts, 0); |
| |
| if (ListenAddresses) |
| { |
| char *rawstring; |
| List *elemlist; |
| ListCell *l; |
| int success = 0; |
| |
| /* Need a modifiable copy of ListenAddresses */ |
| rawstring = pstrdup(ListenAddresses); |
| |
| /* Parse string into list of hostnames */ |
| if (!SplitGUCList(rawstring, ',', &elemlist)) |
| { |
| /* syntax error in list */ |
| ereport(FATAL, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid list syntax in parameter \"%s\"", |
| "listen_addresses"))); |
| } |
| |
| foreach(l, elemlist) |
| { |
| char *curhost = (char *) lfirst(l); |
| |
| if (strcmp(curhost, "*") == 0) |
| status = StreamServerPort(AF_UNSPEC, NULL, |
| (unsigned short) PostPortNumber, |
| NULL, |
| ListenSocket, MAXLISTEN); |
| else |
| status = StreamServerPort(AF_UNSPEC, curhost, |
| (unsigned short) PostPortNumber, |
| NULL, |
| ListenSocket, MAXLISTEN); |
| |
| if (status == STATUS_OK) |
| { |
| success++; |
| /* record the first successful host addr in lockfile */ |
| if (!listen_addr_saved) |
| { |
| AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost); |
| listen_addr_saved = true; |
| } |
| } |
| else |
| ereport(WARNING, |
| (errmsg("could not create listen socket for \"%s\"", |
| curhost))); |
| } |
| |
| if (!success && elemlist != NIL) |
| ereport(FATAL, |
| (errmsg("could not create any TCP/IP sockets"))); |
| |
| list_free(elemlist); |
| pfree(rawstring); |
| } |
| |
| #ifdef USE_BONJOUR |
| /* Register for Bonjour only if we opened TCP socket(s) */ |
| if (enable_bonjour && ListenSocket[0] != PGINVALID_SOCKET) |
| { |
| DNSServiceErrorType err; |
| |
| /* |
| * We pass 0 for interface_index, which will result in registering on |
| * all "applicable" interfaces. It's not entirely clear from the |
| * DNS-SD docs whether this would be appropriate if we have bound to |
| * just a subset of the available network interfaces. |
| */ |
| err = DNSServiceRegister(&bonjour_sdref, |
| 0, |
| 0, |
| bonjour_name, |
| "_postgresql._tcp.", |
| NULL, |
| NULL, |
| pg_hton16(PostPortNumber), |
| 0, |
| NULL, |
| NULL, |
| NULL); |
| if (err != kDNSServiceErr_NoError) |
| ereport(LOG, |
| (errmsg("DNSServiceRegister() failed: error code %ld", |
| (long) err))); |
| |
| /* |
| * We don't bother to read the mDNS daemon's reply, and we expect that |
| * it will automatically terminate our registration when the socket is |
| * closed at postmaster termination. So there's nothing more to be |
| * done here. However, the bonjour_sdref is kept around so that |
| * forked children can close their copies of the socket. |
| */ |
| } |
| #endif |
| |
| #ifdef HAVE_UNIX_SOCKETS |
| if (Unix_socket_directories) |
| { |
| char *rawstring; |
| List *elemlist; |
| ListCell *l; |
| int success = 0; |
| |
| /* Need a modifiable copy of Unix_socket_directories */ |
| rawstring = pstrdup(Unix_socket_directories); |
| |
| /* Parse string into list of directories */ |
| if (!SplitDirectoriesString(rawstring, ',', &elemlist)) |
| { |
| /* syntax error in list */ |
| ereport(FATAL, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid list syntax in parameter \"%s\"", |
| "unix_socket_directories"))); |
| } |
| |
| foreach(l, elemlist) |
| { |
| char *socketdir = (char *) lfirst(l); |
| |
| status = StreamServerPort(AF_UNIX, NULL, |
| (unsigned short) PostPortNumber, |
| socketdir, |
| ListenSocket, MAXLISTEN); |
| |
| if (status == STATUS_OK) |
| { |
| success++; |
| /* record the first successful Unix socket in lockfile */ |
| if (success == 1) |
| AddToDataDirLockFile(LOCK_FILE_LINE_SOCKET_DIR, socketdir); |
| } |
| else |
| ereport(WARNING, |
| (errmsg("could not create Unix-domain socket in directory \"%s\"", |
| socketdir))); |
| } |
| |
| if (!success && elemlist != NIL) |
| ereport(FATAL, |
| (errmsg("could not create any Unix-domain sockets"))); |
| |
| list_free_deep(elemlist); |
| pfree(rawstring); |
| } |
| #endif |
| |
| if (terminal_fd != -1) |
| close(terminal_fd); |
| |
| /* |
| * check that we have some socket to listen on |
| */ |
| if (ListenSocket[0] == PGINVALID_SOCKET) |
| ereport(FATAL, |
| (errmsg("no socket created for listening"))); |
| |
| /* |
| * If no valid TCP ports, write an empty line for listen address, |
| * indicating the Unix socket must be used. Note that this line is not |
| * added to the lock file until there is a socket backing it. |
| */ |
| if (!listen_addr_saved) |
| AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, ""); |
| |
| /* |
| * Record postmaster options. We delay this till now to avoid recording |
| * bogus options (eg, unusable port number). |
| */ |
| if (!CreateOptsFile(argc, argv, my_exec_path)) |
| ExitPostmaster(1); |
| |
| /* |
| * Write the external PID file if requested |
| */ |
| if (external_pid_file) |
| { |
| FILE *fpidfile = fopen(external_pid_file, "w"); |
| |
| if (fpidfile) |
| { |
| fprintf(fpidfile, "%d\n", MyProcPid); |
| fclose(fpidfile); |
| |
| /* Make PID file world readable */ |
| if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0) |
| write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n", |
| progname, external_pid_file, strerror(errno)); |
| } |
| else |
| write_stderr("%s: could not write external PID file \"%s\": %s\n", |
| progname, external_pid_file, strerror(errno)); |
| |
| on_proc_exit(unlink_external_pid_file, 0); |
| } |
| |
| /* |
| * Remove old temporary files. At this point there can be no other |
| * Postgres processes running in this directory, so this should be safe. |
| */ |
| RemovePgTempFiles(); |
| |
| /* If enabled, init cgroup */ |
| if (IsResGroupEnabled()) |
| initCgroup(); |
| |
| /* |
| * Initialize stats collection subsystem (this does NOT start the |
| * collector process!) |
| */ |
| pgstat_init(); |
| |
| /* |
| * Initialize the autovacuum subsystem (again, no process start yet) |
| */ |
| autovac_init(); |
| |
| /* |
| * Load configuration files for client authentication. |
| */ |
| if (!load_hba()) |
| { |
| /* |
| * It makes no sense to continue if we fail to load the HBA file, |
| * since there is no way to connect to the database in this case. |
| */ |
| ereport(FATAL, |
| (errcode(ERRCODE_CONFIG_FILE_ERROR), |
| (errmsg("could not load pg_hba.conf")))); |
| } |
| if (!load_ident()) |
| { |
| /* |
| * We can start up without the IDENT file, although it means that you |
| * cannot log in using any of the authentication methods that need a |
| * user name mapping. load_ident() already logged the details of error |
| * to the log. |
| */ |
| } |
| |
| #ifdef HAVE_PTHREAD_IS_THREADED_NP |
| |
| /* |
| * On macOS, libintl replaces setlocale() with a version that calls |
| * CFLocaleCopyCurrent() when its second argument is "" and every relevant |
| * environment variable is unset or empty. CFLocaleCopyCurrent() makes |
| * the process multithreaded. The postmaster calls sigprocmask() and |
| * calls fork() without an immediate exec(), both of which have undefined |
| * behavior in a multithreaded program. A multithreaded postmaster is the |
| * normal case on Windows, which offers neither fork() nor sigprocmask(). |
| */ |
| if (pthread_is_threaded_np() != 0) |
| ereport(FATAL, |
| (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
| errmsg("postmaster became multithreaded during startup"), |
| errhint("Set the LC_ALL environment variable to a valid locale."))); |
| #endif |
| |
| /* |
| * Remember postmaster startup time |
| */ |
| PgStartTime = GetCurrentTimestamp(); |
| |
| /* |
| * Report postmaster status in the postmaster.pid file, to allow pg_ctl to |
| * see what's happening. |
| */ |
| AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING); |
| |
| /* |
| * We're ready to rock and roll... |
| */ |
| StartupPID = StartupDataBase(); |
| Assert(StartupPID != 0); |
| StartupStatus = STARTUP_RUNNING; |
| pmState = PM_STARTUP; |
| |
| /* Some workers may be scheduled to start now */ |
| maybe_start_bgworkers(); |
| |
| status = ServerLoop(); |
| |
| /* |
| * ServerLoop probably shouldn't ever return, but if it does, close down. |
| */ |
| ExitPostmaster(status != STATUS_OK); |
| |
| abort(); /* not reached */ |
| } |
| |
| |
| /* |
| * on_proc_exit callback to close server's listen sockets |
| */ |
| static void |
| CloseServerPorts(int status, Datum arg) |
| { |
| int i; |
| |
| /* |
| * First, explicitly close all the socket FDs. We used to just let this |
| * happen implicitly at postmaster exit, but it's better to close them |
| * before we remove the postmaster.pid lockfile; otherwise there's a race |
| * condition if a new postmaster wants to re-use the TCP port number. |
| */ |
| for (i = 0; i < MAXLISTEN; i++) |
| { |
| if (ListenSocket[i] != PGINVALID_SOCKET) |
| { |
| StreamClose(ListenSocket[i]); |
| ListenSocket[i] = PGINVALID_SOCKET; |
| } |
| } |
| |
| /* |
| * Next, remove any filesystem entries for Unix sockets. To avoid race |
| * conditions against incoming postmasters, this must happen after closing |
| * the sockets and before removing lock files. |
| */ |
| RemoveSocketFiles(); |
| |
| /* |
| * We don't do anything about socket lock files here; those will be |
| * removed in a later on_proc_exit callback. |
| */ |
| } |
| |
| /* |
| * on_proc_exit callback to delete external_pid_file |
| */ |
| static void |
| unlink_external_pid_file(int status, Datum arg) |
| { |
| if (external_pid_file) |
| unlink(external_pid_file); |
| } |
| |
| |
| /* |
| * Compute and check the directory paths to files that are part of the |
| * installation (as deduced from the postgres executable's own location) |
| */ |
| static void |
| getInstallationPaths(const char *argv0) |
| { |
| DIR *pdir; |
| |
| /* Locate the postgres executable itself */ |
| if (find_my_exec(argv0, my_exec_path) < 0) |
| ereport(FATAL, |
| (errmsg("%s: could not locate my own executable path", argv0))); |
| |
| #ifdef EXEC_BACKEND |
| /* Locate executable backend before we change working directory */ |
| if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR, |
| postgres_exec_path) < 0) |
| ereport(FATAL, |
| (errmsg("%s: could not locate matching postgres executable", |
| argv0))); |
| #endif |
| |
| /* |
| * Locate the pkglib directory --- this has to be set early in case we try |
| * to load any modules from it in response to postgresql.conf entries. |
| */ |
| get_pkglib_path(my_exec_path, pkglib_path); |
| |
| /* |
| * Verify that there's a readable directory there; otherwise the Postgres |
| * installation is incomplete or corrupt. (A typical cause of this |
| * failure is that the postgres executable has been moved or hardlinked to |
| * some directory that's not a sibling of the installation lib/ |
| * directory.) |
| */ |
| pdir = AllocateDir(pkglib_path); |
| if (pdir == NULL) |
| ereport(ERROR, |
| (errcode_for_file_access(), |
| errmsg("could not open directory \"%s\": %m", |
| pkglib_path), |
| errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.", |
| my_exec_path))); |
| FreeDir(pdir); |
| |
| /* |
| * XXX is it worth similarly checking the share/ directory? If the lib/ |
| * directory is there, then share/ probably is too. |
| */ |
| } |
| |
| /* |
| * Check that pg_control exists in the correct location in the data directory. |
| * |
| * No attempt is made to validate the contents of pg_control here. This is |
| * just a sanity check to see if we are looking at a real data directory. |
| */ |
| static void |
| checkControlFile(void) |
| { |
| char path[MAXPGPATH]; |
| FILE *fp; |
| |
| snprintf(path, sizeof(path), "%s/global/pg_control", DataDir); |
| |
| fp = AllocateFile(path, PG_BINARY_R); |
| if (fp == NULL) |
| { |
| write_stderr("%s: could not find the database system\n" |
| "Expected to find it in the directory \"%s\",\n" |
| "but could not open file \"%s\": %s\n", |
| progname, DataDir, path, strerror(errno)); |
| ExitPostmaster(2); |
| } |
| FreeFile(fp); |
| } |
| |
| |
| /* |
| * check if file or directory under "DataDir" exists and is accessible |
| */ |
| static void |
| checkPgDir(const char *dir) |
| { |
| struct stat st; |
| /* |
| * DataDir is known to be smaller than MAXPGPATH, and 'dir' argument is always |
| * a short constant. |
| */ |
| char buf[MAXPGPATH + MAXPGPATH]; |
| |
| snprintf(buf, sizeof(buf), "%s%s", DataDir, dir); |
| |
| if (stat(buf, &st) != 0) |
| { |
| /* check if log is there */ |
| snprintf(buf, sizeof(buf), "%s%s", DataDir, "/log"); |
| if (stat(buf, &st) == 0) |
| elog(LOG, "System file or directory missing (%s), shutting down segment", dir); |
| |
| /* quit all processes and exit */ |
| pmdie(SIGQUIT); |
| } |
| } |
| |
| /* |
| * Determine how long should we let ServerLoop sleep. |
| * |
| * In normal conditions we wait at most one minute, to ensure that the other |
| * background tasks handled by ServerLoop get done even when no requests are |
| * arriving. However, if there are background workers waiting to be started, |
| * we don't actually sleep so that they are quickly serviced. Other exception |
| * cases are as shown in the code. |
| */ |
| static void |
| DetermineSleepTime(struct timeval *timeout) |
| { |
| TimestampTz next_wakeup = 0; |
| |
| /* |
| * Normal case: either there are no background workers at all, or we're in |
| * a shutdown sequence (during which we ignore bgworkers altogether). |
| */ |
| if (Shutdown > NoShutdown || |
| (!StartWorkerNeeded && !HaveCrashedWorker)) |
| { |
| if (AbortStartTime != 0) |
| { |
| /* time left to abort; clamp to 0 in case it already expired */ |
| timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS - |
| (time(NULL) - AbortStartTime); |
| timeout->tv_sec = Max(timeout->tv_sec, 0); |
| timeout->tv_usec = 0; |
| } |
| else |
| { |
| timeout->tv_sec = 60; |
| timeout->tv_usec = 0; |
| } |
| return; |
| } |
| |
| if (StartWorkerNeeded) |
| { |
| timeout->tv_sec = 0; |
| timeout->tv_usec = 0; |
| return; |
| } |
| |
| if (HaveCrashedWorker) |
| { |
| slist_mutable_iter siter; |
| |
| /* |
| * When there are crashed bgworkers, we sleep just long enough that |
| * they are restarted when they request to be. Scan the list to |
| * determine the minimum of all wakeup times according to most recent |
| * crash time and requested restart interval. |
| */ |
| slist_foreach_modify(siter, &BackgroundWorkerList) |
| { |
| RegisteredBgWorker *rw; |
| TimestampTz this_wakeup; |
| |
| rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); |
| |
| if (rw->rw_crashed_at == 0) |
| continue; |
| |
| if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART |
| || rw->rw_terminate) |
| { |
| ForgetBackgroundWorker(&siter); |
| continue; |
| } |
| |
| this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at, |
| 1000L * rw->rw_worker.bgw_restart_time); |
| if (next_wakeup == 0 || this_wakeup < next_wakeup) |
| next_wakeup = this_wakeup; |
| } |
| } |
| |
| if (next_wakeup != 0) |
| { |
| long secs; |
| int microsecs; |
| |
| TimestampDifference(GetCurrentTimestamp(), next_wakeup, |
| &secs, µsecs); |
| timeout->tv_sec = secs; |
| timeout->tv_usec = microsecs; |
| |
| /* Ensure we don't exceed one minute */ |
| if (timeout->tv_sec > 60) |
| { |
| timeout->tv_sec = 60; |
| timeout->tv_usec = 0; |
| } |
| } |
| else |
| { |
| timeout->tv_sec = 60; |
| timeout->tv_usec = 0; |
| } |
| } |
| |
| /* |
| * Main idle loop of postmaster |
| * |
| * NB: Needs to be called with signals blocked |
| */ |
| static int |
| ServerLoop(void) |
| { |
| fd_set readmask; |
| int nSockets; |
| time_t last_lockfile_recheck_time, |
| last_touch_time; |
| |
| last_lockfile_recheck_time = last_touch_time = time(NULL); |
| |
| nSockets = initMasks(&readmask); |
| |
| for (;;) |
| { |
| fd_set rmask; |
| int selres; |
| time_t now; |
| |
| /* |
| * Wait for a connection request to arrive. |
| * |
| * We block all signals except while sleeping. That makes it safe for |
| * signal handlers, which again block all signals while executing, to |
| * do nontrivial work. |
| * |
| * If we are in PM_WAIT_DEAD_END state, then we don't want to accept |
| * any new connections, so we don't call select(), and just sleep. |
| */ |
| memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set)); |
| |
| if (pmState == PM_WAIT_DEAD_END) |
| { |
| PG_SETMASK(&UnBlockSig); |
| |
| pg_usleep(100000L); /* 100 msec seems reasonable */ |
| selres = 0; |
| |
| PG_SETMASK(&BlockSig); |
| } |
| else |
| { |
| /* must set timeout each time; some OSes change it! */ |
| struct timeval timeout; |
| |
| /* Needs to run with blocked signals! */ |
| DetermineSleepTime(&timeout); |
| |
| PG_SETMASK(&UnBlockSig); |
| |
| selres = select(nSockets, &rmask, NULL, NULL, &timeout); |
| |
| PG_SETMASK(&BlockSig); |
| } |
| |
| /* Now check the select() result */ |
| if (selres < 0) |
| { |
| if (errno != EINTR && errno != EWOULDBLOCK) |
| { |
| ereport(LOG, |
| (errcode_for_socket_access(), |
| errmsg("select() failed in postmaster: %m"))); |
| return STATUS_ERROR; |
| } |
| } |
| |
| /* Sanity check for system directories on all segments */ |
| checkPgDir(""); |
| checkPgDir("/base"); |
| checkPgDir("/global"); |
| checkPgDir("/pg_twophase"); |
| checkPgDir("/pg_distributedlog"); |
| checkPgDir("/pg_multixact"); |
| checkPgDir("/pg_subtrans"); |
| checkPgDir("/pg_wal"); |
| checkPgDir("/pg_xact"); |
| checkPgDir("/pg_multixact/members"); |
| checkPgDir("/pg_multixact/offsets"); |
| |
| /* |
| * Block all signals until we wait again. (This makes it safe for our |
| * signal handlers to do nontrivial work.) |
| */ |
| PG_SETMASK(&BlockSig); |
| |
| /* |
| * New connection pending on any of our sockets? If so, fork a child |
| * process to deal with it. |
| */ |
| if (selres > 0) |
| { |
| int i; |
| |
| for (i = 0; i < MAXLISTEN; i++) |
| { |
| if (ListenSocket[i] == PGINVALID_SOCKET) |
| break; |
| if (FD_ISSET(ListenSocket[i], &rmask)) |
| { |
| Port *port; |
| |
| port = ConnCreate(ListenSocket[i]); |
| if (port) |
| { |
| BackendStartup(port); |
| |
| /* |
| * We no longer need the open socket or port structure |
| * in this process |
| */ |
| StreamClose(port->sock); |
| ConnFree(port); |
| } |
| } |
| } |
| } |
| |
| /* If we have lost the log collector, try to start a new one */ |
| if (SysLoggerPID == 0 && Logging_collector) |
| SysLoggerPID = SysLogger_Start(); |
| |
| /* |
| * If no background writer process is running, and we are not in a |
| * state that prevents it, start one. It doesn't matter if this |
| * fails, we'll just try again later. Likewise for the checkpointer. |
| */ |
| if (pmState == PM_RUN || pmState == PM_RECOVERY || |
| pmState == PM_HOT_STANDBY) |
| { |
| if (CheckpointerPID == 0) |
| CheckpointerPID = StartCheckpointer(); |
| if (BgWriterPID == 0) |
| BgWriterPID = StartBackgroundWriter(); |
| } |
| |
| /* |
| * Likewise, if we have lost the walwriter process, try to start a new |
| * one. But this is needed only in normal operation (else we cannot |
| * be writing any new WAL). |
| */ |
| if (WalWriterPID == 0 && pmState == PM_RUN) |
| WalWriterPID = StartWalWriter(); |
| |
| /* |
| * If we have lost the autovacuum launcher, try to start a new one. We |
| * don't want autovacuum to run in binary upgrade mode because |
| * autovacuum might update relfrozenxid for empty tables before the |
| * physical files are put in place. |
| */ |
| if (!IsBinaryUpgrade && AutoVacPID == 0 && |
| (AutoVacuumingActive() || start_autovac_launcher) && |
| pmState == PM_RUN) |
| { |
| AutoVacPID = StartAutoVacLauncher(); |
| if (AutoVacPID != 0) |
| start_autovac_launcher = false; /* signal processed */ |
| } |
| |
| /* If we have lost the stats collector, try to start a new one */ |
| if (PgStatPID == 0 && |
| (pmState == PM_RUN || pmState == PM_HOT_STANDBY)) |
| PgStatPID = pgstat_start(); |
| |
| /* If we have lost the archiver, try to start a new one. */ |
| if (PgArchPID == 0 && PgArchStartupAllowed()) |
| PgArchPID = StartArchiver(); |
| |
| if (enable_password_profile && LoginMonitorPID == 0 && pmState == PM_RUN) |
| LoginMonitorPID = StartLoginMonitorLauncher(); |
| |
| /* If we need to signal the autovacuum launcher, do so now */ |
| if (avlauncher_needs_signal) |
| { |
| avlauncher_needs_signal = false; |
| if (AutoVacPID != 0) |
| kill(AutoVacPID, SIGUSR2); |
| } |
| |
| /* If we need to start a WAL receiver, try to do that now */ |
| if (WalReceiverRequested) |
| MaybeStartWalReceiver(); |
| |
| /* Get other worker processes running, if needed */ |
| if (StartWorkerNeeded || HaveCrashedWorker) |
| maybe_start_bgworkers(); |
| |
| #ifdef HAVE_PTHREAD_IS_THREADED_NP |
| |
| /* |
| * With assertions enabled, check regularly for appearance of |
| * additional threads. All builds check at start and exit. |
| */ |
| Assert(pthread_is_threaded_np() == 0); |
| #endif |
| |
| /* |
| * Lastly, check to see if it's time to do some things that we don't |
| * want to do every single time through the loop, because they're a |
| * bit expensive. Note that there's up to a minute of slop in when |
| * these tasks will be performed, since DetermineSleepTime() will let |
| * us sleep at most that long; except for SIGKILL timeout which has |
| * special-case logic there. |
| */ |
| now = time(NULL); |
| |
| /* |
| * If we already sent SIGQUIT to children and they are slow to shut |
| * down, it's time to send them SIGKILL. This doesn't happen |
| * normally, but under certain conditions backends can get stuck while |
| * shutting down. This is a last measure to get them unwedged. |
| * |
| * Note we also do this during recovery from a process crash. |
| */ |
| if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) && |
| AbortStartTime != 0 && |
| (now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS) |
| { |
| #ifdef FAULT_INJECTOR |
| if (SIMPLE_FAULT_INJECTOR("postmaster_server_loop_no_sigkill") == FaultInjectorTypeSkip) |
| { |
| /* |
| * This prevents sending SIGKILL to child processes for testing purpose. |
| * Since each time hitting this fault will print a log, let's wait 0.1s just |
| * not to overwhelm the logs. Reaching here means we are shutting down so |
| * making postmaster slower should be OK (only for testing anyway). |
| */ |
| pg_usleep(100000L); |
| continue; |
| } |
| #endif |
| /* We were gentle with them before. Not anymore */ |
| ereport(LOG, |
| (errmsg("issuing SIGKILL to recalcitrant children"))); |
| TerminateChildren(SIGKILL); |
| /* reset flag so we don't SIGKILL again */ |
| AbortStartTime = 0; |
| } |
| |
| /* |
| * Once a minute, verify that postmaster.pid hasn't been removed or |
| * overwritten. If it has, we force a shutdown. This avoids having |
| * postmasters and child processes hanging around after their database |
| * is gone, and maybe causing problems if a new database cluster is |
| * created in the same place. It also provides some protection |
| * against a DBA foolishly removing postmaster.pid and manually |
| * starting a new postmaster. Data corruption is likely to ensue from |
| * that anyway, but we can minimize the damage by aborting ASAP. |
| */ |
| if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE) |
| { |
| if (!RecheckDataDirLockFile()) |
| { |
| ereport(LOG, |
| (errmsg("performing immediate shutdown because data directory lock file is invalid"))); |
| kill(MyProcPid, SIGQUIT); |
| } |
| last_lockfile_recheck_time = now; |
| } |
| |
| /* |
| * Touch Unix socket and lock files every 58 minutes, to ensure that |
| * they are not removed by overzealous /tmp-cleaning tasks. We assume |
| * no one runs cleaners with cutoff times of less than an hour ... |
| */ |
| if (now - last_touch_time >= 58 * SECS_PER_MINUTE) |
| { |
| TouchSocketFiles(); |
| TouchSocketLockFiles(); |
| last_touch_time = now; |
| } |
| } |
| } |
| |
| /* |
| * Initialise the masks for select() for the ports we are listening on. |
| * Return the number of sockets to listen on. |
| */ |
| static int |
| initMasks(fd_set *rmask) |
| { |
| int maxsock = -1; |
| int i; |
| |
| FD_ZERO(rmask); |
| |
| for (i = 0; i < MAXLISTEN; i++) |
| { |
| int fd = ListenSocket[i]; |
| |
| if (fd == PGINVALID_SOCKET) |
| break; |
| FD_SET(fd, rmask); |
| |
| if (fd > maxsock) |
| maxsock = fd; |
| } |
| |
| return maxsock + 1; |
| } |
| |
| /* |
| * Once the flag is reset, libpq connections (e.g. FTS probe requests) should |
| * not get CAC_MIRROR_READY response. This flag is needed during GPDB startup |
| * to enable "pg_ctl -w". It need not interfere during or after promotion. |
| * This function is called right after removing RECOVERY_COMMAND_FILE upon |
| * receiving a promotion request. |
| */ |
| void inline |
| ResetMirrorReadyFlag(void) |
| { |
| *pm_launch_walreceiver = false; |
| } |
| |
| static inline void |
| SetMirrorReadyFlag(void) |
| { |
| *pm_launch_walreceiver = true; |
| } |
| |
| static inline bool |
| GetMirrorReadyFlag(void) |
| { |
| return *pm_launch_walreceiver; |
| } |
| |
| /* |
| * Read a client's startup packet and do something according to it. |
| * |
| * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and |
| * not return at all. |
| * |
| * (Note that ereport(FATAL) stuff is sent to the client, so only use it |
| * if that's what you want. Return STATUS_ERROR if you don't want to |
| * send anything to the client, which would typically be appropriate |
| * if we detect a communications failure.) |
| * |
| * Set ssl_done and/or gss_done when negotiation of an encrypted layer |
| * (currently, TLS or GSSAPI) is completed. A successful negotiation of either |
| * encryption layer sets both flags, but a rejected negotiation sets only the |
| * flag for that layer, since the client may wish to try the other one. We |
| * should make no assumption here about the order in which the client may make |
| * requests. |
| */ |
| static int |
| ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) |
| { |
| int32 len; |
| char *buf; |
| ProtocolVersion proto; |
| MemoryContext oldcontext; |
| char *gpqeid = NULL; |
| XLogRecPtr recptr; |
| |
| pq_startmsgread(); |
| |
| /* |
| * Grab the first byte of the length word separately, so that we can tell |
| * whether we have no data at all or an incomplete packet. (This might |
| * sound inefficient, but it's not really, because of buffering in |
| * pqcomm.c.) |
| */ |
| if (pq_getbytes((char *) &len, 1) == EOF) |
| { |
| /* |
| * If we get no data at all, don't clutter the log with a complaint; |
| * such cases often occur for legitimate reasons. An example is that |
| * we might be here after responding to NEGOTIATE_SSL_CODE, and if the |
| * client didn't like our response, it'll probably just drop the |
| * connection. Service-monitoring software also often just opens and |
| * closes a connection without sending anything. (So do port |
| * scanners, which may be less benign, but it's not really our job to |
| * notice those.) |
| */ |
| return STATUS_ERROR; |
| } |
| |
| if (pq_getbytes(((char *) &len) + 1, 3) == EOF) |
| { |
| /* Got a partial length word, so bleat about that */ |
| if (!ssl_done && !gss_done) |
| ereport(COMMERROR, |
| (errcode(ERRCODE_PROTOCOL_VIOLATION), |
| errmsg("incomplete startup packet"))); |
| return STATUS_ERROR; |
| } |
| |
| len = pg_ntoh32(len); |
| len -= 4; |
| |
| if (len < (int32) sizeof(ProtocolVersion) || |
| len > MAX_STARTUP_PACKET_LENGTH) |
| { |
| ereport(COMMERROR, |
| (errcode(ERRCODE_PROTOCOL_VIOLATION), |
| errmsg("invalid length of startup packet %ld",(long)len))); |
| return STATUS_ERROR; |
| } |
| |
| /* |
| * Allocate space to hold the startup packet, plus one extra byte that's |
| * initialized to be zero. This ensures we will have null termination of |
| * all strings inside the packet. |
| */ |
| buf = palloc(len + 1); |
| buf[len] = '\0'; |
| |
| if (pq_getbytes(buf, len) == EOF) |
| { |
| ereport(COMMERROR, |
| (errcode(ERRCODE_PROTOCOL_VIOLATION), |
| errmsg("incomplete startup packet"))); |
| return STATUS_ERROR; |
| } |
| pq_endmsgread(); |
| |
| /* |
| * The first field is either a protocol version number or a special |
| * request code. |
| */ |
| port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf)); |
| |
| if (proto == CANCEL_REQUEST_CODE || proto == FINISH_REQUEST_CODE) |
| { |
| processCancelRequest(port, buf, proto); |
| /* Not really an error, but we don't want to proceed further */ |
| return STATUS_ERROR; |
| } |
| |
| if (proto == NEGOTIATE_SSL_CODE && !ssl_done) |
| { |
| char SSLok; |
| |
| #ifdef USE_SSL |
| /* No SSL when disabled or on Unix sockets */ |
| if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family)) |
| SSLok = 'N'; |
| else |
| SSLok = 'S'; /* Support for SSL */ |
| #else |
| SSLok = 'N'; /* No support for SSL */ |
| #endif |
| |
| retry1: |
| if (send(port->sock, &SSLok, 1, 0) != 1) |
| { |
| if (errno == EINTR) |
| goto retry1; /* if interrupted, just retry */ |
| ereport(COMMERROR, |
| (errcode_for_socket_access(), |
| errmsg("failed to send SSL negotiation response: %m"))); |
| return STATUS_ERROR; /* close the connection */ |
| } |
| |
| #ifdef USE_SSL |
| if (SSLok == 'S' && secure_open_server(port) == -1) |
| return STATUS_ERROR; |
| #endif |
| |
| /* |
| * At this point we should have no data already buffered. If we do, |
| * it was received before we performed the SSL handshake, so it wasn't |
| * encrypted and indeed may have been injected by a man-in-the-middle. |
| * We report this case to the client. |
| */ |
| if (pq_buffer_has_data()) |
| ereport(FATAL, |
| (errcode(ERRCODE_PROTOCOL_VIOLATION), |
| errmsg("received unencrypted data after SSL request"), |
| errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack."))); |
| |
| /* |
| * regular startup packet, cancel, etc packet should follow, but not |
| * another SSL negotiation request, and a GSS request should only |
| * follow if SSL was rejected (client may negotiate in either order) |
| */ |
| return ProcessStartupPacket(port, true, SSLok == 'S'); |
| } |
| else if (proto == NEGOTIATE_GSS_CODE && !gss_done) |
| { |
| char GSSok = 'N'; |
| |
| #ifdef ENABLE_GSS |
| /* No GSSAPI encryption when on Unix socket */ |
| if (!IS_AF_UNIX(port->laddr.addr.ss_family)) |
| GSSok = 'G'; |
| #endif |
| |
| while (send(port->sock, &GSSok, 1, 0) != 1) |
| { |
| if (errno == EINTR) |
| continue; |
| ereport(COMMERROR, |
| (errcode_for_socket_access(), |
| errmsg("failed to send GSSAPI negotiation response: %m"))); |
| return STATUS_ERROR; /* close the connection */ |
| } |
| |
| #ifdef ENABLE_GSS |
| if (GSSok == 'G' && secure_open_gssapi(port) == -1) |
| return STATUS_ERROR; |
| #endif |
| |
| /* |
| * At this point we should have no data already buffered. If we do, |
| * it was received before we performed the GSS handshake, so it wasn't |
| * encrypted and indeed may have been injected by a man-in-the-middle. |
| * We report this case to the client. |
| */ |
| if (pq_buffer_has_data()) |
| ereport(FATAL, |
| (errcode(ERRCODE_PROTOCOL_VIOLATION), |
| errmsg("received unencrypted data after GSSAPI encryption request"), |
| errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack."))); |
| |
| /* |
| * regular startup packet, cancel, etc packet should follow, but not |
| * another GSS negotiation request, and an SSL request should only |
| * follow if GSS was rejected (client may negotiate in either order) |
| */ |
| return ProcessStartupPacket(port, GSSok == 'G', true); |
| } |
| |
| /* Could add additional special packet types here */ |
| |
| /* |
| * Set FrontendProtocol now so that ereport() knows what format to send if |
| * we fail during startup. |
| */ |
| FrontendProtocol = proto; |
| |
| /* Check that the major protocol version is in range. */ |
| if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) || |
| PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST)) |
| ereport(FATAL, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u", |
| PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto), |
| PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST), |
| PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST), |
| PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))); |
| |
| /* |
| * Now fetch parameters out of startup packet and save them into the Port |
| * structure. All data structures attached to the Port struct must be |
| * allocated in TopMemoryContext so that they will remain available in a |
| * running backend (even after PostmasterContext is destroyed). We need |
| * not worry about leaking this storage on failure, since we aren't in the |
| * postmaster process anymore. |
| */ |
| oldcontext = MemoryContextSwitchTo(TopMemoryContext); |
| |
| /* Handle protocol version 3 startup packet */ |
| { |
| int32 offset = sizeof(ProtocolVersion); |
| List *unrecognized_protocol_options = NIL; |
| |
| /* |
| * Scan packet body for name/option pairs. We can assume any string |
| * beginning within the packet body is null-terminated, thanks to |
| * zeroing extra byte above. |
| */ |
| port->guc_options = NIL; |
| |
| while (offset < len) |
| { |
| char *nameptr = buf + offset; |
| int32 valoffset; |
| char *valptr; |
| |
| if (*nameptr == '\0') |
| break; /* found packet terminator */ |
| valoffset = offset + strlen(nameptr) + 1; |
| if (valoffset >= len) |
| break; /* missing value, will complain below */ |
| valptr = buf + valoffset; |
| |
| if (strcmp(nameptr, "database") == 0) |
| port->database_name = pstrdup(valptr); |
| else if (strcmp(nameptr, "user") == 0) |
| port->user_name = pstrdup(valptr); |
| else if (strcmp(nameptr, "options") == 0) |
| port->cmdline_options = pstrdup(valptr); |
| else if (strcmp(nameptr, "gpqeid") == 0) |
| { |
| gpqeid = valptr; |
| /* |
| * Normally we do not need set this on QE nodes since QE |
| * postmaster should be launched with GP_ROLE_EXECUTE, but in |
| * the entrydb and the gpexpand case, still need to set this. |
| */ |
| Gp_role = GP_ROLE_EXECUTE; |
| } |
| else if (strcmp(nameptr, "replication") == 0) |
| { |
| /* |
| * Due to backward compatibility concerns the replication |
| * parameter is a hybrid beast which allows the value to be |
| * either boolean or the string 'database'. The latter |
| * connects to a specific database which is e.g. required for |
| * logical decoding while. |
| */ |
| if (strcmp(valptr, "database") == 0) |
| { |
| am_walsender = true; |
| am_db_walsender = true; |
| } |
| else if (!parse_bool(valptr, &am_walsender)) |
| ereport(FATAL, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid value for parameter \"%s\": \"%s\"", |
| "replication", |
| valptr), |
| errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\"."))); |
| } |
| else if (strncmp(nameptr, "_pq_.", 5) == 0) |
| { |
| /* |
| * Any option beginning with _pq_. is reserved for use as a |
| * protocol-level option, but at present no such options are |
| * defined. |
| */ |
| unrecognized_protocol_options = |
| lappend(unrecognized_protocol_options, pstrdup(nameptr)); |
| } |
| else if (strcmp(nameptr, GPCONN_TYPE) == 0) |
| { |
| am_mirror = IsRoleMirror(); |
| if (strcmp(valptr, GPCONN_TYPE_FTS) == 0) |
| { |
| #ifdef USE_INTERNAL_FTS |
| if (IS_QUERY_DISPATCHER()) |
| ereport(FATAL, |
| (errcode(ERRCODE_PROTOCOL_VIOLATION), |
| errmsg("cannot handle FTS connection on master"))); |
| #endif |
| /* If no defined USE_INTERNAL_FTS |
| * Allow deal fts meesage in master |
| * Also support promote standby when standby_promote_ready is true |
| */ |
| am_ftshandler = true; |
| |
| #ifdef FAULT_INJECTOR |
| if (FaultInjector_InjectFaultIfSet( |
| "fts_conn_startup_packet", |
| DDLNotSpecified, |
| "" /* databaseName */, |
| "" /* tableName */) == FaultInjectorTypeSkip) |
| { |
| /* |
| * If this fault is set to skip, report recovery is |
| * hung. Without this fault recovery is reported as |
| * progressing. |
| */ |
| if (FaultInjector_InjectFaultIfSet( |
| "fts_recovery_in_progress", |
| DDLNotSpecified, |
| "" /* databaseName */, |
| "" /* tableName */) == FaultInjectorTypeSkip) |
| { |
| recptr = last_xlog_replay_location(); |
| } |
| else |
| { |
| time_t counter = time(NULL); |
| |
| recptr = counter; |
| } |
| |
| ereport(FATAL, |
| (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
| errmsg(POSTMASTER_IN_RECOVERY_MSG), |
| errdetail(POSTMASTER_IN_RECOVERY_DETAIL_MSG " %X/%X", |
| (uint32) (recptr >> 32), (uint32) recptr))); |
| } |
| #endif |
| } |
| #ifdef FAULT_INJECTOR |
| else if (strcmp(valptr, GPCONN_TYPE_FAULT) == 0) |
| am_faulthandler = true; |
| #endif |
| else |
| ereport(FATAL, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid value for option: \"%s\"", GPCONN_TYPE))); |
| } |
| else if (strcmp(nameptr, "diff_options") == 0) |
| { |
| port->diff_options = pstrdup(valptr); |
| } |
| else |
| { |
| /* Assume it's a generic GUC option */ |
| port->guc_options = lappend(port->guc_options, |
| pstrdup(nameptr)); |
| port->guc_options = lappend(port->guc_options, |
| pstrdup(valptr)); |
| |
| /* |
| * Copy application_name to port if we come across it. This |
| * is done so we can log the application_name in the |
| * connection authorization message. Note that the GUC would |
| * be used but we haven't gone through GUC setup yet. |
| */ |
| if (strcmp(nameptr, "application_name") == 0) |
| { |
| char *tmp_app_name = pstrdup(valptr); |
| |
| pg_clean_ascii(tmp_app_name); |
| |
| port->application_name = tmp_app_name; |
| } |
| } |
| offset = valoffset + strlen(valptr) + 1; |
| } |
| |
| /* |
| * If we didn't find a packet terminator exactly at the end of the |
| * given packet length, complain. |
| */ |
| if (offset != len - 1) |
| ereport(FATAL, |
| (errcode(ERRCODE_PROTOCOL_VIOLATION), |
| errmsg("invalid startup packet layout: expected terminator as last byte"))); |
| |
| /* |
| * If the client requested a newer protocol version or if the client |
| * requested any protocol options we didn't recognize, let them know |
| * the newest minor protocol version we do support and the names of |
| * any unrecognized options. |
| */ |
| if (PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST) || |
| unrecognized_protocol_options != NIL) |
| SendNegotiateProtocolVersion(unrecognized_protocol_options); |
| } |
| |
| /* Check a user name was given. */ |
| if (port->user_name == NULL || port->user_name[0] == '\0') |
| ereport(FATAL, |
| (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), |
| errmsg("no PostgreSQL user name specified in startup packet"))); |
| |
| /* The database defaults to the user name. */ |
| if (port->database_name == NULL || port->database_name[0] == '\0') |
| port->database_name = pstrdup(port->user_name); |
| |
| if (Db_user_namespace) |
| { |
| /* |
| * If user@, it is a global user, remove '@'. We only want to do this |
| * if there is an '@' at the end and no earlier in the user string or |
| * they may fake as a local user of another database attaching to this |
| * database. |
| */ |
| if (strchr(port->user_name, '@') == |
| port->user_name + strlen(port->user_name) - 1) |
| *strchr(port->user_name, '@') = '\0'; |
| else |
| { |
| /* Append '@' and dbname */ |
| port->user_name = psprintf("%s@%s", port->user_name, port->database_name); |
| } |
| } |
| |
| /* |
| * Truncate given database and user names to length of a Postgres name. |
| * This avoids lookup failures when overlength names are given. |
| */ |
| if (strlen(port->database_name) >= NAMEDATALEN) |
| port->database_name[NAMEDATALEN - 1] = '\0'; |
| if (strlen(port->user_name) >= NAMEDATALEN) |
| port->user_name[NAMEDATALEN - 1] = '\0'; |
| |
| if (am_walsender) |
| MyBackendType = B_WAL_SENDER; |
| else |
| MyBackendType = B_BACKEND; |
| |
| /* |
| * Normal walsender backends, e.g. for streaming replication, are not |
| * connected to a particular database. But walsenders used for logical |
| * replication need to connect to a specific database. We allow streaming |
| * replication commands to be issued even if connected to a database as it |
| * can make sense to first make a basebackup and then stream changes |
| * starting from that. |
| */ |
| if ((am_walsender && !am_db_walsender) || am_ftshandler || am_faulthandler) |
| port->database_name[0] = '\0'; |
| |
| /* |
| * CDB: Process "gpqeid" parameter string for qExec startup. |
| */ |
| if (gpqeid) |
| cdbgang_parse_gpqeid_params(port, gpqeid); |
| |
| /* |
| * Done putting stuff in TopMemoryContext. |
| */ |
| MemoryContextSwitchTo(oldcontext); |
| |
| /* |
| * If we're going to reject the connection due to database state, say so |
| * now instead of wasting cycles on an authentication exchange. (This also |
| * allows a pg_ping utility to be written.) |
| */ |
| switch (port->canAcceptConnections) |
| { |
| case CAC_STARTUP: |
| if ((am_ftshandler || am_faulthandler) && am_mirror) |
| break; |
| |
| recptr = last_xlog_replay_location(); |
| |
| ereport(FATAL, |
| (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
| errmsg(POSTMASTER_IN_STARTUP_MSG), |
| errdetail(POSTMASTER_IN_RECOVERY_DETAIL_MSG " %X/%X", |
| (uint32) (recptr >> 32), (uint32) recptr))); |
| break; |
| case CAC_NOTCONSISTENT: |
| if (EnableHotStandby) |
| ereport(FATAL, |
| (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
| errmsg("the database system is not yet accepting connections"), |
| errdetail("Consistent recovery state has not been yet reached."))); |
| else |
| ereport(FATAL, |
| (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
| errmsg("the database system is not accepting connections"), |
| errdetail("Hot standby mode is disabled."))); |
| break; |
| case CAC_SHUTDOWN: |
| ereport(FATAL, |
| (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
| errmsg("the database system is shutting down"))); |
| break; |
| case CAC_RECOVERY: |
| recptr = last_xlog_replay_location(); |
| |
| ereport(FATAL, |
| (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
| errmsg(POSTMASTER_IN_RECOVERY_MSG), |
| errdetail(POSTMASTER_IN_RECOVERY_DETAIL_MSG " %X/%X", |
| (uint32) (recptr >> 32), (uint32) recptr))); |
| break; |
| case CAC_RESET: |
| ereport(FATAL, |
| (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
| errmsg(POSTMASTER_IN_RESET_MSG))); |
| break; |
| case CAC_TOOMANY: |
| ereport(FATAL, |
| (errcode(ERRCODE_TOO_MANY_CONNECTIONS), |
| errmsg("sorry, too many clients already"))); |
| break; |
| case CAC_MIRROR_READY: |
| if (am_ftshandler || am_faulthandler) |
| { |
| /* Even if the connection state is MIRROR_READY, the role |
| * may change to primary during promoting. Hence, we need |
| * to decline this connection to avoid confusion. This needs |
| * to wait until promotion is finished and pmState changed |
| * to PM_RUN. |
| */ |
| if (!am_mirror) |
| ereport(FATAL, |
| (errmsg("mirror is being promoted."))); |
| break; |
| } |
| |
| /* |
| * Allow connections if hot_standby is on and our postmaster is |
| * acting as a standby. |
| */ |
| if (EnableHotStandby) |
| break; |
| |
| recptr = last_xlog_replay_location(); |
| ereport(FATAL, |
| (errcode(ERRCODE_MIRROR_READY), |
| errmsg(POSTMASTER_IN_RECOVERY_MSG), |
| errdetail(POSTMASTER_IN_RECOVERY_DETAIL_MSG " %X/%X\n" |
| POSTMASTER_MIRROR_VERSION_DETAIL_MSG " %s", |
| (uint32) (recptr >> 32), (uint32) recptr, |
| TextDatumGetCString(pgsql_version(NULL))))); |
| break; |
| case CAC_SUPERUSER: |
| /* OK for now, will check in InitPostgres */ |
| break; |
| case CAC_OK: |
| break; |
| } |
| |
| #ifdef FAULT_INJECTOR |
| if (!am_ftshandler && !am_faulthandler && !am_walsender && |
| FaultInjector_InjectFaultIfSet("process_startup_packet", |
| DDLNotSpecified, |
| port->database_name /* databaseName */, |
| "" /* tableName */) == FaultInjectorTypeSkip) |
| { |
| ereport(FATAL, |
| (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
| errmsg(POSTMASTER_IN_RECOVERY_MSG), |
| errdetail(POSTMASTER_IN_RECOVERY_DETAIL_MSG " dummy location"))); |
| } |
| #endif |
| |
| return STATUS_OK; |
| } |
| |
| /* |
| * Send a NegotiateProtocolVersion to the client. This lets the client know |
| * that they have requested a newer minor protocol version than we are able |
| * to speak. We'll speak the highest version we know about; the client can, |
| * of course, abandon the connection if that's a problem. |
| * |
| * We also include in the response a list of protocol options we didn't |
| * understand. This allows clients to include optional parameters that might |
| * be present either in newer protocol versions or third-party protocol |
| * extensions without fear of having to reconnect if those options are not |
| * understood, while at the same time making certain that the client is aware |
| * of which options were actually accepted. |
| */ |
| static void |
| SendNegotiateProtocolVersion(List *unrecognized_protocol_options) |
| { |
| StringInfoData buf; |
| ListCell *lc; |
| |
| pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */ |
| pq_sendint32(&buf, PG_PROTOCOL_LATEST); |
| pq_sendint32(&buf, list_length(unrecognized_protocol_options)); |
| foreach(lc, unrecognized_protocol_options) |
| pq_sendstring(&buf, lfirst(lc)); |
| pq_endmessage(&buf); |
| |
| /* no need to flush, some other message will follow */ |
| } |
| |
| /* |
| * The client has sent a cancel request packet, not a normal |
| * start-a-new-connection packet. Perform the necessary processing. |
| * Nothing is sent back to the client. |
| */ |
| static void |
| processCancelRequest(Port *port, void *pkt, MsgType code) |
| { |
| CancelRequestPacket *canc = (CancelRequestPacket *) pkt; |
| int backendPID; |
| int32 cancelAuthCode; |
| Backend *bp; |
| |
| #ifndef EXEC_BACKEND |
| dlist_iter iter; |
| #else |
| int i; |
| #endif |
| |
| backendPID = (int) pg_ntoh32(canc->backendPID); |
| cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode); |
| |
| /* |
| * See if we have a matching backend. In the EXEC_BACKEND case, we can no |
| * longer access the postmaster's own backend list, and must rely on the |
| * duplicate array in shared memory. |
| */ |
| #ifndef EXEC_BACKEND |
| dlist_foreach(iter, &BackendList) |
| { |
| bp = dlist_container(Backend, elem, iter.cur); |
| #else |
| for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--) |
| { |
| bp = (Backend *) &ShmemBackendArray[i]; |
| #endif |
| if (bp->pid == backendPID) |
| { |
| if (bp->cancel_key == cancelAuthCode) |
| { |
| /* Found a match; signal that backend to cancel current op */ |
| if (code == FINISH_REQUEST_CODE) |
| { |
| ereport(LOG, |
| (errmsg_internal("query finish request to process %d", |
| backendPID))); |
| SendProcSignal(bp->pid, PROCSIG_QUERY_FINISH, |
| InvalidBackendId); |
| } |
| else |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("processing cancel request: sending SIGINT to process %d", |
| backendPID))); |
| signal_child(bp->pid, SIGINT); |
| } |
| } |
| else |
| /* Right PID, wrong key: no way, Jose */ |
| ereport(LOG, |
| (errmsg("wrong key in cancel request for process %d", |
| backendPID))); |
| return; |
| } |
| #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */ |
| } |
| #else |
| } |
| #endif |
| |
| /* No matching backend */ |
| ereport(LOG, |
| (errmsg("PID %d in cancel request did not match any process", |
| backendPID))); |
| } |
| |
| /* |
| * canAcceptConnections --- check to see if database state allows connections |
| * of the specified type. backend_type can be BACKEND_TYPE_NORMAL, |
| * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet |
| * know whether a NORMAL connection might turn into a walsender.) |
| */ |
| static CAC_state |
| canAcceptConnections(int backend_type) |
| { |
| CAC_state result = CAC_OK; |
| |
| /* |
| * Can't start backends when in startup/shutdown/inconsistent recovery |
| * state. We treat autovac workers the same as user backends for this |
| * purpose. However, bgworkers are excluded from this test; we expect |
| * bgworker_should_start_now() decided whether the DB state allows them. |
| */ |
| if (pmState != PM_RUN && pmState != PM_HOT_STANDBY && |
| backend_type != BACKEND_TYPE_BGWORKER) |
| { |
| if (Shutdown > NoShutdown) |
| return CAC_SHUTDOWN; /* shutdown is pending */ |
| /* |
| * If the wal receiver has been launched at least once, return that |
| * the mirror is ready. |
| */ |
| else if (GetMirrorReadyFlag()) |
| return CAC_MIRROR_READY; |
| else if (!FatalError && pmState == PM_STARTUP) |
| return CAC_STARTUP; /* normal startup */ |
| else if (!FatalError && pmState == PM_RECOVERY) |
| return CAC_NOTCONSISTENT; /* not yet at consistent recovery |
| * state */ |
| else if (pmState == PM_STARTUP || pmState == PM_RECOVERY) |
| return CAC_RECOVERY; /* else must be crash recovery */ |
| else |
| /* |
| * otherwise must be resetting: could be PM_WAIT_BACKENDS, |
| * PM_WAIT_DEAD_END or PM_NO_CHILDREN. |
| */ |
| return CAC_RESET; |
| } |
| |
| /* |
| * "Smart shutdown" restrictions are applied only to normal connections, |
| * not to autovac workers or bgworkers. When only superusers can connect, |
| * we return CAC_SUPERUSER to indicate that superuserness must be checked |
| * later. Note that neither CAC_OK nor CAC_SUPERUSER can safely be |
| * returned until we have checked for too many children. |
| */ |
| if (connsAllowed != ALLOW_ALL_CONNS && |
| backend_type == BACKEND_TYPE_NORMAL) |
| { |
| if (connsAllowed == ALLOW_SUPERUSER_CONNS) |
| result = CAC_SUPERUSER; /* allow superusers only */ |
| else |
| return CAC_SHUTDOWN; /* shutdown is pending */ |
| } |
| |
| /* |
| * Don't start too many children. |
| * |
| * We allow more connections here than we can have backends because some |
| * might still be authenticating; they might fail auth, or some existing |
| * backend might exit before the auth cycle is completed. The exact |
| * MaxBackends limit is enforced when a new backend tries to join the |
| * shared-inval backend array. |
| * |
| * The limit here must match the sizes of the per-child-process arrays; |
| * see comments for MaxLivePostmasterChildren(). |
| */ |
| if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren()) |
| result = CAC_TOOMANY; |
| |
| return result; |
| } |
| |
| |
| /* |
| * ConnCreate -- create a local connection data structure |
| * |
| * Returns NULL on failure, other than out-of-memory which is fatal. |
| */ |
| static Port * |
| ConnCreate(int serverFd) |
| { |
| Port *port; |
| |
| if (!(port = (Port *) calloc(1, sizeof(Port)))) |
| { |
| ereport(LOG, |
| (errcode(ERRCODE_OUT_OF_MEMORY), |
| errmsg("out of memory"))); |
| ExitPostmaster(1); |
| } |
| |
| if (StreamConnection(serverFd, port) != STATUS_OK) |
| { |
| if (port->sock != PGINVALID_SOCKET) |
| StreamClose(port->sock); |
| ConnFree(port); |
| return NULL; |
| } |
| |
| return port; |
| } |
| |
| |
| /* |
| * ConnFree -- free a local connection data structure |
| * |
| * Caller has already closed the socket if any, so there's not much |
| * to do here. |
| */ |
| static void |
| ConnFree(Port *conn) |
| { |
| free(conn); |
| } |
| |
| |
| /* |
| * ClosePostmasterPorts -- close all the postmaster's open sockets |
| * |
| * This is called during child process startup to release file descriptors |
| * that are not needed by that child process. The postmaster still has |
| * them open, of course. |
| * |
| * Note: we pass am_syslogger as a boolean because we don't want to set |
| * the global variable yet when this is called. |
| */ |
| void |
| ClosePostmasterPorts(bool am_syslogger) |
| { |
| int i; |
| |
| #ifndef WIN32 |
| |
| /* |
| * Close the write end of postmaster death watch pipe. It's important to |
| * do this as early as possible, so that if postmaster dies, others won't |
| * think that it's still running because we're holding the pipe open. |
| */ |
| if (close(postmaster_alive_fds[POSTMASTER_FD_OWN]) != 0) |
| ereport(FATAL, |
| (errcode_for_file_access(), |
| errmsg_internal("could not close postmaster death monitoring pipe in child process: %m"))); |
| postmaster_alive_fds[POSTMASTER_FD_OWN] = -1; |
| /* Notify fd.c that we released one pipe FD. */ |
| ReleaseExternalFD(); |
| #endif |
| |
| /* |
| * Close the postmaster's listen sockets. These aren't tracked by fd.c, |
| * so we don't call ReleaseExternalFD() here. |
| */ |
| for (i = 0; i < MAXLISTEN; i++) |
| { |
| if (ListenSocket[i] != PGINVALID_SOCKET) |
| { |
| StreamClose(ListenSocket[i]); |
| ListenSocket[i] = PGINVALID_SOCKET; |
| } |
| } |
| |
| /* |
| * If using syslogger, close the read side of the pipe. We don't bother |
| * tracking this in fd.c, either. |
| */ |
| if (!am_syslogger) |
| { |
| #ifndef WIN32 |
| if (syslogPipe[0] >= 0) |
| close(syslogPipe[0]); |
| syslogPipe[0] = -1; |
| #else |
| if (syslogPipe[0]) |
| CloseHandle(syslogPipe[0]); |
| syslogPipe[0] = 0; |
| #endif |
| } |
| |
| #ifdef USE_BONJOUR |
| /* If using Bonjour, close the connection to the mDNS daemon */ |
| if (bonjour_sdref) |
| close(DNSServiceRefSockFD(bonjour_sdref)); |
| #endif |
| } |
| |
| |
| /* |
| * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds |
| * |
| * Called early in the postmaster and every backend. |
| */ |
| void |
| InitProcessGlobals(void) |
| { |
| unsigned int rseed; |
| |
| MyProcPid = getpid(); |
| MyStartTimestamp = GetCurrentTimestamp(); |
| MyStartTime = timestamptz_to_time_t(MyStartTimestamp); |
| |
| /* |
| * Set a different seed for random() in every process. We want something |
| * unpredictable, so if possible, use high-quality random bits for the |
| * seed. Otherwise, fall back to a seed based on timestamp and PID. |
| */ |
| if (!pg_strong_random(&rseed, sizeof(rseed))) |
| { |
| /* |
| * Since PIDs and timestamps tend to change more frequently in their |
| * least significant bits, shift the timestamp left to allow a larger |
| * total number of seeds in a given time period. Since that would |
| * leave only 20 bits of the timestamp that cycle every ~1 second, |
| * also mix in some higher bits. |
| */ |
| rseed = ((uint64) MyProcPid) ^ |
| ((uint64) MyStartTimestamp << 12) ^ |
| ((uint64) MyStartTimestamp >> 20); |
| } |
| srandom(rseed); |
| } |
| |
| |
| /* |
| * reset_shared -- reset shared memory and semaphores |
| */ |
| static void |
| reset_shared(void) |
| { |
| /* |
| * Create or re-create shared memory and semaphores. |
| * |
| * Note: in each "cycle of life" we will normally assign the same IPC keys |
| * (if using SysV shmem and/or semas). This helps ensure that we will |
| * clean up dead IPC objects if the postmaster crashes and is restarted. |
| */ |
| CreateSharedMemoryAndSemaphores(); |
| |
| /* |
| * When backend process crashes, postmaster restart database, |
| * we need to init kmgr share memory. |
| */ |
| InitializeKmgr(); |
| InitializeBufferEncryption(); |
| |
| } |
| |
| |
| /* |
| * SIGHUP -- reread config files, and tell children to do same |
| */ |
| static void |
| SIGHUP_handler(SIGNAL_ARGS) |
| { |
| int save_errno = errno; |
| |
| if (Shutdown <= SmartShutdown) |
| { |
| ereport(LOG, |
| (errmsg("received SIGHUP, reloading configuration files"))); |
| ProcessConfigFile(PGC_SIGHUP); |
| SignalChildren(SIGHUP); |
| if (StartupPID != 0) |
| signal_child(StartupPID, SIGHUP); |
| if (BgWriterPID != 0) |
| signal_child(BgWriterPID, SIGHUP); |
| if (CheckpointerPID != 0) |
| signal_child(CheckpointerPID, SIGHUP); |
| if (WalWriterPID != 0) |
| signal_child(WalWriterPID, SIGHUP); |
| if (WalReceiverPID != 0) |
| signal_child(WalReceiverPID, SIGHUP); |
| if (AutoVacPID != 0) |
| signal_child(AutoVacPID, SIGHUP); |
| if (PgArchPID != 0) |
| signal_child(PgArchPID, SIGHUP); |
| if (SysLoggerPID != 0) |
| signal_child(SysLoggerPID, SIGHUP); |
| if (PgStatPID != 0) |
| signal_child(PgStatPID, SIGHUP); |
| if (enable_password_profile && LoginMonitorPID != 0) |
| signal_child(LoginMonitorPID, SIGHUP); |
| |
| /* Reload authentication config files too */ |
| if (!load_hba()) |
| ereport(LOG, |
| /* translator: %s is a configuration file */ |
| (errmsg("%s was not reloaded", "pg_hba.conf"))); |
| |
| if (!load_ident()) |
| ereport(LOG, |
| (errmsg("%s was not reloaded", "pg_ident.conf"))); |
| |
| #ifdef USE_SSL |
| /* Reload SSL configuration as well */ |
| if (EnableSSL) |
| { |
| if (secure_initialize(false) == 0) |
| LoadedSSL = true; |
| else |
| ereport(LOG, |
| (errmsg("SSL configuration was not reloaded"))); |
| } |
| else |
| { |
| secure_destroy(); |
| LoadedSSL = false; |
| } |
| #endif |
| |
| #ifdef EXEC_BACKEND |
| /* Update the starting-point file for future children */ |
| write_nondefault_variables(PGC_SIGHUP); |
| #endif |
| } |
| |
| errno = save_errno; |
| } |
| |
| |
| /* |
| * pmdie -- signal handler for processing various postmaster signals. |
| */ |
| static void |
| pmdie(SIGNAL_ARGS) |
| { |
| int save_errno = errno; |
| |
| ereport(DEBUG2, |
| (errmsg_internal("postmaster received signal %d", |
| postgres_signal_arg))); |
| |
| switch (postgres_signal_arg) |
| { |
| case SIGTERM: |
| |
| /* |
| * Smart Shutdown: |
| * |
| * Wait for children to end their work, then shut down. |
| */ |
| if (Shutdown >= SmartShutdown) |
| break; |
| Shutdown = SmartShutdown; |
| ereport(LOG, |
| (errmsg("received smart shutdown request"))); |
| |
| /* Report status */ |
| AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING); |
| #ifdef USE_SYSTEMD |
| sd_notify(0, "STOPPING=1"); |
| #endif |
| |
| /* |
| * If we reached normal running, we have to wait for any online |
| * backup mode to end; otherwise go straight to waiting for client |
| * backends to exit. (The difference is that in the former state, |
| * we'll still let in new superuser clients, so that somebody can |
| * end the online backup mode.) If already in PM_STOP_BACKENDS or |
| * a later state, do not change it. |
| */ |
| if (pmState == PM_RUN) |
| connsAllowed = ALLOW_SUPERUSER_CONNS; |
| else if (pmState == PM_HOT_STANDBY) |
| connsAllowed = ALLOW_NO_CONNS; |
| else if (pmState == PM_STARTUP || pmState == PM_RECOVERY) |
| { |
| /* There should be no clients, so proceed to stop children */ |
| pmState = PM_STOP_BACKENDS; |
| } |
| |
| /* |
| * Now wait for online backup mode to end and backends to exit. If |
| * that is already the case, PostmasterStateMachine will take the |
| * next step. |
| */ |
| PostmasterStateMachine(); |
| break; |
| |
| case SIGINT: |
| |
| /* |
| * Fast Shutdown: |
| * |
| * Abort all children with SIGTERM (rollback active transactions |
| * and exit) and shut down when they are gone. |
| */ |
| if (Shutdown >= FastShutdown) |
| break; |
| Shutdown = FastShutdown; |
| ereport(LOG, |
| (errmsg("received fast shutdown request"))); |
| |
| /* Report status */ |
| AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING); |
| #ifdef USE_SYSTEMD |
| sd_notify(0, "STOPPING=1"); |
| #endif |
| |
| if (pmState == PM_STARTUP || pmState == PM_RECOVERY) |
| { |
| /* Just shut down background processes silently */ |
| pmState = PM_STOP_BACKENDS; |
| } |
| else if (pmState == PM_RUN || |
| pmState == PM_HOT_STANDBY) |
| { |
| /* Report that we're about to zap live client sessions */ |
| ereport(LOG, |
| (errmsg("aborting any active transactions"))); |
| pmState = PM_STOP_BACKENDS; |
| } |
| |
| /* |
| * PostmasterStateMachine will issue any necessary signals, or |
| * take the next step if no child processes need to be killed. |
| */ |
| PostmasterStateMachine(); |
| break; |
| |
| case SIGQUIT: |
| |
| /* |
| * Immediate Shutdown: |
| * |
| * abort all children with SIGQUIT, wait for them to exit, |
| * terminate remaining ones with SIGKILL, then exit without |
| * attempt to properly shut down the data base system. |
| */ |
| if (Shutdown >= ImmediateShutdown) |
| break; |
| Shutdown = ImmediateShutdown; |
| ereport(LOG, |
| (errmsg("received immediate shutdown request"))); |
| |
| /* Report status */ |
| AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING); |
| #ifdef USE_SYSTEMD |
| sd_notify(0, "STOPPING=1"); |
| #endif |
| |
| /* tell children to shut down ASAP */ |
| SetQuitSignalReason(PMQUIT_FOR_STOP); |
| TerminateChildren(SIGQUIT); |
| pmState = PM_WAIT_BACKENDS; |
| |
| /* set stopwatch for them to die */ |
| AbortStartTime = time(NULL); |
| |
| /* |
| * Now wait for backends to exit. If there are none, |
| * PostmasterStateMachine will take the next step. |
| */ |
| PostmasterStateMachine(); |
| break; |
| } |
| |
| errno = save_errno; |
| } |
| |
| /* |
| * Reaper -- signal handler to cleanup after a child process dies. |
| */ |
| static void |
| reaper(SIGNAL_ARGS) |
| { |
| int save_errno = errno; |
| int pid; /* process id of dead child process */ |
| int exitstatus; /* its exit status */ |
| |
| ereport(DEBUG4, |
| (errmsg_internal("reaping dead processes"))); |
| |
| while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0) |
| { |
| /* |
| * Check if this child was a startup process. |
| */ |
| if (pid == StartupPID) |
| { |
| StartupPID = 0; |
| |
| /* |
| * Startup process exited in response to a shutdown request (or it |
| * completed normally regardless of the shutdown request). |
| */ |
| if (Shutdown > NoShutdown && |
| (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus))) |
| { |
| StartupStatus = STARTUP_NOT_RUNNING; |
| pmState = PM_WAIT_BACKENDS; |
| /* PostmasterStateMachine logic does the rest */ |
| continue; |
| } |
| |
| if (EXIT_STATUS_3(exitstatus)) |
| { |
| ereport(LOG, |
| (errmsg("shutdown at recovery target"))); |
| StartupStatus = STARTUP_NOT_RUNNING; |
| Shutdown = Max(Shutdown, SmartShutdown); |
| TerminateChildren(SIGTERM); |
| pmState = PM_WAIT_BACKENDS; |
| /* PostmasterStateMachine logic does the rest */ |
| continue; |
| } |
| |
| /* |
| * Unexpected exit of startup process (including FATAL exit) |
| * during PM_STARTUP is treated as catastrophic. There are no |
| * other processes running yet, so we can just exit. |
| */ |
| if (pmState == PM_STARTUP && |
| StartupStatus != STARTUP_SIGNALED && |
| !EXIT_STATUS_0(exitstatus)) |
| { |
| LogChildExit(LOG, _("startup process"), |
| pid, exitstatus); |
| ereport(LOG, |
| (errmsg("aborting startup due to startup process failure"))); |
| ExitPostmaster(1); |
| } |
| |
| /* |
| * After PM_STARTUP, any unexpected exit (including FATAL exit) of |
| * the startup process is catastrophic, so kill other children, |
| * and set StartupStatus so we don't try to reinitialize after |
| * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED, |
| * then we previously sent the startup process a SIGQUIT; so |
| * that's probably the reason it died, and we do want to try to |
| * restart in that case. |
| * |
| * This stanza also handles the case where we sent a SIGQUIT |
| * during PM_STARTUP due to some dead_end child crashing: in that |
| * situation, if the startup process dies on the SIGQUIT, we need |
| * to transition to PM_WAIT_BACKENDS state which will allow |
| * PostmasterStateMachine to restart the startup process. (On the |
| * other hand, the startup process might complete normally, if we |
| * were too late with the SIGQUIT. In that case we'll fall |
| * through and commence normal operations.) |
| */ |
| if (!EXIT_STATUS_0(exitstatus)) |
| { |
| if (StartupStatus == STARTUP_SIGNALED) |
| { |
| StartupStatus = STARTUP_NOT_RUNNING; |
| if (pmState == PM_STARTUP) |
| pmState = PM_WAIT_BACKENDS; |
| } |
| else |
| StartupStatus = STARTUP_CRASHED; |
| HandleChildCrash(pid, exitstatus, |
| _("startup process")); |
| continue; |
| } |
| |
| /* |
| * Startup succeeded, commence normal operations |
| */ |
| StartupStatus = STARTUP_NOT_RUNNING; |
| FatalError = false; |
| AbortStartTime = 0; |
| ReachedNormalRunning = true; |
| pmState = PM_RUN; |
| connsAllowed = ALLOW_ALL_CONNS; |
| |
| /* |
| * Crank up the background tasks, if we didn't do that already |
| * when we entered consistent recovery state. It doesn't matter |
| * if this fails, we'll just try again later. |
| */ |
| if (CheckpointerPID == 0) |
| CheckpointerPID = StartCheckpointer(); |
| if (BgWriterPID == 0) |
| BgWriterPID = StartBackgroundWriter(); |
| if (WalWriterPID == 0) |
| WalWriterPID = StartWalWriter(); |
| |
| /* |
| * Likewise, start other special children as needed. In a restart |
| * situation, some of them may be alive already. |
| */ |
| if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0) |
| AutoVacPID = StartAutoVacLauncher(); |
| if (PgArchStartupAllowed() && PgArchPID == 0) |
| PgArchPID = StartArchiver(); |
| if (PgStatPID == 0) |
| PgStatPID = pgstat_start(); |
| if (enable_password_profile && LoginMonitorPID == 0) |
| LoginMonitorPID = StartLoginMonitorLauncher(); |
| |
| /* workers may be scheduled to start now */ |
| maybe_start_bgworkers(); |
| |
| /* at this point we are really open for business */ |
| { |
| char version[512]; |
| |
| strlcpy(version, PG_VERSION_STR " compiled on " __DATE__ " " __TIME__, |
| sizeof(version)); |
| |
| #ifdef USE_ASSERT_CHECKING |
| strlcat(version, " (with assert checking)", sizeof(version)); |
| #endif |
| ereport(LOG,(errmsg("%s", version))); |
| |
| ereport(LOG, |
| (errmsg("database system is ready to accept connections"), |
| errdetail("%s",version))); |
| |
| PMAcceptingConnectionsStartTime = (pg_time_t) time(NULL); |
| } |
| |
| /* Report status */ |
| AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY); |
| #ifdef USE_SYSTEMD |
| sd_notify(0, "READY=1"); |
| #endif |
| |
| continue; |
| } |
| |
| /* |
| * Was it the bgwriter? Normal exit can be ignored; we'll start a new |
| * one at the next iteration of the postmaster's main loop, if |
| * necessary. Any other exit condition is treated as a crash. |
| */ |
| if (pid == BgWriterPID) |
| { |
| BgWriterPID = 0; |
| if (!EXIT_STATUS_0(exitstatus)) |
| HandleChildCrash(pid, exitstatus, |
| _("background writer process")); |
| continue; |
| } |
| |
| /* |
| * Was it the checkpointer? |
| */ |
| if (pid == CheckpointerPID) |
| { |
| CheckpointerPID = 0; |
| if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN) |
| { |
| /* |
| * OK, we saw normal exit of the checkpointer after it's been |
| * told to shut down. We expect that it wrote a shutdown |
| * checkpoint. (If for some reason it didn't, recovery will |
| * occur on next postmaster start.) |
| * |
| * At this point we should have no normal backend children |
| * left (else we'd not be in PM_SHUTDOWN state) but we might |
| * have dead_end children to wait for. |
| * |
| * If we have an archiver subprocess, tell it to do a last |
| * archive cycle and quit. Likewise, if we have walsender |
| * processes, tell them to send any remaining WAL and quit. |
| */ |
| Assert(Shutdown > NoShutdown); |
| |
| /* Waken archiver for the last time */ |
| if (PgArchPID != 0) |
| signal_child(PgArchPID, SIGUSR2); |
| |
| /* |
| * Waken walsenders for the last time. No regular backends |
| * should be around anymore. |
| */ |
| SignalChildren(SIGUSR2); |
| |
| pmState = PM_SHUTDOWN_2; |
| |
| /* |
| * We can also shut down the stats collector now; there's |
| * nothing left for it to do. |
| */ |
| if (PgStatPID != 0) |
| signal_child(PgStatPID, SIGQUIT); |
| } |
| else |
| { |
| /* |
| * Any unexpected exit of the checkpointer (including FATAL |
| * exit) is treated as a crash. |
| */ |
| HandleChildCrash(pid, exitstatus, |
| _("checkpointer process")); |
| } |
| |
| continue; |
| } |
| |
| /* |
| * Was it the wal writer? Normal exit can be ignored; we'll start a |
| * new one at the next iteration of the postmaster's main loop, if |
| * necessary. Any other exit condition is treated as a crash. |
| */ |
| if (pid == WalWriterPID) |
| { |
| WalWriterPID = 0; |
| if (!EXIT_STATUS_0(exitstatus)) |
| HandleChildCrash(pid, exitstatus, |
| _("WAL writer process")); |
| continue; |
| } |
| |
| /* |
| * Was it the wal receiver? If exit status is zero (normal) or one |
| * (FATAL exit), we assume everything is all right just like normal |
| * backends. (If we need a new wal receiver, we'll start one at the |
| * next iteration of the postmaster's main loop.) |
| */ |
| if (pid == WalReceiverPID) |
| { |
| WalReceiverPID = 0; |
| if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) |
| HandleChildCrash(pid, exitstatus, |
| _("WAL receiver process")); |
| continue; |
| } |
| |
| /* |
| * Was it the autovacuum launcher? Normal exit can be ignored; we'll |
| * start a new one at the next iteration of the postmaster's main |
| * loop, if necessary. Any other exit condition is treated as a |
| * crash. |
| */ |
| if (pid == AutoVacPID) |
| { |
| AutoVacPID = 0; |
| if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) |
| HandleChildCrash(pid, exitstatus, |
| _("autovacuum launcher process")); |
| continue; |
| } |
| |
| /* |
| * Was it the login monitor? If exit status is zero (normal) or one |
| * (FATAL exit), we assume everything is all right just like normal |
| * backends and just try to restart a new one. Any other exit condition |
| * is treated as a crash. |
| */ |
| if (enable_password_profile && pid == LoginMonitorPID) |
| { |
| LoginMonitorPID = 0; |
| if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) |
| HandleChildCrash(pid, exitstatus, |
| _("login monitor process")); |
| continue; |
| } |
| |
| /* |
| * Was it the archiver? If exit status is zero (normal) or one (FATAL |
| * exit), we assume everything is all right just like normal backends |
| * and just try to restart a new one so that we immediately retry |
| * archiving remaining files. (If fail, we'll try again in future |
| * cycles of the postmaster's main loop.) Unless we were waiting for |
| * it to shut down; don't restart it in that case, and |
| * PostmasterStateMachine() will advance to the next shutdown step. |
| */ |
| if (pid == PgArchPID) |
| { |
| PgArchPID = 0; |
| if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) |
| HandleChildCrash(pid, exitstatus, |
| _("archiver process")); |
| if (PgArchStartupAllowed()) |
| PgArchPID = StartArchiver(); |
| continue; |
| } |
| |
| /* |
| * Was it the statistics collector? If so, just try to start a new |
| * one; no need to force reset of the rest of the system. (If fail, |
| * we'll try again in future cycles of the main loop.) |
| */ |
| if (pid == PgStatPID) |
| { |
| PgStatPID = 0; |
| if (!EXIT_STATUS_0(exitstatus)) |
| LogChildExit(LOG, _("statistics collector process"), |
| pid, exitstatus); |
| if (pmState == PM_RUN || pmState == PM_HOT_STANDBY) |
| PgStatPID = pgstat_start(); |
| continue; |
| } |
| |
| /* Was it the system logger? If so, try to start a new one */ |
| if (pid == SysLoggerPID) |
| { |
| SysLoggerPID = 0; |
| /* for safety's sake, launch new logger *first* */ |
| SysLoggerPID = SysLogger_Start(); |
| if (!EXIT_STATUS_0(exitstatus)) |
| LogChildExit(LOG, _("system logger process"), |
| pid, exitstatus); |
| continue; |
| } |
| |
| /* Was it one of our background workers? */ |
| if (CleanupBackgroundWorker(pid, exitstatus)) |
| { |
| /* have it be restarted */ |
| HaveCrashedWorker = true; |
| continue; |
| } |
| |
| /* |
| * Else do standard backend child cleanup. |
| */ |
| CleanupBackend(pid, exitstatus); |
| } /* loop over pending child-death reports */ |
| |
| /* |
| * After cleaning out the SIGCHLD queue, see if we have any state changes |
| * or actions to make. |
| */ |
| PostmasterStateMachine(); |
| |
| errno = save_errno; |
| } |
| |
| /* |
| * Scan the bgworkers list and see if the given PID (which has just stopped |
| * or crashed) is in it. Handle its shutdown if so, and return true. If not a |
| * bgworker, return false. |
| * |
| * This is heavily based on CleanupBackend. One important difference is that |
| * we don't know yet that the dying process is a bgworker, so we must be silent |
| * until we're sure it is. |
| */ |
| static bool |
| CleanupBackgroundWorker(int pid, |
| int exitstatus) /* child's exit status */ |
| { |
| char namebuf[MAXPGPATH]; |
| slist_mutable_iter iter; |
| |
| slist_foreach_modify(iter, &BackgroundWorkerList) |
| { |
| RegisteredBgWorker *rw; |
| |
| rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); |
| |
| if (rw->rw_pid != pid) |
| continue; |
| |
| #ifdef WIN32 |
| /* see CleanupBackend */ |
| if (exitstatus == ERROR_WAIT_NO_CHILDREN) |
| exitstatus = 0; |
| #endif |
| |
| snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""), |
| rw->rw_worker.bgw_type); |
| |
| |
| if (!EXIT_STATUS_0(exitstatus)) |
| { |
| /* Record timestamp, so we know when to restart the worker. */ |
| rw->rw_crashed_at = GetCurrentTimestamp(); |
| } |
| else |
| { |
| /* Zero exit status means terminate */ |
| rw->rw_crashed_at = 0; |
| rw->rw_terminate = true; |
| } |
| |
| /* |
| * Additionally, for shared-memory-connected workers, just like a |
| * backend, any exit status other than 0 or 1 is considered a crash |
| * and causes a system-wide restart. |
| */ |
| if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0) |
| { |
| if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) |
| { |
| HandleChildCrash(pid, exitstatus, namebuf); |
| return true; |
| } |
| } |
| |
| /* |
| * We must release the postmaster child slot whether this worker is |
| * connected to shared memory or not, but we only treat it as a crash |
| * if it is in fact connected. |
| */ |
| if (!ReleasePostmasterChildSlot(rw->rw_child_slot) && |
| (rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0) |
| { |
| HandleChildCrash(pid, exitstatus, namebuf); |
| return true; |
| } |
| |
| /* Get it out of the BackendList and clear out remaining data */ |
| dlist_delete(&rw->rw_backend->elem); |
| #ifdef EXEC_BACKEND |
| ShmemBackendArrayRemove(rw->rw_backend); |
| #endif |
| |
| /* |
| * It's possible that this background worker started some OTHER |
| * background worker and asked to be notified when that worker started |
| * or stopped. If so, cancel any notifications destined for the |
| * now-dead backend. |
| */ |
| if (rw->rw_backend->bgworker_notify) |
| BackgroundWorkerStopNotifications(rw->rw_pid); |
| free(rw->rw_backend); |
| rw->rw_backend = NULL; |
| rw->rw_pid = 0; |
| rw->rw_child_slot = 0; |
| ReportBackgroundWorkerExit(&iter); /* report child death */ |
| |
| LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG, |
| namebuf, pid, exitstatus); |
| |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* |
| * CleanupBackend -- cleanup after terminated backend. |
| * |
| * Remove all local state associated with backend. |
| * |
| * If you change this, see also CleanupBackgroundWorker. |
| */ |
| static void |
| CleanupBackend(int pid, |
| int exitstatus) /* child's exit status. */ |
| { |
| dlist_mutable_iter iter; |
| |
| LogChildExit(DEBUG2, _("server process"), pid, exitstatus); |
| |
| /* |
| * If a backend dies in an ugly way then we must signal all other backends |
| * to quickdie. If exit status is zero (normal) or one (FATAL exit), we |
| * assume everything is all right and proceed to remove the backend from |
| * the active backend list. |
| */ |
| |
| #ifdef WIN32 |
| |
| /* |
| * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case, |
| * since that sometimes happens under load when the process fails to start |
| * properly (long before it starts using shared memory). Microsoft reports |
| * it is related to mutex failure: |
| * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php |
| */ |
| if (exitstatus == ERROR_WAIT_NO_CHILDREN) |
| { |
| LogChildExit(LOG, _("server process"), pid, exitstatus); |
| exitstatus = 0; |
| } |
| #endif |
| |
| if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) |
| { |
| HandleChildCrash(pid, exitstatus, _("server process")); |
| return; |
| } |
| |
| dlist_foreach_modify(iter, &BackendList) |
| { |
| Backend *bp = dlist_container(Backend, elem, iter.cur); |
| |
| if (bp->pid == pid) |
| { |
| if (!bp->dead_end) |
| { |
| if (!ReleasePostmasterChildSlot(bp->child_slot)) |
| { |
| /* |
| * Uh-oh, the child failed to clean itself up. Treat as a |
| * crash after all. |
| */ |
| HandleChildCrash(pid, exitstatus, _("server process")); |
| return; |
| } |
| #ifdef EXEC_BACKEND |
| ShmemBackendArrayRemove(bp); |
| #endif |
| } |
| if (bp->bgworker_notify) |
| { |
| /* |
| * This backend may have been slated to receive SIGUSR1 when |
| * some background worker started or stopped. Cancel those |
| * notifications, as we don't want to signal PIDs that are not |
| * PostgreSQL backends. This gets skipped in the (probably |
| * very common) case where the backend has never requested any |
| * such notifications. |
| */ |
| BackgroundWorkerStopNotifications(bp->pid); |
| } |
| dlist_delete(iter.cur); |
| free(bp); |
| break; |
| } |
| } |
| } |
| |
| /* |
| * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer, |
| * walwriter, autovacuum, archiver or background worker. |
| * |
| * The objectives here are to clean up our local state about the child |
| * process, and to signal all other remaining children to quickdie. |
| */ |
| static void |
| HandleChildCrash(int pid, int exitstatus, const char *procname) |
| { |
| dlist_mutable_iter iter; |
| slist_iter siter; |
| Backend *bp; |
| bool take_action; |
| |
| /* |
| * We only log messages and send signals if this is the first process |
| * crash and we're not doing an immediate shutdown; otherwise, we're only |
| * here to update postmaster's idea of live processes. If we have already |
| * signaled children, nonzero exit status is to be expected, so don't |
| * clutter log. |
| */ |
| take_action = !FatalError && Shutdown != ImmediateShutdown; |
| |
| if (take_action) |
| { |
| LogChildExit(LOG, procname, pid, exitstatus); |
| ereport(LOG, |
| (errmsg("terminating any other active server processes"))); |
| SetQuitSignalReason(PMQUIT_FOR_CRASH); |
| } |
| |
| /* Process background workers. */ |
| slist_foreach(siter, &BackgroundWorkerList) |
| { |
| RegisteredBgWorker *rw; |
| |
| rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); |
| if (rw->rw_pid == 0) |
| continue; /* not running */ |
| if (rw->rw_pid == pid) |
| { |
| /* |
| * Found entry for freshly-dead worker, so remove it. |
| */ |
| (void) ReleasePostmasterChildSlot(rw->rw_child_slot); |
| dlist_delete(&rw->rw_backend->elem); |
| #ifdef EXEC_BACKEND |
| ShmemBackendArrayRemove(rw->rw_backend); |
| #endif |
| free(rw->rw_backend); |
| rw->rw_backend = NULL; |
| rw->rw_pid = 0; |
| rw->rw_child_slot = 0; |
| /* don't reset crashed_at */ |
| /* don't report child stop, either */ |
| /* Keep looping so we can signal remaining workers */ |
| } |
| else |
| { |
| /* |
| * This worker is still alive. Unless we did so already, tell it |
| * to commit hara-kiri. |
| * |
| * SIGQUIT is the special signal that says exit without proc_exit |
| * and let the user know what's going on. But if SendStop is set |
| * (-T on command line), then we send SIGSTOP instead, so that we |
| * can get core dumps from all backends by hand. |
| */ |
| if (take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| (SendStop ? "SIGSTOP" : "SIGQUIT"), |
| (int) rw->rw_pid))); |
| signal_child(rw->rw_pid, (SendStop ? SIGSTOP : SIGQUIT)); |
| } |
| } |
| } |
| |
| /* Process regular backends */ |
| dlist_foreach_modify(iter, &BackendList) |
| { |
| bp = dlist_container(Backend, elem, iter.cur); |
| |
| if (bp->pid == pid) |
| { |
| /* |
| * Found entry for freshly-dead backend, so remove it. |
| */ |
| if (!bp->dead_end) |
| { |
| (void) ReleasePostmasterChildSlot(bp->child_slot); |
| #ifdef EXEC_BACKEND |
| ShmemBackendArrayRemove(bp); |
| #endif |
| } |
| dlist_delete(iter.cur); |
| free(bp); |
| /* Keep looping so we can signal remaining backends */ |
| } |
| else |
| { |
| /* |
| * This backend is still alive. Unless we did so already, tell it |
| * to commit hara-kiri. |
| * |
| * SIGQUIT is the special signal that says exit without proc_exit |
| * and let the user know what's going on. But if SendStop is set |
| * (-T on command line), then we send SIGSTOP instead, so that we |
| * can get core dumps from all backends by hand. |
| * |
| * We could exclude dead_end children here, but at least in the |
| * SIGSTOP case it seems better to include them. |
| * |
| * Background workers were already processed above; ignore them |
| * here. |
| */ |
| if (bp->bkend_type == BACKEND_TYPE_BGWORKER) |
| continue; |
| |
| if (take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| (SendStop ? "SIGSTOP" : "SIGQUIT"), |
| (int) bp->pid))); |
| signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT)); |
| } |
| } |
| } |
| |
| /* Take care of the startup process too */ |
| if (pid == StartupPID) |
| { |
| StartupPID = 0; |
| /* Caller adjusts StartupStatus, so don't touch it here */ |
| } |
| else if (StartupPID != 0 && take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| (SendStop ? "SIGSTOP" : "SIGQUIT"), |
| (int) StartupPID))); |
| signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT)); |
| StartupStatus = STARTUP_SIGNALED; |
| } |
| |
| /* Take care of the bgwriter too */ |
| if (pid == BgWriterPID) |
| BgWriterPID = 0; |
| else if (BgWriterPID != 0 && take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| (SendStop ? "SIGSTOP" : "SIGQUIT"), |
| (int) BgWriterPID))); |
| signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT)); |
| } |
| |
| /* Take care of the checkpointer too */ |
| if (pid == CheckpointerPID) |
| CheckpointerPID = 0; |
| else if (CheckpointerPID != 0 && take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| (SendStop ? "SIGSTOP" : "SIGQUIT"), |
| (int) CheckpointerPID))); |
| signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT)); |
| } |
| |
| /* Take care of the walwriter too */ |
| if (pid == WalWriterPID) |
| WalWriterPID = 0; |
| else if (WalWriterPID != 0 && take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| (SendStop ? "SIGSTOP" : "SIGQUIT"), |
| (int) WalWriterPID))); |
| signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT)); |
| } |
| |
| /* Take care of the walreceiver too */ |
| if (pid == WalReceiverPID) |
| WalReceiverPID = 0; |
| else if (WalReceiverPID != 0 && take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| (SendStop ? "SIGSTOP" : "SIGQUIT"), |
| (int) WalReceiverPID))); |
| signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT)); |
| } |
| |
| /* Take care of the autovacuum launcher too */ |
| if (pid == AutoVacPID) |
| AutoVacPID = 0; |
| else if (AutoVacPID != 0 && take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| (SendStop ? "SIGSTOP" : "SIGQUIT"), |
| (int) AutoVacPID))); |
| signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT)); |
| } |
| |
| /* Take care of the login monitor too */ |
| if (enable_password_profile) |
| { |
| if (pid == LoginMonitorPID) |
| LoginMonitorPID = 0; |
| else if (LoginMonitorPID != 0 && take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| (SendStop ? "SIGSTOP" : "SIGQUIT"), |
| (int) LoginMonitorPID))); |
| signal_child(LoginMonitorPID, (SendStop ? SIGSTOP : SIGQUIT)); |
| } |
| } |
| |
| /* Take care of the archiver too */ |
| if (pid == PgArchPID) |
| PgArchPID = 0; |
| else if (PgArchPID != 0 && take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| (SendStop ? "SIGSTOP" : "SIGQUIT"), |
| (int) PgArchPID))); |
| signal_child(PgArchPID, (SendStop ? SIGSTOP : SIGQUIT)); |
| } |
| |
| /* |
| * Force a power-cycle of the pgstat process too. (This isn't absolutely |
| * necessary, but it seems like a good idea for robustness, and it |
| * simplifies the state-machine logic in the case where a shutdown request |
| * arrives during crash processing.) |
| */ |
| if (PgStatPID != 0 && take_action) |
| { |
| ereport(DEBUG2, |
| (errmsg_internal("sending %s to process %d", |
| "SIGQUIT", |
| (int) PgStatPID))); |
| signal_child(PgStatPID, SIGQUIT); |
| allow_immediate_pgstat_restart(); |
| } |
| |
| /* We do NOT restart the syslogger */ |
| |
| if (Shutdown != ImmediateShutdown) |
| FatalError = true; |
| |
| /* We now transit into a state of waiting for children to die */ |
| if (pmState == PM_RECOVERY || |
| pmState == PM_HOT_STANDBY || |
| pmState == PM_RUN || |
| pmState == PM_STOP_BACKENDS || |
| pmState == PM_SHUTDOWN) |
| pmState = PM_WAIT_BACKENDS; |
| |
| /* |
| * .. and if this doesn't happen quickly enough, now the clock is ticking |
| * for us to kill them without mercy. |
| */ |
| if (AbortStartTime == 0) |
| AbortStartTime = time(NULL); |
| } |
| |
| /* |
| * Log the death of a child process. |
| */ |
| static void |
| LogChildExit(int lev, const char *procname, int pid, int exitstatus) |
| { |
| /* |
| * size of activity_buffer is arbitrary, but set equal to default |
| * track_activity_query_size |
| */ |
| char activity_buffer[1024]; |
| const char *activity = NULL; |
| |
| if (!EXIT_STATUS_0(exitstatus)) |
| activity = pgstat_get_crashed_backend_activity(pid, |
| activity_buffer, |
| sizeof(activity_buffer)); |
| |
| if (WIFEXITED(exitstatus)) |
| ereport(lev, |
| |
| /*------ |
| translator: %s is a noun phrase describing a child process, such as |
| "server process" */ |
| (errmsg("%s (PID %d) exited with exit code %d", |
| procname, pid, WEXITSTATUS(exitstatus)), |
| activity ? errdetail("Failed process was running: %s", activity) : 0)); |
| else if (WIFSIGNALED(exitstatus)) |
| { |
| #if defined(WIN32) |
| ereport(lev, |
| |
| /*------ |
| translator: %s is a noun phrase describing a child process, such as |
| "server process" */ |
| (errmsg("%s (PID %d) was terminated by exception 0x%X", |
| procname, pid, WTERMSIG(exitstatus)), |
| errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."), |
| activity ? errdetail("Failed process was running: %s", activity) : 0)); |
| #else |
| ereport(lev, |
| |
| /*------ |
| translator: %s is a noun phrase describing a child process, such as |
| "server process" */ |
| (errmsg("%s (PID %d) was terminated by signal %d: %s", |
| procname, pid, WTERMSIG(exitstatus), |
| pg_strsignal(WTERMSIG(exitstatus))), |
| activity ? errdetail("Failed process was running: %s", activity) : 0)); |
| #endif |
| } |
| else |
| ereport(lev, |
| |
| /*------ |
| translator: %s is a noun phrase describing a child process, such as |
| "server process" */ |
| (errmsg("%s (PID %d) exited with unrecognized status %d", |
| procname, pid, exitstatus), |
| activity ? errdetail("Failed process was running: %s", activity) : 0)); |
| } |
| |
| /* |
| * Advance the postmaster's state machine and take actions as appropriate |
| * |
| * This is common code for pmdie(), reaper() and sigusr1_handler(), which |
| * receive the signals that might mean we need to change state. |
| */ |
| static void |
| PostmasterStateMachine(void) |
| { |
| /* If we're doing a smart shutdown, try to advance that state. */ |
| if (pmState == PM_RUN || pmState == PM_HOT_STANDBY) |
| { |
| if (connsAllowed == ALLOW_SUPERUSER_CONNS) |
| { |
| /* |
| * ALLOW_SUPERUSER_CONNS state ends as soon as online backup mode |
| * is not active. |
| */ |
| if (!BackupInProgress()) |
| connsAllowed = ALLOW_NO_CONNS; |
| } |
| |
| if (connsAllowed == ALLOW_NO_CONNS) |
| { |
| /* |
| * ALLOW_NO_CONNS state ends when we have no normal client |
| * backends running. Then we're ready to stop other children. |
| */ |
| if (CountChildren(BACKEND_TYPE_NORMAL) == 0) |
| pmState = PM_STOP_BACKENDS; |
| } |
| } |
| |
| /* |
| * If we're ready to do so, signal child processes to shut down. (This |
| * isn't a persistent state, but treating it as a distinct pmState allows |
| * us to share this code across multiple shutdown code paths.) |
| */ |
| if (pmState == PM_STOP_BACKENDS) |
| { |
| /* |
| * Forget any pending requests for background workers, since we're no |
| * longer willing to launch any new workers. (If additional requests |
| * arrive, BackgroundWorkerStateChange will reject them.) |
| */ |
| ForgetUnstartedBackgroundWorkers(); |
| |
| /* Signal all backend children except walsenders */ |
| SignalSomeChildren(SIGTERM, |
| BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND); |
| /* and the autovac launcher too */ |
| if (AutoVacPID != 0) |
| signal_child(AutoVacPID, SIGTERM); |
| /* and the bgwriter too */ |
| if (BgWriterPID != 0) |
| signal_child(BgWriterPID, SIGTERM); |
| /* and the walwriter too */ |
| if (WalWriterPID != 0) |
| signal_child(WalWriterPID, SIGTERM); |
| /* If we're in recovery, also stop startup and walreceiver procs */ |
| if (StartupPID != 0) |
| signal_child(StartupPID, SIGTERM); |
| if (WalReceiverPID != 0) |
| signal_child(WalReceiverPID, SIGTERM); |
| /* stop login monitor */ |
| if (enable_password_profile && LoginMonitorPID != 0) |
| signal_child(LoginMonitorPID, SIGTERM); |
| /* checkpointer, archiver, stats, and syslogger may continue for now */ |
| |
| /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */ |
| pmState = PM_WAIT_BACKENDS; |
| } |
| |
| /* |
| * If we are in a state-machine state that implies waiting for backends to |
| * exit, see if they're all gone, and change state if so. |
| */ |
| if (pmState == PM_WAIT_BACKENDS) |
| { |
| /* |
| * PM_WAIT_BACKENDS state ends when we have no regular backends |
| * (including autovac workers), no bgworkers (including unconnected |
| * ones), and no walwriter, autovac launcher or bgwriter. If we are |
| * doing crash recovery or an immediate shutdown then we expect the |
| * checkpointer to exit as well, otherwise not. The stats and |
| * syslogger processes are disregarded since they are not connected to |
| * shared memory; we also disregard dead_end children here. Walsenders |
| * and archiver are also disregarded, they will be terminated later |
| * after writing the checkpoint record. |
| */ |
| if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 && |
| StartupPID == 0 && |
| WalReceiverPID == 0 && |
| BgWriterPID == 0 && |
| (CheckpointerPID == 0 || |
| (!FatalError && Shutdown < ImmediateShutdown)) && |
| WalWriterPID == 0 && |
| AutoVacPID == 0 && |
| LoginMonitorPID == 0) |
| { |
| if (Shutdown >= ImmediateShutdown || FatalError) |
| { |
| /* |
| * Start waiting for dead_end children to die. This state |
| * change causes ServerLoop to stop creating new ones. |
| */ |
| pmState = PM_WAIT_DEAD_END; |
| |
| /* |
| * We already SIGQUIT'd the archiver and stats processes, if |
| * any, when we started immediate shutdown or entered |
| * FatalError state. |
| */ |
| } |
| else |
| { |
| /* |
| * If we get here, we are proceeding with normal shutdown. All |
| * the regular children are gone, and it's time to tell the |
| * checkpointer to do a shutdown checkpoint. |
| */ |
| Assert(Shutdown > NoShutdown); |
| /* Start the checkpointer if not running */ |
| if (CheckpointerPID == 0) |
| CheckpointerPID = StartCheckpointer(); |
| /* And tell it to shut down */ |
| if (CheckpointerPID != 0) |
| { |
| signal_child(CheckpointerPID, SIGUSR2); |
| pmState = PM_SHUTDOWN; |
| } |
| else |
| { |
| /* |
| * If we failed to fork a checkpointer, just shut down. |
| * Any required cleanup will happen at next restart. We |
| * set FatalError so that an "abnormal shutdown" message |
| * gets logged when we exit. |
| */ |
| FatalError = true; |
| pmState = PM_WAIT_DEAD_END; |
| |
| /* Kill the walsenders, archiver and stats collector too */ |
| SignalChildren(SIGQUIT); |
| if (PgArchPID != 0) |
| signal_child(PgArchPID, SIGQUIT); |
| if (PgStatPID != 0) |
| signal_child(PgStatPID, SIGQUIT); |
| } |
| } |
| } |
| } |
| |
| if (pmState == PM_SHUTDOWN_2) |
| { |
| /* |
| * PM_SHUTDOWN_2 state ends when there's no other children than |
| * dead_end children left. There shouldn't be any regular backends |
| * left by now anyway; what we're really waiting for is walsenders and |
| * archiver. |
| */ |
| if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0) |
| { |
| pmState = PM_WAIT_DEAD_END; |
| } |
| } |
| |
| if (pmState == PM_WAIT_DEAD_END) |
| { |
| /* |
| * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty |
| * (ie, no dead_end children remain), and the archiver and stats |
| * collector are gone too. |
| * |
| * The reason we wait for those two is to protect them against a new |
| * postmaster starting conflicting subprocesses; this isn't an |
| * ironclad protection, but it at least helps in the |
| * shutdown-and-immediately-restart scenario. Note that they have |
| * already been sent appropriate shutdown signals, either during a |
| * normal state transition leading up to PM_WAIT_DEAD_END, or during |
| * FatalError processing. |
| */ |
| if (dlist_is_empty(&BackendList) && |
| PgArchPID == 0 && PgStatPID == 0) |
| { |
| /* These other guys should be dead already */ |
| Assert(StartupPID == 0); |
| Assert(WalReceiverPID == 0); |
| Assert(BgWriterPID == 0); |
| Assert(CheckpointerPID == 0); |
| Assert(WalWriterPID == 0); |
| Assert(AutoVacPID == 0); |
| Assert(LoginMonitorPID == 0); |
| /* syslogger is not considered here */ |
| pmState = PM_NO_CHILDREN; |
| } |
| } |
| |
| /* |
| * If we've been told to shut down, we exit as soon as there are no |
| * remaining children. If there was a crash, cleanup will occur at the |
| * next startup. (Before PostgreSQL 8.3, we tried to recover from the |
| * crash before exiting, but that seems unwise if we are quitting because |
| * we got SIGTERM from init --- there may well not be time for recovery |
| * before init decides to SIGKILL us.) |
| * |
| * Note that the syslogger continues to run. It will exit when it sees |
| * EOF on its input pipe, which happens when there are no more upstream |
| * processes. |
| */ |
| if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN) |
| { |
| if (FatalError) |
| { |
| ereport(LOG, (errmsg("abnormal database system shutdown"))); |
| ExitPostmaster(1); |
| } |
| else |
| { |
| /* |
| * Terminate exclusive backup mode to avoid recovery after a clean |
| * fast shutdown. Since an exclusive backup can only be taken |
| * during normal running (and not, for example, while running |
| * under Hot Standby) it only makes sense to do this if we reached |
| * normal running. If we're still in recovery, the backup file is |
| * one we're recovering *from*, and we must keep it around so that |
| * recovery restarts from the right place. |
| */ |
| if (ReachedNormalRunning) |
| CancelBackup(); |
| |
| /* |
| * Normal exit from the postmaster is here. We don't need to log |
| * anything here, since the UnlinkLockFiles proc_exit callback |
| * will do so, and that should be the last user-visible action. |
| */ |
| ExitPostmaster(0); |
| } |
| } |
| |
| /* |
| * If the startup process failed, or the user does not want an automatic |
| * restart after backend crashes, wait for all non-syslogger children to |
| * exit, and then exit postmaster. We don't try to reinitialize when the |
| * startup process fails, because more than likely it will just fail again |
| * and we will keep trying forever. |
| */ |
| if (pmState == PM_NO_CHILDREN) |
| { |
| if (StartupStatus == STARTUP_CRASHED) |
| { |
| ereport(LOG, |
| (errmsg("shutting down due to startup process failure"))); |
| ExitPostmaster(1); |
| } |
| if (!restart_after_crash) |
| { |
| ereport(LOG, |
| (errmsg("shutting down because restart_after_crash is off"))); |
| ExitPostmaster(1); |
| } |
| } |
| |
| /* |
| * If we need to recover from a crash, wait for all non-syslogger children |
| * to exit, then reset shmem and StartupDataBase. |
| */ |
| if (FatalError && pmState == PM_NO_CHILDREN) |
| { |
| ereport(LOG, |
| (errmsg("all server processes terminated; reinitializing"))); |
| |
| /* CDB: reload all auxiliary workers like FTS and DTX recover or GDD */ |
| load_auxiliary_libraries(); |
| |
| /* remove leftover temporary files after a crash */ |
| if (remove_temp_files_after_crash) |
| RemovePgTempFiles(); |
| |
| /* allow background workers to immediately restart */ |
| ResetBackgroundWorkerCrashTimes(); |
| |
| shmem_exit(1); |
| |
| /* re-read control file into local memory */ |
| LocalProcessControlFile(true); |
| |
| reset_shared(); |
| |
| StartupPID = StartupDataBase(); |
| Assert(StartupPID != 0); |
| StartupStatus = STARTUP_RUNNING; |
| pmState = PM_STARTUP; |
| /* crash recovery started, reset SIGKILL flag */ |
| AbortStartTime = 0; |
| |
| if (start_bgworkers_hook) |
| { |
| (*start_bgworkers_hook) (FatalError, pmState, do_start_bgworker); |
| } |
| } |
| } |
| |
| |
| /* |
| * Send a signal to a postmaster child process |
| * |
| * On systems that have setsid(), each child process sets itself up as a |
| * process group leader. For signals that are generally interpreted in the |
| * appropriate fashion, we signal the entire process group not just the |
| * direct child process. This allows us to, for example, SIGQUIT a blocked |
| * archive_recovery script, or SIGINT a script being run by a backend via |
| * system(). |
| * |
| * There is a race condition for recently-forked children: they might not |
| * have executed setsid() yet. So we signal the child directly as well as |
| * the group. We assume such a child will handle the signal before trying |
| * to spawn any grandchild processes. We also assume that signaling the |
| * child twice will not cause any problems. |
| */ |
| static void |
| signal_child(pid_t pid, int signal) |
| { |
| if (kill(pid, signal) < 0) |
| elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal); |
| #ifdef HAVE_SETSID |
| switch (signal) |
| { |
| case SIGINT: |
| case SIGTERM: |
| case SIGQUIT: |
| case SIGSTOP: |
| case SIGKILL: |
| if (kill(-pid, signal) < 0) |
| elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal); |
| break; |
| default: |
| break; |
| } |
| #endif |
| } |
| |
| /* |
| * Send a signal to the targeted children (but NOT special children; |
| * dead_end children are never signaled, either). |
| */ |
| static bool |
| SignalSomeChildren(int signal, int target) |
| { |
| dlist_iter iter; |
| bool signaled = false; |
| |
| dlist_foreach(iter, &BackendList) |
| { |
| Backend *bp = dlist_container(Backend, elem, iter.cur); |
| |
| if (bp->dead_end) |
| continue; |
| |
| /* |
| * Since target == BACKEND_TYPE_ALL is the most common case, we test |
| * it first and avoid touching shared memory for every child. |
| */ |
| if (target != BACKEND_TYPE_ALL) |
| { |
| /* |
| * Assign bkend_type for any recently announced WAL Sender |
| * processes. |
| */ |
| if (bp->bkend_type == BACKEND_TYPE_NORMAL && |
| IsPostmasterChildWalSender(bp->child_slot)) |
| bp->bkend_type = BACKEND_TYPE_WALSND; |
| |
| if (!(target & bp->bkend_type)) |
| continue; |
| } |
| |
| ereport(DEBUG4, |
| (errmsg_internal("sending signal %d to process %d", |
| signal, (int) bp->pid))); |
| signal_child(bp->pid, signal); |
| signaled = true; |
| } |
| return signaled; |
| } |
| |
| /* |
| * Send a termination signal to children. This considers all of our children |
| * processes, except syslogger and dead_end backends. |
| */ |
| static void |
| TerminateChildren(int signal) |
| { |
| SignalChildren(signal); |
| if (StartupPID != 0) |
| { |
| signal_child(StartupPID, signal); |
| if (signal == SIGQUIT || signal == SIGKILL) |
| StartupStatus = STARTUP_SIGNALED; |
| } |
| if (BgWriterPID != 0) |
| signal_child(BgWriterPID, signal); |
| if (CheckpointerPID != 0) |
| signal_child(CheckpointerPID, signal); |
| if (WalWriterPID != 0) |
| signal_child(WalWriterPID, signal); |
| if (WalReceiverPID != 0) |
| signal_child(WalReceiverPID, signal); |
| if (AutoVacPID != 0) |
| signal_child(AutoVacPID, signal); |
| if (PgArchPID != 0) |
| signal_child(PgArchPID, signal); |
| if (PgStatPID != 0) |
| signal_child(PgStatPID, signal); |
| if (enable_password_profile && LoginMonitorPID != 0) |
| signal_child(LoginMonitorPID, signal); |
| } |
| |
| /* |
| * BackendStartup -- start backend process |
| * |
| * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise. |
| * |
| * Note: if you change this code, also consider StartAutovacuumWorker. |
| */ |
| static int |
| BackendStartup(Port *port) |
| { |
| Backend *bn; /* for backend cleanup */ |
| pid_t pid; |
| |
| /* |
| * Create backend data structure. Better before the fork() so we can |
| * handle failure cleanly. |
| */ |
| bn = (Backend *) malloc(sizeof(Backend)); |
| if (!bn) |
| { |
| ereport(LOG, |
| (errcode(ERRCODE_OUT_OF_MEMORY), |
| errmsg("out of memory"))); |
| return STATUS_ERROR; |
| } |
| |
| /* |
| * Compute the cancel key that will be assigned to this backend. The |
| * backend will have its own copy in the forked-off process' value of |
| * MyCancelKey, so that it can transmit the key to the frontend. |
| */ |
| if (!RandomCancelKey(&MyCancelKey)) |
| { |
| free(bn); |
| ereport(LOG, |
| (errcode(ERRCODE_INTERNAL_ERROR), |
| errmsg("could not generate random cancel key"))); |
| return STATUS_ERROR; |
| } |
| |
| bn->cancel_key = MyCancelKey; |
| |
| /* Pass down canAcceptConnections state */ |
| port->canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL); |
| bn->dead_end = (port->canAcceptConnections != CAC_OK && |
| port->canAcceptConnections != CAC_SUPERUSER && |
| port->canAcceptConnections != CAC_MIRROR_READY); |
| |
| /* |
| * Unless it's a dead_end child, assign it a child slot number |
| */ |
| if (!bn->dead_end) |
| bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); |
| else |
| bn->child_slot = 0; |
| |
| /* Hasn't asked to be notified about any bgworkers yet */ |
| bn->bgworker_notify = false; |
| |
| #ifdef EXEC_BACKEND |
| pid = backend_forkexec(port); |
| #else /* !EXEC_BACKEND */ |
| pid = fork_process(); |
| if (pid == 0) /* child */ |
| { |
| free(bn); |
| |
| /* Detangle from postmaster */ |
| InitPostmasterChild(); |
| |
| /* Close the postmaster's sockets */ |
| ClosePostmasterPorts(false); |
| |
| /* Perform additional initialization and collect startup packet */ |
| BackendInitialize(port); |
| |
| /* And run the backend */ |
| BackendRun(port); |
| } |
| #endif /* EXEC_BACKEND */ |
| |
| if (pid < 0) |
| { |
| /* in parent, fork failed */ |
| int save_errno = errno; |
| |
| if (!bn->dead_end) |
| (void) ReleasePostmasterChildSlot(bn->child_slot); |
| free(bn); |
| errno = save_errno; |
| ereport(LOG, |
| (errmsg("could not fork new process for connection: %m"))); |
| report_fork_failure_to_client(port, save_errno); |
| return STATUS_ERROR; |
| } |
| |
| /* in parent, successful fork */ |
| ereport(DEBUG2, |
| (errmsg_internal("forked new backend, pid=%d socket=%d", |
| (int) pid, (int) port->sock))); |
| |
| /* |
| * Everything's been successful, it's safe to add this backend to our list |
| * of backends. |
| */ |
| bn->pid = pid; |
| bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */ |
| dlist_push_head(&BackendList, &bn->elem); |
| |
| #ifdef EXEC_BACKEND |
| if (!bn->dead_end) |
| ShmemBackendArrayAdd(bn); |
| #endif |
| |
| return STATUS_OK; |
| } |
| |
| /* |
| * Try to report backend fork() failure to client before we close the |
| * connection. Since we do not care to risk blocking the postmaster on |
| * this connection, we set the connection to non-blocking and try only once. |
| * |
| * This is grungy special-purpose code; we cannot use backend libpq since |
| * it's not up and running. |
| */ |
| static void |
| report_fork_failure_to_client(Port *port, int errnum) |
| { |
| char buffer[1000]; |
| int rc; |
| |
| /* Format the error message packet (always V2 protocol) */ |
| snprintf(buffer, sizeof(buffer), "E%s%s\n", |
| _("could not fork new process for connection: "), |
| strerror(errnum)); |
| |
| /* Set port to non-blocking. Don't do send() if this fails */ |
| if (!pg_set_noblock(port->sock)) |
| return; |
| |
| /* We'll retry after EINTR, but ignore all other failures */ |
| do |
| { |
| rc = send(port->sock, buffer, strlen(buffer) + 1, 0); |
| } while (rc < 0 && errno == EINTR); |
| } |
| |
| |
| /* |
| * BackendInitialize -- initialize an interactive (postmaster-child) |
| * backend process, and collect the client's startup packet. |
| * |
| * returns: nothing. Will not return at all if there's any failure. |
| * |
| * Note: this code does not depend on having any access to shared memory. |
| * Indeed, our approach to SIGTERM/timeout handling *requires* that |
| * shared memory not have been touched yet; see comments within. |
| * In the EXEC_BACKEND case, we are physically attached to shared memory |
| * but have not yet set up most of our local pointers to shmem structures. |
| */ |
| static void |
| BackendInitialize(Port *port) |
| { |
| int status; |
| int ret; |
| char remote_host[NI_MAXHOST]; |
| char remote_port[NI_MAXSERV]; |
| StringInfoData ps_data; |
| |
| /* Save port etc. for ps status */ |
| MyProcPort = port; |
| |
| /* Tell fd.c about the long-lived FD associated with the port */ |
| ReserveExternalFD(); |
| |
| /* |
| * PreAuthDelay is a debugging aid for investigating problems in the |
| * authentication cycle: it can be set in postgresql.conf to allow time to |
| * attach to the newly-forked backend with a debugger. (See also |
| * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it |
| * is not honored until after authentication.) |
| */ |
| if (PreAuthDelay > 0) |
| pg_usleep(PreAuthDelay * 1000000L); |
| |
| /* This flag will remain set until InitPostgres finishes authentication */ |
| ClientAuthInProgress = true; /* limit visibility of log messages */ |
| |
| /* set these to empty in case they are needed before we set them up */ |
| port->remote_host = ""; |
| port->remote_port = ""; |
| |
| /* |
| * Initialize libpq and enable reporting of ereport errors to the client. |
| * Must do this now because authentication uses libpq to send messages. |
| */ |
| pq_init(); /* initialize libpq to talk to client */ |
| whereToSendOutput = DestRemote; /* now safe to ereport to client */ |
| |
| /* |
| * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying |
| * to collect the startup packet; while SIGQUIT results in _exit(2). |
| * Otherwise the postmaster cannot shutdown the database FAST or IMMED |
| * cleanly if a buggy client fails to send the packet promptly. |
| * |
| * Exiting with _exit(1) is only possible because we have not yet touched |
| * shared memory; therefore no outside-the-process state needs to get |
| * cleaned up. |
| */ |
| pqsignal(SIGTERM, process_startup_packet_die); |
| /* SIGQUIT handler was already set up by InitPostmasterChild */ |
| InitializeTimeouts(); /* establishes SIGALRM handler */ |
| PG_SETMASK(&StartupBlockSig); |
| |
| /* |
| * Get the remote host name and port for logging and status display. |
| */ |
| remote_host[0] = '\0'; |
| remote_port[0] = '\0'; |
| if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen, |
| remote_host, sizeof(remote_host), |
| remote_port, sizeof(remote_port), |
| (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0) |
| ereport(WARNING, |
| (errmsg_internal("pg_getnameinfo_all() failed: %s", |
| gai_strerror(ret)))); |
| |
| /* |
| * Save remote_host and remote_port in port structure (after this, they |
| * will appear in log_line_prefix data for log messages). |
| */ |
| port->remote_host = strdup(remote_host); |
| port->remote_port = strdup(remote_port); |
| |
| /* And now we can issue the Log_connections message, if wanted */ |
| if (Log_connections) |
| { |
| if (remote_port[0]) |
| ereport(LOG, |
| (errmsg("connection received: host=%s port=%s", |
| remote_host, |
| remote_port))); |
| else |
| ereport(LOG, |
| (errmsg("connection received: host=%s", |
| remote_host))); |
| } |
| |
| /* |
| * If we did a reverse lookup to name, we might as well save the results |
| * rather than possibly repeating the lookup during authentication. |
| * |
| * Note that we don't want to specify NI_NAMEREQD above, because then we'd |
| * get nothing useful for a client without an rDNS entry. Therefore, we |
| * must check whether we got a numeric IPv4 or IPv6 address, and not save |
| * it into remote_hostname if so. (This test is conservative and might |
| * sometimes classify a hostname as numeric, but an error in that |
| * direction is safe; it only results in a possible extra lookup.) |
| */ |
| if (log_hostname && |
| ret == 0 && |
| strspn(remote_host, "0123456789.") < strlen(remote_host) && |
| strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host)) |
| port->remote_hostname = strdup(remote_host); |
| |
| /* |
| * Ready to begin client interaction. We will give up and _exit(1) after |
| * a time delay, so that a broken client can't hog a connection |
| * indefinitely. PreAuthDelay and any DNS interactions above don't count |
| * against the time limit. |
| * |
| * Note: AuthenticationTimeout is applied here while waiting for the |
| * startup packet, and then again in InitPostgres for the duration of any |
| * authentication operations. So a hostile client could tie up the |
| * process for nearly twice AuthenticationTimeout before we kick him off. |
| * |
| * Note: because PostgresMain will call InitializeTimeouts again, the |
| * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay |
| * since we never use it again after this function. |
| */ |
| RegisterTimeout(STARTUP_PACKET_TIMEOUT, StartupPacketTimeoutHandler); |
| enable_timeout_after(STARTUP_PACKET_TIMEOUT, AuthenticationTimeout * 1000); |
| |
| /* |
| * Receive the startup packet (which might turn out to be a cancel request |
| * packet). |
| */ |
| status = ProcessStartupPacket(port, false, false); |
| |
| /* |
| * Disable the timeout, and prevent SIGTERM again. |
| */ |
| disable_timeout(STARTUP_PACKET_TIMEOUT, false); |
| PG_SETMASK(&BlockSig); |
| |
| /* |
| * As a safety check that nothing in startup has yet performed |
| * shared-memory modifications that would need to be undone if we had |
| * exited through SIGTERM or timeout above, check that no on_shmem_exit |
| * handlers have been registered yet. (This isn't terribly bulletproof, |
| * since someone might misuse an on_proc_exit handler for shmem cleanup, |
| * but it's a cheap and helpful check. We cannot disallow on_proc_exit |
| * handlers unfortunately, since pq_init() already registered one.) |
| */ |
| check_on_shmem_exit_lists_are_empty(); |
| |
| /* |
| * Stop here if it was bad or a cancel packet. ProcessStartupPacket |
| * already did any appropriate error reporting. |
| */ |
| if (status != STATUS_OK) |
| proc_exit(0); |
| |
| /* |
| * Now that we have the user and database name, we can set the process |
| * title for ps. It's good to do this as early as possible in startup. |
| */ |
| initStringInfo(&ps_data); |
| if (am_walsender) |
| { |
| appendStringInfo(&ps_data, "%s ", GetBackendTypeDesc(B_WAL_SENDER)); |
| set_ps_display_username(GetBackendTypeDesc(B_WAL_SENDER)); |
| } |
| else if (am_ftshandler) |
| { |
| appendStringInfo(&ps_data, "%s ", "fts handler process"); |
| set_ps_display_username("fts handler process"); |
| } |
| else if (am_faulthandler) |
| { |
| appendStringInfo(&ps_data, "%s ", "fault handler process"); |
| set_ps_display_username("fault handler process"); |
| } |
| else |
| { |
| set_ps_display_username(port->user_name); |
| } |
| |
| appendStringInfo(&ps_data, "%s ", port->user_name); |
| if (!am_walsender && !am_ftshandler && !am_faulthandler) |
| appendStringInfo(&ps_data, "%s ", port->database_name); |
| appendStringInfo(&ps_data, "%s", port->remote_host); |
| if (port->remote_port[0] != '\0') |
| appendStringInfo(&ps_data, "(%s)", port->remote_port); |
| |
| init_ps_display(ps_data.data); |
| pfree(ps_data.data); |
| |
| set_ps_display("initializing"); |
| } |
| |
| |
| /* |
| * BackendRun -- set up the backend's argument list and invoke PostgresMain() |
| * |
| * returns: |
| * Doesn't return at all. |
| */ |
| static void |
| BackendRun(Port *port) |
| { |
| char *av[2]; |
| const int ac = 1; |
| |
| av[0] = "postgres"; |
| av[1] = NULL; |
| |
| /* |
| * Make sure we aren't in PostmasterContext anymore. (We can't delete it |
| * just yet, though, because InitPostgres will need the HBA data.) |
| */ |
| MemoryContextSwitchTo(TopMemoryContext); |
| |
| PostgresMain(ac, av, port->database_name, port->user_name); |
| } |
| |
| |
| #ifdef EXEC_BACKEND |
| |
| /* |
| * postmaster_forkexec -- fork and exec a postmaster subprocess |
| * |
| * The caller must have set up the argv array already, except for argv[2] |
| * which will be filled with the name of the temp variable file. |
| * |
| * Returns the child process PID, or -1 on fork failure (a suitable error |
| * message has been logged on failure). |
| * |
| * All uses of this routine will dispatch to SubPostmasterMain in the |
| * child process. |
| */ |
| pid_t |
| postmaster_forkexec(int argc, char *argv[]) |
| { |
| Port port; |
| |
| /* This entry point passes dummy values for the Port variables */ |
| memset(&port, 0, sizeof(port)); |
| return internal_forkexec(argc, argv, &port); |
| } |
| |
| /* |
| * backend_forkexec -- fork/exec off a backend process |
| * |
| * Some operating systems (WIN32) don't have fork() so we have to simulate |
| * it by storing parameters that need to be passed to the child and |
| * then create a new child process. |
| * |
| * returns the pid of the fork/exec'd process, or -1 on failure |
| */ |
| static pid_t |
| backend_forkexec(Port *port) |
| { |
| char *av[4]; |
| int ac = 0; |
| |
| av[ac++] = "postgres"; |
| av[ac++] = "--forkbackend"; |
| av[ac++] = NULL; /* filled in by internal_forkexec */ |
| |
| av[ac] = NULL; |
| Assert(ac < lengthof(av)); |
| |
| return internal_forkexec(ac, av, port); |
| } |
| |
| #ifndef WIN32 |
| |
| /* |
| * internal_forkexec non-win32 implementation |
| * |
| * - writes out backend variables to the parameter file |
| * - fork():s, and then exec():s the child process |
| */ |
| static pid_t |
| internal_forkexec(int argc, char *argv[], Port *port) |
| { |
| static unsigned long tmpBackendFileNum = 0; |
| pid_t pid; |
| char tmpfilename[MAXPGPATH]; |
| BackendParameters param; |
| FILE *fp; |
| |
| if (!save_backend_variables(¶m, port)) |
| return -1; /* log made by save_backend_variables */ |
| |
| /* Calculate name for temp file */ |
| snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu", |
| PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX, |
| MyProcPid, ++tmpBackendFileNum); |
| |
| /* Open file */ |
| fp = AllocateFile(tmpfilename, PG_BINARY_W); |
| if (!fp) |
| { |
| /* |
| * As in OpenTemporaryFileInTablespace, try to make the temp-file |
| * directory, ignoring errors. |
| */ |
| (void) MakePGDirectory(PG_TEMP_FILES_DIR); |
| |
| fp = AllocateFile(tmpfilename, PG_BINARY_W); |
| if (!fp) |
| { |
| ereport(LOG, |
| (errcode_for_file_access(), |
| errmsg("could not create file \"%s\": %m", |
| tmpfilename))); |
| return -1; |
| } |
| } |
| |
| if (fwrite(¶m, sizeof(param), 1, fp) != 1) |
| { |
| ereport(LOG, |
| (errcode_for_file_access(), |
| errmsg("could not write to file \"%s\": %m", tmpfilename))); |
| FreeFile(fp); |
| return -1; |
| } |
| |
| /* Release file */ |
| if (FreeFile(fp)) |
| { |
| ereport(LOG, |
| (errcode_for_file_access(), |
| errmsg("could not write to file \"%s\": %m", tmpfilename))); |
| return -1; |
| } |
| |
| /* Make sure caller set up argv properly */ |
| Assert(argc >= 3); |
| Assert(argv[argc] == NULL); |
| Assert(strncmp(argv[1], "--fork", 6) == 0); |
| Assert(argv[2] == NULL); |
| |
| /* Insert temp file name after --fork argument */ |
| argv[2] = tmpfilename; |
| |
| /* Fire off execv in child */ |
| if ((pid = fork_process()) == 0) |
| { |
| if (execv(postgres_exec_path, argv) < 0) |
| { |
| ereport(LOG, |
| (errmsg("could not execute server process \"%s\": %m", |
| postgres_exec_path))); |
| /* We're already in the child process here, can't return */ |
| exit(1); |
| } |
| } |
| |
| return pid; /* Parent returns pid, or -1 on fork failure */ |
| } |
| #else /* WIN32 */ |
| |
| /* |
| * internal_forkexec win32 implementation |
| * |
| * - starts backend using CreateProcess(), in suspended state |
| * - writes out backend variables to the parameter file |
| * - during this, duplicates handles and sockets required for |
| * inheritance into the new process |
| * - resumes execution of the new process once the backend parameter |
| * file is complete. |
| */ |
| static pid_t |
| internal_forkexec(int argc, char *argv[], Port *port) |
| { |
| int retry_count = 0; |
| STARTUPINFO si; |
| PROCESS_INFORMATION pi; |
| int i; |
| int j; |
| char cmdLine[MAXPGPATH * 2]; |
| HANDLE paramHandle; |
| BackendParameters *param; |
| SECURITY_ATTRIBUTES sa; |
| char paramHandleStr[32]; |
| win32_deadchild_waitinfo *childinfo; |
| |
| /* Make sure caller set up argv properly */ |
| Assert(argc >= 3); |
| Assert(argv[argc] == NULL); |
| Assert(strncmp(argv[1], "--fork", 6) == 0); |
| Assert(argv[2] == NULL); |
| |
| /* Resume here if we need to retry */ |
| retry: |
| |
| /* Set up shared memory for parameter passing */ |
| ZeroMemory(&sa, sizeof(sa)); |
| sa.nLength = sizeof(sa); |
| sa.bInheritHandle = TRUE; |
| paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE, |
| &sa, |
| PAGE_READWRITE, |
| 0, |
| sizeof(BackendParameters), |
| NULL); |
| if (paramHandle == INVALID_HANDLE_VALUE) |
| { |
| ereport(LOG, |
| (errmsg("could not create backend parameter file mapping: error code %lu", |
| GetLastError()))); |
| return -1; |
| } |
| |
| param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters)); |
| if (!param) |
| { |
| ereport(LOG, |
| (errmsg("could not map backend parameter memory: error code %lu", |
| GetLastError()))); |
| CloseHandle(paramHandle); |
| return -1; |
| } |
| |
| /* Insert temp file name after --fork argument */ |
| #ifdef _WIN64 |
| sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle); |
| #else |
| sprintf(paramHandleStr, "%lu", (DWORD) paramHandle); |
| #endif |
| argv[2] = paramHandleStr; |
| |
| /* Format the cmd line */ |
| cmdLine[sizeof(cmdLine) - 1] = '\0'; |
| cmdLine[sizeof(cmdLine) - 2] = '\0'; |
| snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path); |
| i = 0; |
| while (argv[++i] != NULL) |
| { |
| j = strlen(cmdLine); |
| snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]); |
| } |
| if (cmdLine[sizeof(cmdLine) - 2] != '\0') |
| { |
| ereport(LOG, |
| (errmsg("subprocess command line too long"))); |
| UnmapViewOfFile(param); |
| CloseHandle(paramHandle); |
| return -1; |
| } |
| |
| memset(&pi, 0, sizeof(pi)); |
| memset(&si, 0, sizeof(si)); |
| si.cb = sizeof(si); |
| |
| /* |
| * Create the subprocess in a suspended state. This will be resumed later, |
| * once we have written out the parameter file. |
| */ |
| if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED, |
| NULL, NULL, &si, &pi)) |
| { |
| ereport(LOG, |
| (errmsg("CreateProcess() call failed: %m (error code %lu)", |
| GetLastError()))); |
| UnmapViewOfFile(param); |
| CloseHandle(paramHandle); |
| return -1; |
| } |
| |
| if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId)) |
| { |
| /* |
| * log made by save_backend_variables, but we have to clean up the |
| * mess with the half-started process |
| */ |
| if (!TerminateProcess(pi.hProcess, 255)) |
| ereport(LOG, |
| (errmsg_internal("could not terminate unstarted process: error code %lu", |
| GetLastError()))); |
| CloseHandle(pi.hProcess); |
| CloseHandle(pi.hThread); |
| UnmapViewOfFile(param); |
| CloseHandle(paramHandle); |
| return -1; /* log made by save_backend_variables */ |
| } |
| |
| /* Drop the parameter shared memory that is now inherited to the backend */ |
| if (!UnmapViewOfFile(param)) |
| ereport(LOG, |
| (errmsg("could not unmap view of backend parameter file: error code %lu", |
| GetLastError()))); |
| if (!CloseHandle(paramHandle)) |
| ereport(LOG, |
| (errmsg("could not close handle to backend parameter file: error code %lu", |
| GetLastError()))); |
| |
| /* |
| * Reserve the memory region used by our main shared memory segment before |
| * we resume the child process. Normally this should succeed, but if ASLR |
| * is active then it might sometimes fail due to the stack or heap having |
| * gotten mapped into that range. In that case, just terminate the |
| * process and retry. |
| */ |
| if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess)) |
| { |
| /* pgwin32_ReserveSharedMemoryRegion already made a log entry */ |
| if (!TerminateProcess(pi.hProcess, 255)) |
| ereport(LOG, |
| (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu", |
| GetLastError()))); |
| CloseHandle(pi.hProcess); |
| CloseHandle(pi.hThread); |
| if (++retry_count < 100) |
| goto retry; |
| ereport(LOG, |
| (errmsg("giving up after too many tries to reserve shared memory"), |
| errhint("This might be caused by ASLR or antivirus software."))); |
| return -1; |
| } |
| |
| /* |
| * Now that the backend variables are written out, we start the child |
| * thread so it can start initializing while we set up the rest of the |
| * parent state. |
| */ |
| if (ResumeThread(pi.hThread) == -1) |
| { |
| if (!TerminateProcess(pi.hProcess, 255)) |
| { |
| ereport(LOG, |
| (errmsg_internal("could not terminate unstartable process: error code %lu", |
| GetLastError()))); |
| CloseHandle(pi.hProcess); |
| CloseHandle(pi.hThread); |
| return -1; |
| } |
| CloseHandle(pi.hProcess); |
| CloseHandle(pi.hThread); |
| ereport(LOG, |
| (errmsg_internal("could not resume thread of unstarted process: error code %lu", |
| GetLastError()))); |
| return -1; |
| } |
| |
| /* |
| * Queue a waiter to signal when this child dies. The wait will be handled |
| * automatically by an operating system thread pool. |
| * |
| * Note: use malloc instead of palloc, since it needs to be thread-safe. |
| * Struct will be free():d from the callback function that runs on a |
| * different thread. |
| */ |
| childinfo = malloc(sizeof(win32_deadchild_waitinfo)); |
| if (!childinfo) |
| ereport(FATAL, |
| (errcode(ERRCODE_OUT_OF_MEMORY), |
| errmsg("out of memory"))); |
| |
| childinfo->procHandle = pi.hProcess; |
| childinfo->procId = pi.dwProcessId; |
| |
| if (!RegisterWaitForSingleObject(&childinfo->waitHandle, |
| pi.hProcess, |
| pgwin32_deadchild_callback, |
| childinfo, |
| INFINITE, |
| WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD)) |
| ereport(FATAL, |
| (errmsg_internal("could not register process for wait: error code %lu", |
| GetLastError()))); |
| |
| /* Don't close pi.hProcess here - the wait thread needs access to it */ |
| |
| CloseHandle(pi.hThread); |
| |
| return pi.dwProcessId; |
| } |
| #endif /* WIN32 */ |
| |
| /* |
| * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent |
| * to what it would be if we'd simply forked on Unix, and then |
| * dispatch to the appropriate place. |
| * |
| * The first two command line arguments are expected to be "--forkFOO" |
| * (where FOO indicates which postmaster child we are to become), and |
| * the name of a variables file that we can read to load data that would |
| * have been inherited by fork() on Unix. Remaining arguments go to the |
| * subprocess FooMain() routine. |
| */ |
| void |
| SubPostmasterMain(int argc, char *argv[]) |
| { |
| Port port; |
| |
| /* In EXEC_BACKEND case we will not have inherited these settings */ |
| IsPostmasterEnvironment = true; |
| whereToSendOutput = DestNone; |
| |
| /* Setup essential subsystems (to ensure elog() behaves sanely) */ |
| InitializeGUCOptions(); |
| |
| /* Check we got appropriate args */ |
| if (argc < 3) |
| elog(FATAL, "invalid subpostmaster invocation"); |
| |
| /* Read in the variables file */ |
| memset(&port, 0, sizeof(Port)); |
| read_backend_variables(argv[2], &port); |
| |
| /* Close the postmaster's sockets (as soon as we know them) */ |
| ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0); |
| |
| /* |
| * Start our win32 signal implementation. This has to be done after we |
| * read the backend variables, because we need to pick up the signal pipe |
| * from the parent process. |
| */ |
| #ifdef WIN32 |
| pgwin32_signal_initialize(); |
| #endif |
| |
| /* Setup as postmaster child */ |
| InitPostmasterChild(); |
| |
| /* |
| * If appropriate, physically re-attach to shared memory segment. We want |
| * to do this before going any further to ensure that we can attach at the |
| * same address the postmaster used. On the other hand, if we choose not |
| * to re-attach, we may have other cleanup to do. |
| * |
| * If testing EXEC_BACKEND on Linux, you should run this as root before |
| * starting the postmaster: |
| * |
| * echo 0 >/proc/sys/kernel/randomize_va_space |
| * |
| * This prevents using randomized stack and code addresses that cause the |
| * child process's memory map to be different from the parent's, making it |
| * sometimes impossible to attach to shared memory at the desired address. |
| * Return the setting to its old value (usually '1' or '2') when finished. |
| */ |
| if (strcmp(argv[1], "--forkbackend") == 0 || |
| strcmp(argv[1], "--forkavlauncher") == 0 || |
| strcmp(argv[1], "--forkavworker") == 0 || |
| strcmp(argv[1], "--forkautovac") == 0 || |
| strcmp(argv[1], "--forkglobaldeadlockdetector") == 0 || |
| strcmp(argv[1], "--forkboot") == 0 || |
| strncmp(argv[1], "--forkbgworker=", 15) == 0) |
| PGSharedMemoryReAttach(); |
| else |
| PGSharedMemoryNoReAttach(); |
| |
| /* autovacuum needs this set before calling InitProcess */ |
| if (strcmp(argv[1], "--forkavlauncher") == 0) |
| AutovacuumLauncherIAm(); |
| if (strcmp(argv[1], "--forkavworker") == 0) |
| AutovacuumWorkerIAm(); |
| |
| /* Read in remaining GUC variables */ |
| read_nondefault_variables(); |
| |
| /* |
| * CDB: gpdb auxilary process like fts probe, dtx recovery process is |
| * essential, we need to load them ahead of custom shared preload libraries |
| * to avoid exceeding max_worker_processes. |
| */ |
| load_auxiliary_libraries(); |
| |
| /* |
| * Check that the data directory looks valid, which will also check the |
| * privileges on the data directory and update our umask and file/group |
| * variables for creating files later. Note: this should really be done |
| * before we create any files or directories. |
| */ |
| checkDataDir(); |
| |
| /* |
| * (re-)read control file, as it contains config. The postmaster will |
| * already have read this, but this process doesn't know about that. |
| */ |
| LocalProcessControlFile(false); |
| |
| /* |
| * Reload any libraries that were preloaded by the postmaster. Since we |
| * exec'd this process, those libraries didn't come along with us; but we |
| * should load them into all child processes to be consistent with the |
| * non-EXEC_BACKEND behavior. |
| */ |
| process_shared_preload_libraries(); |
| |
| /* Run backend or appropriate child */ |
| if (strcmp(argv[1], "--forkbackend") == 0) |
| { |
| Assert(argc == 3); /* shouldn't be any more args */ |
| |
| /* |
| * Need to reinitialize the SSL library in the backend, since the |
| * context structures contain function pointers and cannot be passed |
| * through the parameter file. |
| * |
| * If for some reason reload fails (maybe the user installed broken |
| * key files), soldier on without SSL; that's better than all |
| * connections becoming impossible. |
| * |
| * XXX should we do this in all child processes? For the moment it's |
| * enough to do it in backend children. |
| */ |
| #ifdef USE_SSL |
| if (EnableSSL) |
| { |
| if (secure_initialize(false) == 0) |
| LoadedSSL = true; |
| else |
| ereport(LOG, |
| (errmsg("SSL configuration could not be loaded in child process"))); |
| } |
| #endif |
| |
| /* |
| * Perform additional initialization and collect startup packet. |
| * |
| * We want to do this before InitProcess() for a couple of reasons: 1. |
| * so that we aren't eating up a PGPROC slot while waiting on the |
| * client. 2. so that if InitProcess() fails due to being out of |
| * PGPROC slots, we have already initialized libpq and are able to |
| * report the error to the client. |
| */ |
| BackendInitialize(&port); |
| |
| /* Restore basic shared memory pointers */ |
| InitShmemAccess(UsedShmemSegAddr); |
| |
| /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */ |
| InitProcess(); |
| |
| /* Attach process to shared data structures */ |
| CreateSharedMemoryAndSemaphores(); |
| |
| /* And run the backend */ |
| BackendRun(&port); /* does not return */ |
| } |
| if (strcmp(argv[1], "--forkboot") == 0) |
| { |
| /* Restore basic shared memory pointers */ |
| InitShmemAccess(UsedShmemSegAddr); |
| |
| /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */ |
| InitAuxiliaryProcess(); |
| |
| /* Attach process to shared data structures */ |
| CreateSharedMemoryAndSemaphores(); |
| |
| AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */ |
| } |
| if (strcmp(argv[1], "--forkavlauncher") == 0) |
| { |
| /* Restore basic shared memory pointers */ |
| InitShmemAccess(UsedShmemSegAddr); |
| |
| /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */ |
| InitProcess(); |
| |
| /* Attach process to shared data structures */ |
| CreateSharedMemoryAndSemaphores(); |
| |
| AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */ |
| } |
| if (strcmp(argv[1], "--forkavworker") == 0) |
| { |
| /* Restore basic shared memory pointers */ |
| InitShmemAccess(UsedShmemSegAddr); |
| |
| /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */ |
| InitProcess(); |
| |
| /* Attach process to shared data structures */ |
| CreateSharedMemoryAndSemaphores(); |
| |
| AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */ |
| } |
| if (strncmp(argv[1], "--forkbgworker=", 15) == 0) |
| { |
| int shmem_slot; |
| |
| /* do this as early as possible; in particular, before InitProcess() */ |
| IsBackgroundWorker = true; |
| |
| /* Restore basic shared memory pointers */ |
| InitShmemAccess(UsedShmemSegAddr); |
| |
| /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */ |
| InitProcess(); |
| |
| /* Attach process to shared data structures */ |
| CreateSharedMemoryAndSemaphores(); |
| |
| /* Fetch MyBgworkerEntry from shared memory */ |
| shmem_slot = atoi(argv[1] + 15); |
| MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot); |
| |
| StartBackgroundWorker(); |
| } |
| if (strcmp(argv[1], "--forkcol") == 0) |
| { |
| /* Do not want to attach to shared memory */ |
| |
| PgstatCollectorMain(argc, argv); /* does not return */ |
| } |
| if (strcmp(argv[1], "--forklog") == 0) |
| { |
| /* Do not want to attach to shared memory */ |
| |
| SysLoggerMain(argc, argv); /* does not return */ |
| } |
| |
| abort(); /* shouldn't get here */ |
| } |
| #endif /* EXEC_BACKEND */ |
| |
| |
| /* |
| * ExitPostmaster -- cleanup |
| * |
| * Do NOT call exit() directly --- always go through here! |
| */ |
| static void |
| ExitPostmaster(int status) |
| { |
| #ifdef HAVE_PTHREAD_IS_THREADED_NP |
| |
| /* |
| * There is no known cause for a postmaster to become multithreaded after |
| * startup. Recheck to account for the possibility of unknown causes. |
| * This message uses LOG level, because an unclean shutdown at this point |
| * would usually not look much different from a clean shutdown. |
| */ |
| if (pthread_is_threaded_np() != 0) |
| ereport(LOG, |
| (errcode(ERRCODE_INTERNAL_ERROR), |
| errmsg_internal("postmaster became multithreaded"), |
| errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT))); |
| #endif |
| |
| /* should cleanup shared memory and kill all backends */ |
| |
| /* |
| * Not sure of the semantics here. When the Postmaster dies, should the |
| * backends all be killed? probably not. |
| * |
| * MUST -- vadim 05-10-1999 |
| */ |
| |
| proc_exit(status); |
| } |
| |
| /* |
| * sigusr1_handler - handle signal conditions from child processes |
| */ |
| static void |
| sigusr1_handler(SIGNAL_ARGS) |
| { |
| int save_errno = errno; |
| |
| /* |
| * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in |
| * unexpected states. If the startup process quickly starts up, completes |
| * recovery, exits, we might process the death of the startup process |
| * first. We don't want to go back to recovery in that case. |
| */ |
| if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) && |
| pmState == PM_STARTUP && Shutdown == NoShutdown) |
| { |
| /* WAL redo has started. We're out of reinitialization. */ |
| bool promotion_requested = false; |
| |
| FatalError = false; |
| AbortStartTime = 0; |
| |
| /* |
| * Crank up the background tasks. It doesn't matter if this fails, |
| * we'll just try again later. |
| */ |
| Assert(CheckpointerPID == 0); |
| CheckpointerPID = StartCheckpointer(); |
| Assert(BgWriterPID == 0); |
| BgWriterPID = StartBackgroundWriter(); |
| |
| /* |
| * Start the archiver if we're responsible for (re-)archiving received |
| * files. |
| */ |
| Assert(PgArchPID == 0); |
| if (XLogArchivingAlways()) |
| PgArchPID = StartArchiver(); |
| |
| /* |
| * GPDB: if promote trigger file exist we don't wish to convey |
| * PM_STATUS_STANDBY, instead wish pg_ctl -w to wait till |
| * connections can be actually accepted by the database. |
| */ |
| if (PromoteTriggerFile != NULL && strcmp(PromoteTriggerFile, "") != 0) |
| { |
| struct stat stat_buf; |
| |
| if (stat(PromoteTriggerFile, &stat_buf) == 0) |
| promotion_requested = true; |
| } |
| /* |
| * GPDB: Setting recovery_target_action |
| * configuration parameter to 'promote' will also result |
| * into promotion after recovery is completed. |
| */ |
| if (recoveryTargetAction == RECOVERY_TARGET_ACTION_PROMOTE) |
| promotion_requested = true; |
| |
| /* |
| * If we aren't planning to enter hot standby mode later, treat |
| * RECOVERY_STARTED as meaning we're out of startup, and report status |
| * accordingly. |
| * |
| * GPDB: Avoid PM_STATUS_STANDBY if promotion requested as wish "pg_ctl -w" |
| * to wait till connections can be actually accepted by the database via |
| * PM_STATUS_READY state instead. PM_STATUS_STANDBY will incorrectly show |
| * database is ready to accept connections during promotion. |
| */ |
| if (!EnableHotStandby && !promotion_requested) |
| { |
| AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STANDBY); |
| #ifdef USE_SYSTEMD |
| sd_notify(0, "READY=1"); |
| #endif |
| } |
| |
| pmState = PM_RECOVERY; |
| } |
| |
| if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) && |
| pmState == PM_RECOVERY && Shutdown == NoShutdown) |
| { |
| /* |
| * Likewise, start other special children as needed. |
| */ |
| Assert(PgStatPID == 0); |
| PgStatPID = pgstat_start(); |
| |
| ereport(LOG, |
| (errmsg("database system is ready to accept read-only connections"))); |
| |
| /* Report status */ |
| AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY); |
| #ifdef USE_SYSTEMD |
| sd_notify(0, "READY=1"); |
| #endif |
| |
| pmState = PM_HOT_STANDBY; |
| connsAllowed = ALLOW_ALL_CONNS; |
| |
| /* Some workers may be scheduled to start now */ |
| StartWorkerNeeded = true; |
| } |
| |
| /* Process background worker state changes. */ |
| if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE)) |
| { |
| /* Accept new worker requests only if not stopping. */ |
| BackgroundWorkerStateChange(pmState < PM_STOP_BACKENDS); |
| StartWorkerNeeded = true; |
| } |
| |
| if (StartWorkerNeeded || HaveCrashedWorker) |
| maybe_start_bgworkers(); |
| |
| /* Tell syslogger to rotate logfile if requested */ |
| if (SysLoggerPID != 0) |
| { |
| if (CheckLogrotateSignal()) |
| { |
| signal_child(SysLoggerPID, SIGUSR1); |
| RemoveLogrotateSignalFiles(); |
| } |
| else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE)) |
| { |
| signal_child(SysLoggerPID, SIGUSR1); |
| } |
| } |
| |
| if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) && |
| Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS) |
| { |
| /* |
| * Start one iteration of the autovacuum daemon, even if autovacuuming |
| * is nominally not enabled. This is so we can have an active defense |
| * against transaction ID wraparound. We set a flag for the main loop |
| * to do it rather than trying to do it here --- this is because the |
| * autovac process itself may send the signal, and we want to handle |
| * that by launching another iteration as soon as the current one |
| * completes. |
| */ |
| start_autovac_launcher = true; |
| } |
| |
| if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) && |
| Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS) |
| { |
| /* The autovacuum launcher wants us to start a worker process. */ |
| StartAutovacuumWorker(); |
| } |
| |
| if (CheckPostmasterSignal(PMSIGNAL_START_LOGIN_MONITOR_WORKER) && |
| Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS) |
| { |
| /* The login monitor wants us to start a worker process. */ |
| StartLMWorker(); |
| } |
| |
| if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER)) |
| { |
| /* Startup Process wants us to start the walreceiver process. */ |
| /* Start immediately if possible, else remember request for later. */ |
| WalReceiverRequested = true; |
| MaybeStartWalReceiver(); |
| } |
| |
| #ifdef USE_INTERNAL_FTS |
| if (CheckPostmasterSignal(PMSIGNAL_WAKEN_FTS) && FtsProbePID() != 0) |
| { |
| signal_child(FtsProbePID(), SIGINT); |
| } |
| #endif |
| /* If no defined USE_INTERNAL_FTS |
| * FTS_TODO: support signal FTS to probe |
| */ |
| |
| if (CheckPostmasterSignal(PMSIGNAL_DTM_RECOVERED)) |
| { |
| /* Report status, dtx recovery completed successfully */ |
| AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_DTM_RECOVERED); |
| } |
| |
| if (CheckPostmasterSignal(PMSIGNAL_WAKEN_DTX_RECOVERY) && DtxRecoveryPID() != 0) |
| { |
| signal_child(DtxRecoveryPID(), SIGINT); |
| } |
| |
| /* |
| * Try to advance postmaster's state machine, if a child requests it. |
| * |
| * Be careful about the order of this action relative to sigusr1_handler's |
| * other actions. Generally, this should be after other actions, in case |
| * they have effects PostmasterStateMachine would need to know about. |
| * However, we should do it before the CheckPromoteSignal step, which |
| * cannot have any (immediate) effect on the state machine, but does |
| * depend on what state we're in now. |
| */ |
| if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE)) |
| { |
| PostmasterStateMachine(); |
| } |
| |
| /* |
| * Postmaster send signal to login monitor, notifying login monitor to |
| * process failed login. |
| */ |
| if (LoginMonitorPID != 0 && |
| pmState == PM_RUN && CheckPostmasterSignal(PMSIGNAL_FAILED_LOGIN)) |
| { |
| SendProcSignal(LoginMonitorPID, PROCSIG_FAILED_LOGIN, InvalidBackendId); |
| } |
| |
| if (StartupPID != 0 && |
| (pmState == PM_STARTUP || pmState == PM_RECOVERY || |
| pmState == PM_HOT_STANDBY) && |
| CheckPromoteSignal()) |
| { |
| /* |
| * Tell startup process to finish recovery. |
| * |
| * Leave the promote signal file in place and let the Startup process |
| * do the unlink. |
| */ |
| signal_child(StartupPID, SIGUSR2); |
| } |
| |
| errno = save_errno; |
| } |
| |
| /* |
| * SIGTERM while processing startup packet. |
| * |
| * Running proc_exit() from a signal handler would be quite unsafe. |
| * However, since we have not yet touched shared memory, we can just |
| * pull the plug and exit without running any atexit handlers. |
| * |
| * One might be tempted to try to send a message, or log one, indicating |
| * why we are disconnecting. However, that would be quite unsafe in itself. |
| * Also, it seems undesirable to provide clues about the database's state |
| * to a client that has not yet completed authentication, or even sent us |
| * a startup packet. |
| */ |
| static void |
| process_startup_packet_die(SIGNAL_ARGS) |
| { |
| _exit(1); |
| } |
| |
| /* |
| * Dummy signal handler |
| * |
| * We use this for signals that we don't actually use in the postmaster, |
| * but we do use in backends. If we were to SIG_IGN such signals in the |
| * postmaster, then a newly started backend might drop a signal that arrives |
| * before it's able to reconfigure its signal processing. (See notes in |
| * tcop/postgres.c.) |
| */ |
| static void |
| dummy_handler(SIGNAL_ARGS) |
| { |
| } |
| |
| /* |
| * Timeout while processing startup packet. |
| * As for process_startup_packet_die(), we exit via _exit(1). |
| */ |
| static void |
| StartupPacketTimeoutHandler(void) |
| { |
| _exit(1); |
| } |
| |
| |
| /* |
| * Generate a random cancel key. |
| */ |
| static bool |
| RandomCancelKey(int32 *cancel_key) |
| { |
| return pg_strong_random(cancel_key, sizeof(int32)); |
| } |
| |
| /* |
| * Count up number of child processes of specified types (dead_end children |
| * are always excluded). |
| */ |
| static int |
| CountChildren(int target) |
| { |
| dlist_iter iter; |
| int cnt = 0; |
| |
| dlist_foreach(iter, &BackendList) |
| { |
| Backend *bp = dlist_container(Backend, elem, iter.cur); |
| |
| if (bp->dead_end) |
| continue; |
| |
| /* |
| * Since target == BACKEND_TYPE_ALL is the most common case, we test |
| * it first and avoid touching shared memory for every child. |
| */ |
| if (target != BACKEND_TYPE_ALL) |
| { |
| /* |
| * Assign bkend_type for any recently announced WAL Sender |
| * processes. |
| */ |
| if (bp->bkend_type == BACKEND_TYPE_NORMAL && |
| IsPostmasterChildWalSender(bp->child_slot)) |
| bp->bkend_type = BACKEND_TYPE_WALSND; |
| |
| if (!(target & bp->bkend_type)) |
| continue; |
| } |
| |
| cnt++; |
| } |
| return cnt; |
| } |
| |
| |
| /* |
| * StartChildProcess -- start an auxiliary process for the postmaster |
| * |
| * "type" determines what kind of child will be started. All child types |
| * initially go to AuxiliaryProcessMain, which will handle common setup. |
| * |
| * Return value of StartChildProcess is subprocess' PID, or 0 if failed |
| * to start subprocess. |
| */ |
| static pid_t |
| StartChildProcess(AuxProcType type) |
| { |
| pid_t pid; |
| char *av[10]; |
| int ac = 0; |
| char typebuf[32]; |
| |
| /* |
| * Set up command-line arguments for subprocess |
| */ |
| av[ac++] = "postgres"; |
| |
| #ifdef EXEC_BACKEND |
| av[ac++] = "--forkboot"; |
| av[ac++] = NULL; /* filled in by postmaster_forkexec */ |
| #endif |
| |
| snprintf(typebuf, sizeof(typebuf), "-x%d", type); |
| av[ac++] = typebuf; |
| |
| av[ac] = NULL; |
| Assert(ac < lengthof(av)); |
| |
| #ifdef EXEC_BACKEND |
| pid = postmaster_forkexec(ac, av); |
| #else /* !EXEC_BACKEND */ |
| pid = fork_process(); |
| |
| if (pid == 0) /* child */ |
| { |
| InitPostmasterChild(); |
| |
| /* Close the postmaster's sockets */ |
| ClosePostmasterPorts(false); |
| |
| /* Release postmaster's working memory context */ |
| MemoryContextSwitchTo(TopMemoryContext); |
| MemoryContextDelete(PostmasterContext); |
| PostmasterContext = NULL; |
| |
| AuxiliaryProcessMain(ac, av); /* does not return */ |
| } |
| #endif /* EXEC_BACKEND */ |
| |
| if (pid < 0) |
| { |
| /* in parent, fork failed */ |
| int save_errno = errno; |
| |
| errno = save_errno; |
| switch (type) |
| { |
| case StartupProcess: |
| ereport(LOG, |
| (errmsg("could not fork startup process: %m"))); |
| break; |
| case ArchiverProcess: |
| ereport(LOG, |
| (errmsg("could not fork archiver process: %m"))); |
| break; |
| case BgWriterProcess: |
| ereport(LOG, |
| (errmsg("could not fork background writer process: %m"))); |
| break; |
| case CheckpointerProcess: |
| ereport(LOG, |
| (errmsg("could not fork checkpointer process: %m"))); |
| break; |
| case WalWriterProcess: |
| ereport(LOG, |
| (errmsg("could not fork WAL writer process: %m"))); |
| break; |
| case WalReceiverProcess: |
| ereport(LOG, |
| (errmsg("could not fork WAL receiver process: %m"))); |
| break; |
| default: |
| ereport(LOG, |
| (errmsg("could not fork process: %m"))); |
| break; |
| } |
| |
| /* |
| * fork failure is fatal during startup, but there's no need to choke |
| * immediately if starting other child types fails. |
| */ |
| if (type == StartupProcess) |
| ExitPostmaster(1); |
| return 0; |
| } |
| |
| /* |
| * in parent, successful fork |
| */ |
| return pid; |
| } |
| |
| /* |
| * StartAutovacuumWorker |
| * Start an autovac worker process. |
| * |
| * This function is here because it enters the resulting PID into the |
| * postmaster's private backends list. |
| * |
| * NB -- this code very roughly matches BackendStartup. |
| */ |
| static void |
| StartAutovacuumWorker(void) |
| { |
| Backend *bn; |
| |
| /* |
| * If not in condition to run a process, don't try, but handle it like a |
| * fork failure. This does not normally happen, since the signal is only |
| * supposed to be sent by autovacuum launcher when it's OK to do it, but |
| * we have to check to avoid race-condition problems during DB state |
| * changes. |
| */ |
| if (canAcceptConnections(BACKEND_TYPE_AUTOVAC) == CAC_OK) |
| { |
| /* |
| * Compute the cancel key that will be assigned to this session. We |
| * probably don't need cancel keys for autovac workers, but we'd |
| * better have something random in the field to prevent unfriendly |
| * people from sending cancels to them. |
| */ |
| if (!RandomCancelKey(&MyCancelKey)) |
| { |
| ereport(LOG, |
| (errcode(ERRCODE_INTERNAL_ERROR), |
| errmsg("could not generate random cancel key"))); |
| return; |
| } |
| |
| bn = (Backend *) malloc(sizeof(Backend)); |
| if (bn) |
| { |
| bn->cancel_key = MyCancelKey; |
| |
| /* Autovac workers are not dead_end and need a child slot */ |
| bn->dead_end = false; |
| bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); |
| bn->bgworker_notify = false; |
| |
| bn->pid = StartAutoVacWorker(); |
| if (bn->pid > 0) |
| { |
| bn->bkend_type = BACKEND_TYPE_AUTOVAC; |
| dlist_push_head(&BackendList, &bn->elem); |
| #ifdef EXEC_BACKEND |
| ShmemBackendArrayAdd(bn); |
| #endif |
| /* all OK */ |
| return; |
| } |
| |
| /* |
| * fork failed, fall through to report -- actual error message was |
| * logged by StartAutoVacWorker |
| */ |
| (void) ReleasePostmasterChildSlot(bn->child_slot); |
| free(bn); |
| } |
| else |
| ereport(LOG, |
| (errcode(ERRCODE_OUT_OF_MEMORY), |
| errmsg("out of memory"))); |
| } |
| |
| /* |
| * Report the failure to the launcher, if it's running. (If it's not, we |
| * might not even be connected to shared memory, so don't try to call |
| * AutoVacWorkerFailed.) Note that we also need to signal it so that it |
| * responds to the condition, but we don't do that here, instead waiting |
| * for ServerLoop to do it. This way we avoid a ping-pong signaling in |
| * quick succession between the autovac launcher and postmaster in case |
| * things get ugly. |
| */ |
| if (AutoVacPID != 0) |
| { |
| AutoVacWorkerFailed(); |
| avlauncher_needs_signal = true; |
| } |
| } |
| |
| /* |
| * StartLMWorker |
| * Start a login monitor worker process. |
| * |
| * Like StartAutovacuumWorker, this function is here. As login monitor |
| * worker process is used like normal process, we set backend_type to |
| * NORMAL. |
| */ |
| static void |
| StartLMWorker(void) |
| { |
| Backend *bn; |
| |
| /* |
| * If not in condition to run a process, don't try, but handle it like a |
| * fork failure. This does not normally happen, since the signal is only |
| * supposed to be sent by login monitor launcher when it's OK to do it, |
| * but we have to check to avoid race-condition problems during DB state |
| * changes. |
| */ |
| if (canAcceptConnections(BACKEND_TYPE_NORMAL) == CAC_OK) |
| { |
| /* |
| * Compute the cancel key that will be assigned to this session. We |
| * probably don't need cancel keys for autovac workers, but we'd |
| * better have something random in the field to prevent unfriendly |
| * people from sending cancels to them。 |
| */ |
| if (!RandomCancelKey(&MyCancelKey)) |
| { |
| ereport(LOG, |
| (errcode(ERRCODE_INTERNAL_ERROR), |
| errmsg("could not generate random cancel key"))); |
| return; |
| } |
| |
| bn = (Backend *) malloc(sizeof(Backend)); |
| if (bn) |
| { |
| bn->cancel_key = MyCancelKey; |
| |
| /* Login monitor workers are not dead_end and need a child slot */ |
| bn->dead_end = false; |
| bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); |
| bn->bgworker_notify = false; |
| |
| bn->pid = StartLoginMonitorWorker(); |
| if (bn->pid > 0) |
| { |
| bn->bkend_type = BACKEND_TYPE_NORMAL; |
| dlist_push_head(&BackendList, &bn->elem); |
| #ifdef EXEC_BACKEND |
| ShmemBackendArrayAdd(bn); |
| #endif |
| |
| /* all OK */ |
| return; |
| } |
| |
| /* |
| * fork failed, fall through to report -- actual error message was |
| * logged by StartLoginMonitorWorker |
| */ |
| (void) ReleasePostmasterChildSlot(bn->child_slot); |
| free(bn); |
| } |
| else |
| ereport(LOG, |
| (errcode(ERRCODE_OUT_OF_MEMORY), |
| errmsg("out of memory"))); |
| } |
| |
| /* |
| * Notify failure to the launcher by latch, if it's running. (If it's not, |
| * we might not even be connected to shared memory, so don't try to call |
| * LoginMonitorWorkerFailed.) Note that we also need to notify it so that |
| * it responds to the condition, but we don't do that here, instead waiting |
| * for ServerLoop to do it. |
| */ |
| if (LoginMonitorPID != 0) |
| { |
| LoginMonitorWorkerFailed(); |
| } |
| } |
| |
| /* |
| * MaybeStartWalReceiver |
| * Start the WAL receiver process, if not running and our state allows. |
| * |
| * Note: if WalReceiverPID is already nonzero, it might seem that we should |
| * clear WalReceiverRequested. However, there's a race condition if the |
| * walreceiver terminates and the startup process immediately requests a new |
| * one: it's quite possible to get the signal for the request before reaping |
| * the dead walreceiver process. Better to risk launching an extra |
| * walreceiver than to miss launching one we need. (The walreceiver code |
| * has logic to recognize that it should go away if not needed.) |
| */ |
| static void |
| MaybeStartWalReceiver(void) |
| { |
| if (WalReceiverPID == 0 && |
| (pmState == PM_STARTUP || pmState == PM_RECOVERY || |
| pmState == PM_HOT_STANDBY) && |
| Shutdown <= SmartShutdown) |
| { |
| WalReceiverPID = StartWalReceiver(); |
| if (WalReceiverPID != 0) |
| { |
| WalReceiverRequested = false; |
| |
| /* wal receiver has been launched */ |
| SetMirrorReadyFlag(); |
| } |
| /* else leave the flag set, so we'll try again later */ |
| } |
| } |
| |
| |
| /* |
| * Create the opts file |
| */ |
| static bool |
| CreateOptsFile(int argc, char *argv[], char *fullprogname) |
| { |
| FILE *fp; |
| int i; |
| |
| #define OPTS_FILE "postmaster.opts" |
| |
| if ((fp = fopen(OPTS_FILE, "w")) == NULL) |
| { |
| ereport(LOG, |
| (errcode_for_file_access(), |
| errmsg("could not create file \"%s\": %m", OPTS_FILE))); |
| return false; |
| } |
| |
| fprintf(fp, "%s", fullprogname); |
| for (i = 1; i < argc; i++) |
| fprintf(fp, " \"%s\"", argv[i]); |
| fputs("\n", fp); |
| |
| if (fclose(fp)) |
| { |
| ereport(LOG, |
| (errcode_for_file_access(), |
| errmsg("could not write file \"%s\": %m", OPTS_FILE))); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| |
| /* |
| * MaxLivePostmasterChildren |
| * |
| * This reports the number of entries needed in per-child-process arrays |
| * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray). |
| * These arrays include regular backends, autovac workers, walsenders |
| * and background workers, but not special children nor dead_end children. |
| * This allows the arrays to have a fixed maximum size, to wit the same |
| * too-many-children limit enforced by canAcceptConnections(). The exact value |
| * isn't too critical as long as it's more than MaxBackends. |
| */ |
| int |
| MaxLivePostmasterChildren(void) |
| { |
| return 2 * (MaxConnections + autovacuum_max_workers + 1 + |
| login_monitor_max_processes /* Login Monitor */ + max_wal_senders + max_worker_processes); |
| } |
| |
| /* |
| * Connect background worker to a database. |
| */ |
| void |
| BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags) |
| { |
| BackgroundWorker *worker = MyBgworkerEntry; |
| |
| /* XXX is this the right errcode? */ |
| if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)) |
| ereport(FATAL, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("database connection requirement not indicated during registration"))); |
| |
| InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0); |
| |
| /* it had better not gotten out of "init" mode yet */ |
| if (!IsInitProcessingMode()) |
| ereport(ERROR, |
| (errmsg("invalid processing mode in background worker"))); |
| SetProcessingMode(NormalProcessing); |
| } |
| |
| /* |
| * Connect background worker to a database using OIDs. |
| */ |
| void |
| BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags) |
| { |
| BackgroundWorker *worker = MyBgworkerEntry; |
| |
| /* XXX is this the right errcode? */ |
| if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)) |
| ereport(FATAL, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("database connection requirement not indicated during registration"))); |
| |
| InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0); |
| |
| /* it had better not gotten out of "init" mode yet */ |
| if (!IsInitProcessingMode()) |
| ereport(ERROR, |
| (errmsg("invalid processing mode in background worker"))); |
| SetProcessingMode(NormalProcessing); |
| } |
| |
| /* |
| * Block/unblock signals in a background worker |
| */ |
| void |
| BackgroundWorkerBlockSignals(void) |
| { |
| PG_SETMASK(&BlockSig); |
| } |
| |
| void |
| BackgroundWorkerUnblockSignals(void) |
| { |
| PG_SETMASK(&UnBlockSig); |
| } |
| |
| #ifdef EXEC_BACKEND |
| static pid_t |
| bgworker_forkexec(int shmem_slot) |
| { |
| char *av[10]; |
| int ac = 0; |
| char forkav[MAXPGPATH]; |
| |
| snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot); |
| |
| av[ac++] = "postgres"; |
| av[ac++] = forkav; |
| av[ac++] = NULL; /* filled in by postmaster_forkexec */ |
| av[ac] = NULL; |
| |
| Assert(ac < lengthof(av)); |
| |
| return postmaster_forkexec(ac, av); |
| } |
| #endif |
| |
| /* |
| * Start a new bgworker. |
| * Starting time conditions must have been checked already. |
| * |
| * Returns true on success, false on failure. |
| * In either case, update the RegisteredBgWorker's state appropriately. |
| * |
| * This code is heavily based on autovacuum.c, q.v. |
| */ |
| static bool |
| do_start_bgworker(RegisteredBgWorker *rw) |
| { |
| pid_t worker_pid; |
| |
| Assert(rw->rw_pid == 0); |
| |
| /* |
| * Allocate and assign the Backend element. Note we must do this before |
| * forking, so that we can handle failures (out of memory or child-process |
| * slots) cleanly. |
| * |
| * Treat failure as though the worker had crashed. That way, the |
| * postmaster will wait a bit before attempting to start it again; if we |
| * tried again right away, most likely we'd find ourselves hitting the |
| * same resource-exhaustion condition. |
| */ |
| if (!assign_backendlist_entry(rw)) |
| { |
| rw->rw_crashed_at = GetCurrentTimestamp(); |
| return false; |
| } |
| |
| ereport(DEBUG1, |
| (errmsg_internal("starting background worker process \"%s\"", |
| rw->rw_worker.bgw_name))); |
| |
| #ifdef EXEC_BACKEND |
| switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot))) |
| #else |
| switch ((worker_pid = fork_process())) |
| #endif |
| { |
| case -1: |
| /* in postmaster, fork failed ... */ |
| ereport(LOG, |
| (errmsg("could not fork worker process: %m"))); |
| /* undo what assign_backendlist_entry did */ |
| ReleasePostmasterChildSlot(rw->rw_child_slot); |
| rw->rw_child_slot = 0; |
| free(rw->rw_backend); |
| rw->rw_backend = NULL; |
| /* mark entry as crashed, so we'll try again later */ |
| rw->rw_crashed_at = GetCurrentTimestamp(); |
| break; |
| |
| #ifndef EXEC_BACKEND |
| case 0: |
| /* in postmaster child ... */ |
| InitPostmasterChild(); |
| |
| /* Close the postmaster's sockets */ |
| ClosePostmasterPorts(false); |
| |
| /* |
| * Before blowing away PostmasterContext, save this bgworker's |
| * data where it can find it. |
| */ |
| MyBgworkerEntry = (BackgroundWorker *) |
| MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker)); |
| memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker)); |
| |
| /* Release postmaster's working memory context */ |
| MemoryContextSwitchTo(TopMemoryContext); |
| MemoryContextDelete(PostmasterContext); |
| PostmasterContext = NULL; |
| |
| StartBackgroundWorker(); |
| |
| exit(1); /* should not get here */ |
| break; |
| #endif |
| default: |
| /* in postmaster, fork successful ... */ |
| rw->rw_pid = worker_pid; |
| rw->rw_backend->pid = rw->rw_pid; |
| ReportBackgroundWorkerPID(rw); |
| /* add new worker to lists of backends */ |
| dlist_push_head(&BackendList, &rw->rw_backend->elem); |
| #ifdef EXEC_BACKEND |
| ShmemBackendArrayAdd(rw->rw_backend); |
| #endif |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* |
| * Does the current postmaster state require starting a worker with the |
| * specified start_time? |
| */ |
| static bool |
| bgworker_should_start_now(BgWorkerStartTime start_time) |
| { |
| switch (pmState) |
| { |
| case PM_NO_CHILDREN: |
| case PM_WAIT_DEAD_END: |
| case PM_SHUTDOWN_2: |
| case PM_SHUTDOWN: |
| case PM_WAIT_BACKENDS: |
| case PM_STOP_BACKENDS: |
| break; |
| |
| case PM_RUN: |
| if (start_time == BgWorkerStart_DtxRecovering) |
| return true; |
| if (start_time == BgWorkerStart_RecoveryFinished) |
| return true; |
| /* fall through */ |
| |
| case PM_HOT_STANDBY: |
| if (start_time == BgWorkerStart_ConsistentState) |
| return true; |
| /* fall through */ |
| |
| case PM_RECOVERY: |
| case PM_STARTUP: |
| case PM_INIT: |
| if (start_time == BgWorkerStart_PostmasterStart) |
| return true; |
| /* fall through */ |
| |
| } |
| |
| return false; |
| } |
| |
| /* |
| * Does specified start_time need distributed transactions been recovered? |
| */ |
| static bool |
| bgworker_should_start_mpp(BackgroundWorker *worker) |
| { |
| BgWorkerStartTime start_time = worker->bgw_start_time; |
| |
| /* |
| * background worker is not scheduled until distributed transactions |
| * are recovered if it needs to start at BgWorkerStart_RecoveryFinished |
| * or BgWorkerStart_ConsistentState because it's not safe to do a read |
| * or write if DTX is not recovered. |
| */ |
| if (Gp_role == GP_ROLE_DISPATCH) |
| { |
| if (!*shmDtmStarted && |
| (start_time == BgWorkerStart_ConsistentState || |
| start_time == BgWorkerStart_RecoveryFinished)) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* |
| * Allocate the Backend struct for a connected background worker, but don't |
| * add it to the list of backends just yet. |
| * |
| * On failure, return false without changing any worker state. |
| * |
| * Some info from the Backend is copied into the passed rw. |
| */ |
| static bool |
| assign_backendlist_entry(RegisteredBgWorker *rw) |
| { |
| Backend *bn; |
| |
| /* |
| * Check that database state allows another connection. Currently the |
| * only possible failure is CAC_TOOMANY, so we just log an error message |
| * based on that rather than checking the error code precisely. |
| */ |
| if (canAcceptConnections(BACKEND_TYPE_BGWORKER) != CAC_OK) |
| { |
| ereport(LOG, |
| (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), |
| errmsg("no slot available for new worker process"))); |
| return false; |
| } |
| |
| /* |
| * Compute the cancel key that will be assigned to this session. We |
| * probably don't need cancel keys for background workers, but we'd better |
| * have something random in the field to prevent unfriendly people from |
| * sending cancels to them. |
| */ |
| if (!RandomCancelKey(&MyCancelKey)) |
| { |
| ereport(LOG, |
| (errcode(ERRCODE_INTERNAL_ERROR), |
| errmsg("could not generate random cancel key"))); |
| return false; |
| } |
| |
| bn = malloc(sizeof(Backend)); |
| if (bn == NULL) |
| { |
| ereport(LOG, |
| (errcode(ERRCODE_OUT_OF_MEMORY), |
| errmsg("out of memory"))); |
| return false; |
| } |
| |
| bn->cancel_key = MyCancelKey; |
| bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); |
| bn->bkend_type = BACKEND_TYPE_BGWORKER; |
| bn->dead_end = false; |
| bn->bgworker_notify = false; |
| |
| rw->rw_backend = bn; |
| rw->rw_child_slot = bn->child_slot; |
| |
| return true; |
| } |
| |
| /* |
| * If the time is right, start background worker(s). |
| * |
| * As a side effect, the bgworker control variables are set or reset |
| * depending on whether more workers may need to be started. |
| * |
| * We limit the number of workers started per call, to avoid consuming the |
| * postmaster's attention for too long when many such requests are pending. |
| * As long as StartWorkerNeeded is true, ServerLoop will not block and will |
| * call this function again after dealing with any other issues. |
| */ |
| static void |
| maybe_start_bgworkers(void) |
| { |
| #define MAX_BGWORKERS_TO_LAUNCH 100 |
| int num_launched = 0; |
| TimestampTz now = 0; |
| slist_mutable_iter iter; |
| |
| if (start_bgworkers_hook) |
| { |
| (*start_bgworkers_hook) (FatalError, pmState, do_start_bgworker); |
| } |
| |
| /* |
| * During crash recovery, we have no need to be called until the state |
| * transition out of recovery. |
| */ |
| if (FatalError) |
| { |
| StartWorkerNeeded = false; |
| HaveCrashedWorker = false; |
| return; |
| } |
| |
| /* Don't need to be called again unless we find a reason for it below */ |
| StartWorkerNeeded = false; |
| HaveCrashedWorker = false; |
| |
| slist_foreach_modify(iter, &BackgroundWorkerList) |
| { |
| RegisteredBgWorker *rw; |
| |
| rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); |
| |
| /* ignore if already running */ |
| if (rw->rw_pid != 0) |
| continue; |
| |
| /* if marked for death, clean up and remove from list */ |
| if (rw->rw_terminate) |
| { |
| ForgetBackgroundWorker(&iter); |
| |
| continue; |
| } |
| |
| /* |
| * If this worker has crashed previously, maybe it needs to be |
| * restarted (unless on registration it specified it doesn't want to |
| * be restarted at all). Check how long ago did a crash last happen. |
| * If the last crash is too recent, don't start it right away; let it |
| * be restarted once enough time has passed. |
| */ |
| if (rw->rw_crashed_at != 0) |
| { |
| if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) |
| { |
| int notify_pid; |
| |
| notify_pid = rw->rw_worker.bgw_notify_pid; |
| |
| ForgetBackgroundWorker(&iter); |
| |
| /* Report worker is gone now. */ |
| if (notify_pid != 0) |
| kill(notify_pid, SIGUSR1); |
| |
| continue; |
| } |
| |
| /* read system time only when needed */ |
| if (now == 0) |
| now = GetCurrentTimestamp(); |
| |
| if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now, |
| rw->rw_worker.bgw_restart_time * 1000)) |
| { |
| /* Set flag to remember that we have workers to start later */ |
| HaveCrashedWorker = true; |
| continue; |
| } |
| } |
| |
| if (bgworker_should_start_now(rw->rw_worker.bgw_start_time)) |
| { |
| if (!bgworker_should_start_mpp(&rw->rw_worker)) |
| continue; |
| |
| /* reset crash time before trying to start worker */ |
| rw->rw_crashed_at = 0; |
| |
| /* |
| * Try to start the worker. |
| * |
| * On failure, give up processing workers for now, but set |
| * StartWorkerNeeded so we'll come back here on the next iteration |
| * of ServerLoop to try again. (We don't want to wait, because |
| * there might be additional ready-to-run workers.) We could set |
| * HaveCrashedWorker as well, since this worker is now marked |
| * crashed, but there's no need because the next run of this |
| * function will do that. |
| */ |
| if (!do_start_bgworker(rw)) |
| { |
| StartWorkerNeeded = true; |
| return; |
| } |
| |
| /* |
| * If we've launched as many workers as allowed, quit, but have |
| * ServerLoop call us again to look for additional ready-to-run |
| * workers. There might not be any, but we'll find out the next |
| * time we run. |
| */ |
| if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH) |
| { |
| StartWorkerNeeded = true; |
| return; |
| } |
| } |
| } |
| } |
| |
| /* |
| * When a backend asks to be notified about worker state changes, we |
| * set a flag in its backend entry. The background worker machinery needs |
| * to know when such backends exit. |
| */ |
| bool |
| PostmasterMarkPIDForWorkerNotify(int pid) |
| { |
| dlist_iter iter; |
| Backend *bp; |
| |
| dlist_foreach(iter, &BackendList) |
| { |
| bp = dlist_container(Backend, elem, iter.cur); |
| if (bp->pid == pid) |
| { |
| bp->bgworker_notify = true; |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| #ifdef EXEC_BACKEND |
| |
| /* |
| * The following need to be available to the save/restore_backend_variables |
| * functions. They are marked NON_EXEC_STATIC in their home modules. |
| */ |
| extern slock_t *ShmemLock; |
| extern slock_t *ProcStructLock; |
| extern PGPROC *AuxiliaryProcs; |
| extern PMSignalData *PMSignalState; |
| extern pgsocket pgStatSock; |
| extern pg_time_t first_syslogger_file_time; |
| |
| #ifndef WIN32 |
| #define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true) |
| #define read_inheritable_socket(dest, src) (*(dest) = *(src)) |
| #else |
| static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child); |
| static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src, |
| pid_t childPid); |
| static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src); |
| #endif |
| |
| |
| /* Save critical backend variables into the BackendParameters struct */ |
| #ifndef WIN32 |
| static bool |
| save_backend_variables(BackendParameters *param, Port *port) |
| #else |
| static bool |
| save_backend_variables(BackendParameters *param, Port *port, |
| HANDLE childProcess, pid_t childPid) |
| #endif |
| { |
| memcpy(¶m->port, port, sizeof(Port)); |
| if (!write_inheritable_socket(¶m->portsocket, port->sock, childPid)) |
| return false; |
| |
| strlcpy(param->DataDir, DataDir, MAXPGPATH); |
| |
| memcpy(¶m->ListenSocket, &ListenSocket, sizeof(ListenSocket)); |
| |
| param->MyCancelKey = MyCancelKey; |
| param->MyPMChildSlot = MyPMChildSlot; |
| |
| #ifdef WIN32 |
| param->ShmemProtectiveRegion = ShmemProtectiveRegion; |
| #endif |
| param->UsedShmemSegID = UsedShmemSegID; |
| param->UsedShmemSegAddr = UsedShmemSegAddr; |
| |
| param->ShmemLock = ShmemLock; |
| param->ShmemVariableCache = ShmemVariableCache; |
| param->ShmemBackendArray = ShmemBackendArray; |
| |
| #ifndef HAVE_SPINLOCKS |
| param->SpinlockSemaArray = SpinlockSemaArray; |
| #endif |
| param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests; |
| param->NamedLWLockTrancheArray = NamedLWLockTrancheArray; |
| param->MainLWLockArray = MainLWLockArray; |
| param->ProcStructLock = ProcStructLock; |
| param->ProcGlobal = ProcGlobal; |
| param->AuxiliaryProcs = AuxiliaryProcs; |
| param->PreparedXactProcs = PreparedXactProcs; |
| param->PMSignalState = PMSignalState; |
| if (!write_inheritable_socket(¶m->pgStatSock, pgStatSock, childPid)) |
| return false; |
| |
| param->PostmasterPid = PostmasterPid; |
| param->PgStartTime = PgStartTime; |
| param->PgReloadTime = PgReloadTime; |
| param->first_syslogger_file_time = first_syslogger_file_time; |
| |
| param->redirection_done = redirection_done; |
| param->IsBinaryUpgrade = IsBinaryUpgrade; |
| param->ConvertMasterDataDirToSegment = ConvertMasterDataDirToSegment; |
| param->query_id_enabled = query_id_enabled; |
| param->max_safe_fds = max_safe_fds; |
| |
| param->MaxBackends = MaxBackends; |
| |
| #ifdef WIN32 |
| param->PostmasterHandle = PostmasterHandle; |
| if (!write_duplicated_handle(¶m->initial_signal_pipe, |
| pgwin32_create_signal_listener(childPid), |
| childProcess)) |
| return false; |
| #else |
| memcpy(¶m->postmaster_alive_fds, &postmaster_alive_fds, |
| sizeof(postmaster_alive_fds)); |
| #endif |
| |
| memcpy(¶m->syslogPipe, &syslogPipe, sizeof(syslogPipe)); |
| |
| strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH); |
| |
| strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH); |
| |
| return true; |
| } |
| |
| |
| #ifdef WIN32 |
| /* |
| * Duplicate a handle for usage in a child process, and write the child |
| * process instance of the handle to the parameter file. |
| */ |
| static bool |
| write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess) |
| { |
| HANDLE hChild = INVALID_HANDLE_VALUE; |
| |
| if (!DuplicateHandle(GetCurrentProcess(), |
| src, |
| childProcess, |
| &hChild, |
| 0, |
| TRUE, |
| DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS)) |
| { |
| ereport(LOG, |
| (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %lu", |
| GetLastError()))); |
| return false; |
| } |
| |
| *dest = hChild; |
| return true; |
| } |
| |
| /* |
| * Duplicate a socket for usage in a child process, and write the resulting |
| * structure to the parameter file. |
| * This is required because a number of LSPs (Layered Service Providers) very |
| * common on Windows (antivirus, firewalls, download managers etc) break |
| * straight socket inheritance. |
| */ |
| static bool |
| write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid) |
| { |
| dest->origsocket = src; |
| if (src != 0 && src != PGINVALID_SOCKET) |
| { |
| /* Actual socket */ |
| if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0) |
| { |
| ereport(LOG, |
| (errmsg("could not duplicate socket %d for use in backend: error code %d", |
| (int) src, WSAGetLastError()))); |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /* |
| * Read a duplicate socket structure back, and get the socket descriptor. |
| */ |
| static void |
| read_inheritable_socket(SOCKET *dest, InheritableSocket *src) |
| { |
| SOCKET s; |
| |
| if (src->origsocket == PGINVALID_SOCKET || src->origsocket == 0) |
| { |
| /* Not a real socket! */ |
| *dest = src->origsocket; |
| } |
| else |
| { |
| /* Actual socket, so create from structure */ |
| s = WSASocket(FROM_PROTOCOL_INFO, |
| FROM_PROTOCOL_INFO, |
| FROM_PROTOCOL_INFO, |
| &src->wsainfo, |
| 0, |
| 0); |
| if (s == INVALID_SOCKET) |
| { |
| write_stderr("could not create inherited socket: error code %d\n", |
| WSAGetLastError()); |
| exit(1); |
| } |
| *dest = s; |
| |
| /* |
| * To make sure we don't get two references to the same socket, close |
| * the original one. (This would happen when inheritance actually |
| * works.. |
| */ |
| closesocket(src->origsocket); |
| } |
| } |
| #endif |
| |
| static void |
| read_backend_variables(char *id, Port *port) |
| { |
| BackendParameters param; |
| |
| #ifndef WIN32 |
| /* Non-win32 implementation reads from file */ |
| FILE *fp; |
| |
| /* Open file */ |
| fp = AllocateFile(id, PG_BINARY_R); |
| if (!fp) |
| { |
| write_stderr("could not open backend variables file \"%s\": %s\n", |
| id, strerror(errno)); |
| exit(1); |
| } |
| |
| if (fread(¶m, sizeof(param), 1, fp) != 1) |
| { |
| write_stderr("could not read from backend variables file \"%s\": %s\n", |
| id, strerror(errno)); |
| exit(1); |
| } |
| |
| /* Release file */ |
| FreeFile(fp); |
| if (unlink(id) != 0) |
| { |
| write_stderr("could not remove file \"%s\": %s\n", |
| id, strerror(errno)); |
| exit(1); |
| } |
| #else |
| /* Win32 version uses mapped file */ |
| HANDLE paramHandle; |
| BackendParameters *paramp; |
| |
| #ifdef _WIN64 |
| paramHandle = (HANDLE) _atoi64(id); |
| #else |
| paramHandle = (HANDLE) atol(id); |
| #endif |
| paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0); |
| if (!paramp) |
| { |
| write_stderr("could not map view of backend variables: error code %lu\n", |
| GetLastError()); |
| exit(1); |
| } |
| |
| memcpy(¶m, paramp, sizeof(BackendParameters)); |
| |
| if (!UnmapViewOfFile(paramp)) |
| { |
| write_stderr("could not unmap view of backend variables: error code %lu\n", |
| GetLastError()); |
| exit(1); |
| } |
| |
| if (!CloseHandle(paramHandle)) |
| { |
| write_stderr("could not close handle to backend parameter variables: error code %lu\n", |
| GetLastError()); |
| exit(1); |
| } |
| #endif |
| |
| restore_backend_variables(¶m, port); |
| } |
| |
| /* Restore critical backend variables from the BackendParameters struct */ |
| static void |
| restore_backend_variables(BackendParameters *param, Port *port) |
| { |
| memcpy(port, ¶m->port, sizeof(Port)); |
| read_inheritable_socket(&port->sock, ¶m->portsocket); |
| |
| SetDataDir(param->DataDir); |
| |
| memcpy(&ListenSocket, ¶m->ListenSocket, sizeof(ListenSocket)); |
| |
| MyCancelKey = param->MyCancelKey; |
| MyPMChildSlot = param->MyPMChildSlot; |
| |
| #ifdef WIN32 |
| ShmemProtectiveRegion = param->ShmemProtectiveRegion; |
| #endif |
| UsedShmemSegID = param->UsedShmemSegID; |
| UsedShmemSegAddr = param->UsedShmemSegAddr; |
| |
| ShmemLock = param->ShmemLock; |
| ShmemVariableCache = param->ShmemVariableCache; |
| ShmemBackendArray = param->ShmemBackendArray; |
| |
| #ifndef HAVE_SPINLOCKS |
| SpinlockSemaArray = param->SpinlockSemaArray; |
| #endif |
| NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests; |
| NamedLWLockTrancheArray = param->NamedLWLockTrancheArray; |
| MainLWLockArray = param->MainLWLockArray; |
| ProcStructLock = param->ProcStructLock; |
| ProcGlobal = param->ProcGlobal; |
| AuxiliaryProcs = param->AuxiliaryProcs; |
| PreparedXactProcs = param->PreparedXactProcs; |
| PMSignalState = param->PMSignalState; |
| read_inheritable_socket(&pgStatSock, ¶m->pgStatSock); |
| |
| PostmasterPid = param->PostmasterPid; |
| PgStartTime = param->PgStartTime; |
| PgReloadTime = param->PgReloadTime; |
| first_syslogger_file_time = param->first_syslogger_file_time; |
| |
| redirection_done = param->redirection_done; |
| IsBinaryUpgrade = param->IsBinaryUpgrade; |
| ConvertMasterDataDirToSegment = param->ConvertMasterDataDirToSegment; |
| query_id_enabled = param->query_id_enabled; |
| max_safe_fds = param->max_safe_fds; |
| |
| MaxBackends = param->MaxBackends; |
| |
| #ifdef WIN32 |
| PostmasterHandle = param->PostmasterHandle; |
| pgwin32_initial_signal_pipe = param->initial_signal_pipe; |
| #else |
| memcpy(&postmaster_alive_fds, ¶m->postmaster_alive_fds, |
| sizeof(postmaster_alive_fds)); |
| #endif |
| |
| memcpy(&syslogPipe, ¶m->syslogPipe, sizeof(syslogPipe)); |
| |
| strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH); |
| |
| strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH); |
| |
| /* |
| * We need to restore fd.c's counts of externally-opened FDs; to avoid |
| * confusion, be sure to do this after restoring max_safe_fds. (Note: |
| * BackendInitialize will handle this for port->sock.) |
| */ |
| #ifndef WIN32 |
| if (postmaster_alive_fds[0] >= 0) |
| ReserveExternalFD(); |
| if (postmaster_alive_fds[1] >= 0) |
| ReserveExternalFD(); |
| #endif |
| if (pgStatSock != PGINVALID_SOCKET) |
| ReserveExternalFD(); |
| } |
| |
| |
| Size |
| ShmemBackendArraySize(void) |
| { |
| return mul_size(MaxLivePostmasterChildren(), sizeof(Backend)); |
| } |
| |
| void |
| ShmemBackendArrayAllocation(void) |
| { |
| Size size = ShmemBackendArraySize(); |
| |
| ShmemBackendArray = (Backend *) ShmemAlloc(size); |
| /* Mark all slots as empty */ |
| memset(ShmemBackendArray, 0, size); |
| } |
| |
| static void |
| ShmemBackendArrayAdd(Backend *bn) |
| { |
| /* The array slot corresponding to my PMChildSlot should be free */ |
| int i = bn->child_slot - 1; |
| |
| Assert(ShmemBackendArray[i].pid == 0); |
| ShmemBackendArray[i] = *bn; |
| } |
| |
| static void |
| ShmemBackendArrayRemove(Backend *bn) |
| { |
| int i = bn->child_slot - 1; |
| |
| Assert(ShmemBackendArray[i].pid == bn->pid); |
| /* Mark the slot as empty */ |
| ShmemBackendArray[i].pid = 0; |
| } |
| #endif /* EXEC_BACKEND */ |
| |
| |
| #ifdef WIN32 |
| |
| /* |
| * Subset implementation of waitpid() for Windows. We assume pid is -1 |
| * (that is, check all child processes) and options is WNOHANG (don't wait). |
| */ |
| static pid_t |
| waitpid(pid_t pid, int *exitstatus, int options) |
| { |
| DWORD dwd; |
| ULONG_PTR key; |
| OVERLAPPED *ovl; |
| |
| /* |
| * Check if there are any dead children. If there are, return the pid of |
| * the first one that died. |
| */ |
| if (GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0)) |
| { |
| *exitstatus = (int) key; |
| return dwd; |
| } |
| |
| return -1; |
| } |
| |
| /* |
| * Note! Code below executes on a thread pool! All operations must |
| * be thread safe! Note that elog() and friends must *not* be used. |
| */ |
| static void WINAPI |
| pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired) |
| { |
| win32_deadchild_waitinfo *childinfo = (win32_deadchild_waitinfo *) lpParameter; |
| DWORD exitcode; |
| |
| if (TimerOrWaitFired) |
| return; /* timeout. Should never happen, since we use |
| * INFINITE as timeout value. */ |
| |
| /* |
| * Remove handle from wait - required even though it's set to wait only |
| * once |
| */ |
| UnregisterWaitEx(childinfo->waitHandle, NULL); |
| |
| if (!GetExitCodeProcess(childinfo->procHandle, &exitcode)) |
| { |
| /* |
| * Should never happen. Inform user and set a fixed exitcode. |
| */ |
| write_stderr("could not read exit code for process\n"); |
| exitcode = 255; |
| } |
| |
| if (!PostQueuedCompletionStatus(win32ChildQueue, childinfo->procId, (ULONG_PTR) exitcode, NULL)) |
| write_stderr("could not post child completion status\n"); |
| |
| /* |
| * Handle is per-process, so we close it here instead of in the |
| * originating thread |
| */ |
| CloseHandle(childinfo->procHandle); |
| |
| /* |
| * Free struct that was allocated before the call to |
| * RegisterWaitForSingleObject() |
| */ |
| free(childinfo); |
| |
| /* Queue SIGCHLD signal */ |
| pg_queue_signal(SIGCHLD); |
| } |
| #endif /* WIN32 */ |
| |
| /* |
| * Initialize one and only handle for monitoring postmaster death. |
| * |
| * Called once in the postmaster, so that child processes can subsequently |
| * monitor if their parent is dead. |
| */ |
| static void |
| InitPostmasterDeathWatchHandle(void) |
| { |
| #ifndef WIN32 |
| |
| /* |
| * Create a pipe. Postmaster holds the write end of the pipe open |
| * (POSTMASTER_FD_OWN), and children hold the read end. Children can pass |
| * the read file descriptor to select() to wake up in case postmaster |
| * dies, or check for postmaster death with a (read() == 0). Children must |
| * close the write end as soon as possible after forking, because EOF |
| * won't be signaled in the read end until all processes have closed the |
| * write fd. That is taken care of in ClosePostmasterPorts(). |
| */ |
| Assert(MyProcPid == PostmasterPid); |
| if (pipe(postmaster_alive_fds) < 0) |
| ereport(FATAL, |
| (errcode_for_file_access(), |
| errmsg_internal("could not create pipe to monitor postmaster death: %m"))); |
| |
| /* Notify fd.c that we've eaten two FDs for the pipe. */ |
| ReserveExternalFD(); |
| ReserveExternalFD(); |
| |
| /* |
| * Set O_NONBLOCK to allow testing for the fd's presence with a read() |
| * call. |
| */ |
| if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1) |
| ereport(FATAL, |
| (errcode_for_socket_access(), |
| errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m"))); |
| #else |
| |
| /* |
| * On Windows, we use a process handle for the same purpose. |
| */ |
| if (DuplicateHandle(GetCurrentProcess(), |
| GetCurrentProcess(), |
| GetCurrentProcess(), |
| &PostmasterHandle, |
| 0, |
| TRUE, |
| DUPLICATE_SAME_ACCESS) == 0) |
| ereport(FATAL, |
| (errmsg_internal("could not duplicate postmaster handle: error code %lu", |
| GetLastError()))); |
| #endif /* WIN32 */ |
| } |
| |
| #if defined(HAVE_NUMA_H) && defined(HAVE_LIBNUMA) |
| /* LINUX */ |
| static void |
| setProcAffinity(int id) |
| { |
| int limit; |
| nodemask_t mask; |
| |
| if (numa_available() < 0) |
| { |
| elog(LOG, "Numa unavailable, will remain unbound."); |
| return; |
| } |
| |
| limit = numa_max_node() + 1; |
| |
| nodemask_zero(&mask); |
| nodemask_set(&mask, (id % limit)); |
| |
| elog(LOG, "Numa binding to numa-node %d", (id % limit)); |
| |
| /* this sets the memory */ |
| numa_bind(&mask); |
| |
| return; |
| } |
| #else |
| /* UNSUPPORTED */ |
| static void |
| setProcAffinity(int id) |
| { |
| elog(LOG, "gp_set_proc_affinity setting ignored; feature not configured"); |
| } |
| #endif |
| |
| void |
| load_auxiliary_libraries(void) |
| { |
| BackgroundWorker *worker; |
| slist_iter iter; |
| int i; |
| bool registered; |
| |
| /* load all auxiliary workers */ |
| for (i = 0; i < MaxPMAuxProc; i++) |
| { |
| worker = &PMAuxProcList[i]; |
| |
| if (worker->bgw_start_rule && |
| !worker->bgw_start_rule(worker->bgw_main_arg)) |
| continue; |
| |
| /* skip already registered worker */ |
| registered = false; |
| slist_foreach(iter, &BackgroundWorkerList) |
| { |
| RegisteredBgWorker *rw; |
| |
| rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); |
| |
| if (!strcmp(rw->rw_worker.bgw_name, worker->bgw_name) && |
| strcmp(rw->rw_worker.bgw_library_name, worker->bgw_library_name) == 0 && |
| strcmp(rw->rw_worker.bgw_function_name, worker->bgw_function_name) == 0 && |
| rw->rw_worker.bgw_start_rule == worker->bgw_start_rule) |
| registered = true; |
| } |
| if (registered) |
| continue; |
| |
| RegisterBackgroundWorker(worker); |
| } |
| } |
| |
| bool |
| isAuxiliaryBgWorker(BackgroundWorker *worker) |
| { |
| BackgroundWorker *aux_worker; |
| int i; |
| |
| Assert(worker); |
| |
| for (i = 0; i < MaxPMAuxProc; i++) |
| { |
| aux_worker = &PMAuxProcList[i]; |
| |
| if (!strcmp(aux_worker->bgw_name, worker->bgw_name) && |
| strcmp(aux_worker->bgw_library_name, worker->bgw_library_name) == 0 && |
| strcmp(aux_worker->bgw_function_name, worker->bgw_function_name) == 0 && |
| aux_worker->bgw_start_rule == worker->bgw_start_rule) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool |
| amAuxiliaryBgWorker(void) |
| { |
| if (!IsBackgroundWorker) |
| return false; |
| |
| Assert(MyBgworkerEntry); |
| |
| return isAuxiliaryBgWorker(MyBgworkerEntry); |
| } |