| /* ---------- |
| * pgstat.c |
| * |
| * All the statistics collector stuff hacked up in one big, ugly file. |
| * |
| * TODO: - Separate collector, postmaster and backend stuff |
| * into different files. |
| * |
| * - Add some automatic call for pgstat vacuuming. |
| * |
| * - Add a pgstat config column to pg_database, so this |
| * entire thing can be enabled/disabled on a per db basis. |
| * |
| * Copyright (c) 2001-2009, PostgreSQL Global Development Group |
| * |
| * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.189 2009/06/11 14:49:01 momjian Exp $ |
| * ---------- |
| */ |
| #include "postgres.h" |
| |
| #include <unistd.h> |
| #include <fcntl.h> |
| #include <sys/param.h> |
| #include <sys/time.h> |
| #include <sys/socket.h> |
| #include <netdb.h> |
| #include <netinet/in.h> |
| #include <arpa/inet.h> |
| #include <signal.h> |
| #include <time.h> |
| #ifdef HAVE_POLL_H |
| #include <poll.h> |
| #endif |
| #ifdef HAVE_SYS_POLL_H |
| #include <sys/poll.h> |
| #endif |
| |
| #include "pgstat.h" |
| |
| #include "access/heapam.h" |
| #include "access/transam.h" |
| #include "access/twophase_rmgr.h" |
| #include "access/xact.h" |
| #include "catalog/pg_database.h" |
| #include "catalog/pg_proc.h" |
| #include "libpq/ip.h" |
| #include "libpq/libpq.h" |
| #include "libpq/pqsignal.h" |
| #include "mb/pg_wchar.h" |
| #include "miscadmin.h" |
| #include "executor/instrument.h" |
| #include "pg_trace.h" |
| #include "postmaster/autovacuum.h" |
| #include "postmaster/fork_process.h" |
| #include "postmaster/postmaster.h" |
| #include "storage/backendid.h" |
| #include "storage/fd.h" |
| #include "storage/ipc.h" |
| #include "storage/pg_shmem.h" |
| #include "storage/pmsignal.h" |
| #include "utils/guc.h" |
| #include "utils/memutils.h" |
| #include "utils/ps_status.h" |
| #include "utils/rel.h" |
| #include "utils/tqual.h" |
| #include "cdb/cdbvars.h" |
| |
/* for mkdir() */
#include <sys/stat.h>
| |
| /* ---------- |
| * Paths for the statistics files (relative to installation's $PGDATA). |
| * ---------- |
| */ |
| #define PGSTAT_STAT_PERMANENT_FILENAME "global/pgstat.stat" |
| #define PGSTAT_STAT_PERMANENT_TMPFILE "global/pgstat.tmp" |
| |
| /* ---------- |
| * Timer definitions. |
| * ---------- |
| */ |
| #define PGSTAT_STAT_INTERVAL 500 /* Minimum time between stats file |
| * updates; in milliseconds. */ |
| |
| #define PGSTAT_RETRY_DELAY 10 /* How long to wait between statistics |
| * update requests; in milliseconds. */ |
| |
| #define PGSTAT_MAX_WAIT_TIME 5000 /* Maximum time to wait for a stats |
| * file update; in milliseconds. */ |
| |
| #define PGSTAT_RESTART_INTERVAL 60 /* How often to attempt to restart a |
| * failed statistics collector; in |
| * seconds. */ |
| |
| #define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster |
| * death; in seconds. */ |
| |
| #define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY) |
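
/*
 * With the values above, this works out to 5000 / 10 = 500 polling
 * attempts before a backend gives up waiting for a fresh stats file.
 */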
| |
| |
| /* ---------- |
| * The initial size hints for the hash tables used in the collector. |
| * ---------- |
| */ |
| #define PGSTAT_DB_HASH_SIZE 16 |
| #define PGSTAT_TAB_HASH_SIZE 512 |
| #define PGSTAT_QUEUE_HASH_SIZE 8 |
| #define PGSTAT_FUNCTION_HASH_SIZE 512 |
| |
| |
| /* ---------- |
| * GUC parameters |
| * ---------- |
| */ |
| bool pgstat_track_activities = false; |
| bool pgstat_track_counts = false; |
| |
| bool pgstat_collect_queuelevel = false; |
| |
| int pgstat_track_functions = TRACK_FUNC_OFF; |
| int pgstat_track_activity_query_size = PGBE_ACTIVITY_SIZE; |
| |
| /* ---------- |
| * Built from GUC parameter |
| * ---------- |
| */ |
| char *pgstat_stat_filename = NULL; |
| char *pgstat_stat_tmpname = NULL; |
| |
| /* |
| * BgWriter global statistics counters (unused in other processes). |
| * Stored directly in a stats message structure so it can be sent |
| * without needing to copy things around. We assume this inits to zeroes. |
| */ |
| PgStat_MsgBgWriter BgWriterStats; |
| |
| /* ---------- |
| * Local data |
| * ---------- |
| */ |
| NON_EXEC_STATIC int pgStatSock = -1; |
| |
| static struct sockaddr_storage pgStatAddr; |
| |
| static time_t last_pgstat_start_time; |
| |
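/* Set true once we are running inside the collector process itself */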
| static bool pgStatRunningInCollector = false; |
| |
| /* |
| * Structures in which backends store per-table info that's waiting to be |
| * sent to the collector. |
| * |
| * NOTE: once allocated, TabStatusArray structures are never moved or deleted |
| * for the life of the backend. Also, we zero out the t_id fields of the |
| * contained PgStat_TableStatus structs whenever they are not actively in use. |
| * This allows relcache pgstat_info pointers to be treated as long-lived data, |
| * avoiding repeated searches in pgstat_initstats() when a relation is |
| * repeatedly opened during a transaction. |
| */ |
| #define TABSTAT_QUANTUM 100 /* we alloc this many at a time */ |
| |
| typedef struct TabStatusArray |
| { |
| struct TabStatusArray *tsa_next; /* link to next array, if any */ |
| int tsa_used; /* # entries currently used */ |
| PgStat_TableStatus tsa_entries[TABSTAT_QUANTUM]; /* per-table data */ |
| } TabStatusArray; |
| |
| static TabStatusArray *pgStatTabList = NULL; |
| |
| /* |
| * Backends store per-function info that's waiting to be sent to the collector |
| * in this hash table (indexed by function OID). |
| */ |
| static HTAB *pgStatFunctions = NULL; |
| |
| /* |
| * Indicates if backend has some function stats that it hasn't yet |
| * sent to the collector. |
| */ |
| static bool have_function_stats = false; |
| |
| /* |
| * Tuple insertion/deletion counts for an open transaction can't be propagated |
| * into PgStat_TableStatus counters until we know if it is going to commit |
| * or abort. Hence, we keep these counts in per-subxact structs that live |
| * in TopTransactionContext. This data structure is designed on the assumption |
| * that subxacts won't usually modify very many tables. |
| */ |
| typedef struct PgStat_SubXactStatus |
| { |
| int nest_level; /* subtransaction nest level */ |
| struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */ |
| PgStat_TableXactStatus *first; /* head of list for this subxact */ |
| } PgStat_SubXactStatus; |
| |
| static PgStat_SubXactStatus *pgStatXactStack = NULL; |
| |
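/*
 * Commit/abort counts accumulated since the last tabstat message; they are
 * folded into the next regular tabstat message by pgstat_send_tabstat().
 */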
| static int pgStatXactCommit = 0; |
| static int pgStatXactRollback = 0; |
| |
| /* Record that's written to 2PC state file when pgstat state is persisted */ |
| typedef struct TwoPhasePgStatRecord |
| { |
| PgStat_Counter tuples_inserted; /* tuples inserted in xact */ |
| PgStat_Counter tuples_deleted; /* tuples deleted in xact */ |
| Oid t_id; /* table's OID */ |
| bool t_shared; /* is it a shared catalog? */ |
| } TwoPhasePgStatRecord; |
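
/*
 * These records are emitted by AtPrepare_PgStat() via RegisterTwoPhaseRecord()
 * and replayed by pgstat_twophase_postcommit()/pgstat_twophase_postabort().
 */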
| |
| /* |
| * Info about current "snapshot" of stats file |
| */ |
| static MemoryContext pgStatLocalContext = NULL; |
| static HTAB *pgStatDBHash = NULL; |
| |
| static HTAB *pgStatQueueHash = NULL; /* GPDB */ |
| static HTAB *localStatPortalHash = NULL; /* GPDB. per backend portal queue stats.*/ |
| |
| static PgBackendStatus *localBackendStatusTable = NULL; |
| static int localNumBackends = 0; |
| |
| /* |
| * Cluster wide statistics, kept in the stats collector. |
| * Contains statistics that are not collected per database |
| * or per table. |
| */ |
| static PgStat_GlobalStats globalStats; |
| |
| /* Last time the collector successfully wrote the stats file */ |
| static TimestampTz last_statwrite; |
| |
| /* Latest statistics request time from backends */ |
| static TimestampTz last_statrequest; |
| |
| static volatile bool need_exit = false; |
| static volatile bool got_SIGHUP = false; |
| |
| /* |
| * Total time charged to functions so far in the current backend. |
| * We use this to help separate "self" and "other" time charges. |
| * (We assume this initializes to zero.) |
| */ |
| static instr_time total_func_time; |
| |
| |
| /* ---------- |
| * Local function forward declarations |
| * ---------- |
| */ |
| #ifdef EXEC_BACKEND |
| static pid_t pgstat_forkexec(void); |
| #endif |
| |
| NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]); |
| static void pgstat_exit(SIGNAL_ARGS); |
| static void pgstat_beshutdown_hook(int code, Datum arg); |
| static void pgstat_sighup_handler(SIGNAL_ARGS); |
| |
| static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create); |
| |
| static PgStat_StatQueueEntry *pgstat_get_queue_entry(Oid queueid, bool create); /*GPDB*/ |
| |
| static void pgstat_write_statsfile(bool permanent); |
| static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent); |
| static void backend_read_statsfile(void); |
| static void pgstat_read_current_status(void); |
| |
| static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg); |
| static void pgstat_send_funcstats(void); |
| static HTAB *pgstat_collect_oids(Oid catalogid); |
| |
| static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared); |
| |
| static void pgstat_setup_memcxt(void); |
| |
| static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype); |
| static void pgstat_send(void *msg, int len); |
| |
| static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len); |
| static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len); |
| static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len); |
| static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len); |
| static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len); |
| static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len); |
| static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len); |
| static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len); |
| static void pgstat_recv_queuestat(PgStat_MsgQueuestat *msg, int len); /* GPDB */ |
| static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len); |
| static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len); |
| static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len); |
| |
| |
| /* ------------------------------------------------------------ |
| * Public functions called from postmaster follow |
| * ------------------------------------------------------------ |
| */ |
| |
| /* ---------- |
| * pgstat_init() - |
| * |
| * Called from postmaster at startup. Create the resources required |
| * by the statistics collector process. If unable to do so, do not |
| * fail --- better to let the postmaster start with stats collection |
| * disabled. |
| * ---------- |
| */ |
| void |
| pgstat_init(void) |
| { |
| socklen_t alen; |
| struct addrinfo *addrs = NULL, |
| *addr, |
| hints; |
| int ret; |
| fd_set rset; |
| struct timeval tv; |
| char test_byte; |
| int sel_res; |
| int tries = 0; |
| |
| #define TESTBYTEVAL ((char) 199) |
| |
| /* |
| * Create stats temp directory if not present; ignore errors. |
| * This avoids the need to initdb... This is temporary code, and |
| * can be removed in the future, as initdb does this for us. |
| */ |
| mkdir("pg_stat_tmp", 0700); |
| |
| /* |
| * Create the UDP socket for sending and receiving statistic messages |
| */ |
| hints.ai_flags = AI_PASSIVE; |
| hints.ai_family = PF_UNSPEC; |
| hints.ai_socktype = SOCK_DGRAM; |
| hints.ai_protocol = 0; |
| hints.ai_addrlen = 0; |
| hints.ai_addr = NULL; |
| hints.ai_canonname = NULL; |
| hints.ai_next = NULL; |
| ret = pg_getaddrinfo_all("localhost", NULL, &hints, &addrs); |
| if (ret || !addrs) |
| { |
| ereport(LOG, |
| (errmsg("could not resolve \"localhost\": %s", |
| gai_strerror(ret)))); |
| goto startup_failed; |
| } |
| |
| /* |
| * On some platforms, pg_getaddrinfo_all() may return multiple addresses |
| * only one of which will actually work (eg, both IPv6 and IPv4 addresses |
| * when kernel will reject IPv6). Worse, the failure may occur at the |
| * bind() or perhaps even connect() stage. So we must loop through the |
| * results till we find a working combination. We will generate LOG |
| * messages, but no error, for bogus combinations. |
| */ |
| for (addr = addrs; addr; addr = addr->ai_next) |
| { |
| #ifdef HAVE_UNIX_SOCKETS |
| /* Ignore AF_UNIX sockets, if any are returned. */ |
| if (addr->ai_family == AF_UNIX) |
| continue; |
| #endif |
| |
| if (++tries > 1) |
| ereport(LOG, |
| (errmsg("trying another address for the statistics collector"))); |
| |
| /* |
| * Create the socket. |
| */ |
| if ((pgStatSock = socket(addr->ai_family, SOCK_DGRAM, 0)) < 0) |
| { |
| ereport(LOG, |
| (errcode_for_socket_access(), |
| errmsg("could not create socket for statistics collector: %m"))); |
| continue; |
| } |
| |
| /* |
| * Bind it to a kernel assigned port on localhost and get the assigned |
| * port via getsockname(). |
| */ |
| if (bind(pgStatSock, addr->ai_addr, addr->ai_addrlen) < 0) |
| { |
| ereport(LOG, |
| (errcode_for_socket_access(), |
| errmsg("could not bind socket for statistics collector: %m"))); |
| closesocket(pgStatSock); |
| pgStatSock = -1; |
| continue; |
| } |
| |
| alen = sizeof(pgStatAddr); |
| if (getsockname(pgStatSock, (struct sockaddr *) & pgStatAddr, &alen) < 0) |
| { |
| ereport(LOG, |
| (errcode_for_socket_access(), |
| errmsg("could not get address of socket for statistics collector: %m"))); |
| closesocket(pgStatSock); |
| pgStatSock = -1; |
| continue; |
| } |
| |
| /* |
| * Connect the socket to its own address. This saves a few cycles by |
| * not having to respecify the target address on every send. This also |
| * provides a kernel-level check that only packets from this same |
| * address will be received. |
| */ |
| if (connect(pgStatSock, (struct sockaddr *) & pgStatAddr, alen) < 0) |
| { |
| ereport(LOG, |
| (errcode_for_socket_access(), |
| errmsg("could not connect socket for statistics collector: %m"))); |
| closesocket(pgStatSock); |
| pgStatSock = -1; |
| continue; |
| } |
| |
| /* |
| * Try to send and receive a one-byte test message on the socket. This |
| * is to catch situations where the socket can be created but will not |
| * actually pass data (for instance, because kernel packet filtering |
| * rules prevent it). |
| */ |
| test_byte = TESTBYTEVAL; |
| |
| retry1: |
| if (send(pgStatSock, &test_byte, 1, 0) != 1) |
| { |
| if (errno == EINTR) |
| goto retry1; /* if interrupted, just retry */ |
| ereport(LOG, |
| (errcode_for_socket_access(), |
| errmsg("could not send test message on socket for statistics collector: %m"))); |
| closesocket(pgStatSock); |
| pgStatSock = -1; |
| continue; |
| } |
| |
| /* |
| * There could possibly be a little delay before the message can be |
| * received. We arbitrarily allow up to half a second before deciding |
| * it's broken. |
| */ |
| for (;;) /* need a loop to handle EINTR */ |
| { |
| FD_ZERO(&rset); |
			FD_SET(pgStatSock, &rset);
| |
| tv.tv_sec = 0; |
| tv.tv_usec = 500000; |
| sel_res = select(pgStatSock + 1, &rset, NULL, NULL, &tv); |
| if (sel_res >= 0 || errno != EINTR) |
| break; |
| } |
| if (sel_res < 0) |
| { |
| ereport(LOG, |
| (errcode_for_socket_access(), |
| errmsg("select() failed in statistics collector: %m"))); |
| closesocket(pgStatSock); |
| pgStatSock = -1; |
| continue; |
| } |
| if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset)) |
| { |
| /* |
| * This is the case we actually think is likely, so take pains to |
| * give a specific message for it. |
| * |
| * errno will not be set meaningfully here, so don't use it. |
| */ |
| ereport(LOG, |
| (errcode(ERRCODE_CONNECTION_FAILURE), |
| errmsg("test message did not get through on socket for statistics collector"))); |
| closesocket(pgStatSock); |
| pgStatSock = -1; |
| continue; |
| } |
| |
| test_byte++; /* just make sure variable is changed */ |
| |
| retry2: |
| if (recv(pgStatSock, &test_byte, 1, 0) != 1) |
| { |
| if (errno == EINTR) |
| goto retry2; /* if interrupted, just retry */ |
| ereport(LOG, |
| (errcode_for_socket_access(), |
| errmsg("could not receive test message on socket for statistics collector: %m"))); |
| closesocket(pgStatSock); |
| pgStatSock = -1; |
| continue; |
| } |
| |
| if (test_byte != TESTBYTEVAL) /* strictly paranoia ... */ |
| { |
| ereport(LOG, |
| (errcode(ERRCODE_INTERNAL_ERROR), |
| errmsg("incorrect test message transmission on socket for statistics collector"))); |
| closesocket(pgStatSock); |
| pgStatSock = -1; |
| continue; |
| } |
| |
| /* If we get here, we have a working socket */ |
| break; |
| } |
| |
| /* Did we find a working address? */ |
| if (!addr || pgStatSock < 0) |
| goto startup_failed; |
| |
| /* |
| * Set the socket to non-blocking IO. This ensures that if the collector |
| * falls behind, statistics messages will be discarded; backends won't |
| * block waiting to send messages to the collector. |
| */ |
| if (!pg_set_noblock(pgStatSock)) |
| { |
| ereport(LOG, |
| (errcode_for_socket_access(), |
| errmsg("could not set statistics collector socket to nonblocking mode: %m"))); |
| goto startup_failed; |
| } |
| |
| pg_freeaddrinfo_all(hints.ai_family, addrs); |
| |
| return; |
| |
| startup_failed: |
| ereport(LOG, |
| (errmsg("disabling statistics collector for lack of working socket"))); |
| |
| if (addrs) |
| pg_freeaddrinfo_all(hints.ai_family, addrs); |
| |
| if (pgStatSock >= 0) |
| closesocket(pgStatSock); |
| pgStatSock = -1; |
| |
| /* |
| * Adjust GUC variables to suppress useless activity, and for debugging |
| * purposes (seeing track_counts off is a clue that we failed here). We |
| * use PGC_S_OVERRIDE because there is no point in trying to turn it back |
| * on from postgresql.conf without a restart. |
| */ |
| |
| SetConfigOption("track_counts", "off", PGC_INTERNAL, PGC_S_OVERRIDE); |
| pgstat_track_counts = false; |
| pgstat_collect_queuelevel = false; |
| } |
| |
| /* |
| * pgstat_reset_all() - |
| * |
| * Remove the stats file. This is currently used only if WAL |
| * recovery is needed after a crash. |
| */ |
| void |
| pgstat_reset_all(void) |
| { |
| unlink(pgstat_stat_filename); |
| unlink(PGSTAT_STAT_PERMANENT_FILENAME); |
| } |
| |
| #ifdef EXEC_BACKEND |
| |
| /* |
| * pgstat_forkexec() - |
| * |
| * Format up the arglist for, then fork and exec, statistics collector process |
| */ |
| static pid_t |
| pgstat_forkexec(void) |
| { |
| char *av[10]; |
| int ac = 0; |
| |
| av[ac++] = "postgres"; |
| av[ac++] = "--forkcol"; |
| av[ac++] = NULL; /* filled in by postmaster_forkexec */ |
| |
| av[ac] = NULL; |
| Assert(ac < lengthof(av)); |
| |
| return postmaster_forkexec(ac, av); |
| } |
| #endif /* EXEC_BACKEND */ |
| |
| |
| /* |
| * pgstat_start() - |
| * |
| * Called from postmaster at startup or after an existing collector |
| * died. Attempt to fire up a fresh statistics collector. |
| * |
| * Returns PID of child process, or 0 if fail. |
| * |
| * Note: if fail, we will be called again from the postmaster main loop. |
| */ |
| int |
| pgstat_start(void) |
| { |
| time_t curtime; |
| pid_t pgStatPid; |
| |
| /* |
| * Check that the socket is there, else pgstat_init failed and we can do |
| * nothing useful. |
| */ |
| if (pgStatSock < 0) |
| return 0; |
| |
| /* |
| * Do nothing if too soon since last collector start. This is a safety |
| * valve to protect against continuous respawn attempts if the collector |
| * is dying immediately at launch. Note that since we will be re-called |
| * from the postmaster main loop, we will get another chance later. |
| */ |
| curtime = time(NULL); |
| if ((unsigned int) (curtime - last_pgstat_start_time) < |
| (unsigned int) PGSTAT_RESTART_INTERVAL) |
| return 0; |
| last_pgstat_start_time = curtime; |
| |
| /* |
| * Okay, fork off the collector. |
| */ |
| #ifdef EXEC_BACKEND |
| switch ((pgStatPid = pgstat_forkexec())) |
| #else |
| switch ((pgStatPid = fork_process())) |
| #endif |
| { |
| case -1: |
| ereport(LOG, |
| (errmsg("could not fork statistics collector: %m"))); |
| return 0; |
| |
| #ifndef EXEC_BACKEND |
| case 0: |
| /* in postmaster child ... */ |
| /* Close the postmaster's sockets */ |
| ClosePostmasterPorts(false); |
| |
| /* Lose the postmaster's on-exit routines */ |
| on_exit_reset(); |
| |
| /* Drop our connection to postmaster's shared memory, as well */ |
| PGSharedMemoryDetach(); |
| |
| PgstatCollectorMain(0, NULL); |
| break; |
| #endif |
| |
| default: |
| return (int) pgStatPid; |
| } |
| |
| /* shouldn't get here */ |
| return 0; |
| } |
| |
| void |
| allow_immediate_pgstat_restart(void) |
| { |
| last_pgstat_start_time = 0; |
| } |
| |
| /* ------------------------------------------------------------ |
| * Public functions used by backends follow |
| *------------------------------------------------------------ |
| */ |
| |
| |
| /* ---------- |
| * pgstat_report_stat() - |
| * |
| * Called from tcop/postgres.c to send the so far collected per-table |
| * and function usage statistics to the collector. Note that this is |
| * called only when not within a transaction, so it is fair to use |
| * transaction stop time as an approximation of current time. |
| * ---------- |
| */ |
| void |
| pgstat_report_stat(bool force) |
| { |
| /* we assume this inits to all zeroes: */ |
| static const PgStat_TableCounts all_zeroes; |
| static TimestampTz last_report = 0; |
| |
| TimestampTz now; |
| PgStat_MsgTabstat regular_msg; |
| PgStat_MsgTabstat shared_msg; |
| TabStatusArray *tsa; |
| int i; |
| |
| /* Don't expend a clock check if nothing to do */ |
| if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) |
| && !have_function_stats) |
| return; |
| |
| /* |
| * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL |
| * msec since we last sent one, or the caller wants to force stats out. |
| */ |
| now = GetCurrentTransactionStopTimestamp(); |
| if (!force && |
| !TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL)) |
| return; |
| last_report = now; |
| |
| /* |
| * Scan through the TabStatusArray struct(s) to find tables that actually |
| * have counts, and build messages to send. We have to separate shared |
| * relations from regular ones because the databaseid field in the message |
| * header has to depend on that. |
| */ |
| regular_msg.m_databaseid = MyDatabaseId; |
| shared_msg.m_databaseid = InvalidOid; |
| regular_msg.m_nentries = 0; |
| shared_msg.m_nentries = 0; |
| |
| for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next) |
| { |
| for (i = 0; i < tsa->tsa_used; i++) |
| { |
| PgStat_TableStatus *entry = &tsa->tsa_entries[i]; |
| PgStat_MsgTabstat *this_msg; |
| PgStat_TableEntry *this_ent; |
| |
| /* Shouldn't have any pending transaction-dependent counts */ |
| Assert(entry->trans == NULL); |
| |
| /* |
| * Ignore entries that didn't accumulate any actual counts, such |
| * as indexes that were opened by the planner but not used. |
| */ |
| if (memcmp(&entry->t_counts, &all_zeroes, |
| sizeof(PgStat_TableCounts)) == 0) |
| continue; |
| |
| /* |
| * OK, insert data into the appropriate message, and send if full. |
| */ |
| this_msg = entry->t_shared ? &shared_msg : ®ular_msg; |
| this_ent = &this_msg->m_entry[this_msg->m_nentries]; |
| this_ent->t_id = entry->t_id; |
| memcpy(&this_ent->t_counts, &entry->t_counts, |
| sizeof(PgStat_TableCounts)); |
| if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES) |
| { |
| pgstat_send_tabstat(this_msg); |
| this_msg->m_nentries = 0; |
| } |
| } |
| /* zero out TableStatus structs after use */ |
| MemSet(tsa->tsa_entries, 0, |
| tsa->tsa_used * sizeof(PgStat_TableStatus)); |
| tsa->tsa_used = 0; |
| } |
| |
| /* |
| * Send partial messages. If force is true, make sure that any pending |
| * xact commit/abort gets counted, even if no table stats to send. |
| */ |
| if (regular_msg.m_nentries > 0 || |
| (force && (pgStatXactCommit > 0 || pgStatXactRollback > 0))) |
| pgstat_send_tabstat(®ular_msg); |
| if (shared_msg.m_nentries > 0) |
| pgstat_send_tabstat(&shared_msg); |
| |
| /* Now, send function statistics */ |
| pgstat_send_funcstats(); |
| } |
| |
| /* |
| * Subroutine for pgstat_report_stat: finish and send a tabstat message |
| */ |
| static void |
| pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg) |
| { |
| int n; |
| int len; |
| |
| /* It's unlikely we'd get here with no socket, but maybe not impossible */ |
| if (pgStatSock < 0) |
| return; |
| |
| /* |
| * Report accumulated xact commit/rollback whenever we send a normal |
| * tabstat message |
| */ |
| if (OidIsValid(tsmsg->m_databaseid)) |
| { |
| tsmsg->m_xact_commit = pgStatXactCommit; |
| tsmsg->m_xact_rollback = pgStatXactRollback; |
| pgStatXactCommit = 0; |
| pgStatXactRollback = 0; |
| } |
| else |
| { |
| tsmsg->m_xact_commit = 0; |
| tsmsg->m_xact_rollback = 0; |
| } |
| |
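	/* Send only the used portion of the m_entry array */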
| n = tsmsg->m_nentries; |
| len = offsetof(PgStat_MsgTabstat, m_entry[0]) + |
| n * sizeof(PgStat_TableEntry); |
| |
| pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT); |
| pgstat_send(tsmsg, len); |
| } |
| |
| /* |
| * Subroutine for pgstat_report_stat: populate and send a function stat message |
| */ |
| static void |
| pgstat_send_funcstats(void) |
| { |
| /* we assume this inits to all zeroes: */ |
| static const PgStat_FunctionCounts all_zeroes; |
| |
| PgStat_MsgFuncstat msg; |
| PgStat_BackendFunctionEntry *entry; |
| HASH_SEQ_STATUS fstat; |
| |
| if (pgStatFunctions == NULL) |
| return; |
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_FUNCSTAT); |
| msg.m_databaseid = MyDatabaseId; |
| msg.m_nentries = 0; |
| |
| hash_seq_init(&fstat, pgStatFunctions); |
| while ((entry = (PgStat_BackendFunctionEntry *) hash_seq_search(&fstat)) != NULL) |
| { |
| PgStat_FunctionEntry *m_ent; |
| |
| /* Skip it if no counts accumulated since last time */ |
| if (memcmp(&entry->f_counts, &all_zeroes, |
| sizeof(PgStat_FunctionCounts)) == 0) |
| continue; |
| |
| /* need to convert format of time accumulators */ |
| m_ent = &msg.m_entry[msg.m_nentries]; |
| m_ent->f_id = entry->f_id; |
| m_ent->f_numcalls = entry->f_counts.f_numcalls; |
| m_ent->f_time = INSTR_TIME_GET_MICROSEC(entry->f_counts.f_time); |
| m_ent->f_time_self = INSTR_TIME_GET_MICROSEC(entry->f_counts.f_time_self); |
| |
| if (++msg.m_nentries >= PGSTAT_NUM_FUNCENTRIES) |
| { |
| pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) + |
| msg.m_nentries * sizeof(PgStat_FunctionEntry)); |
| msg.m_nentries = 0; |
| } |
| |
| /* reset the entry's counts */ |
| MemSet(&entry->f_counts, 0, sizeof(PgStat_FunctionCounts)); |
| } |
| |
| if (msg.m_nentries > 0) |
| pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) + |
| msg.m_nentries * sizeof(PgStat_FunctionEntry)); |
| |
| have_function_stats = false; |
| } |
| |
| |
| /* ---------- |
| * pgstat_vacuum_stat() - |
| * |
| * Will tell the collector about objects he can get rid of. |
| * ---------- |
| */ |
| void |
| pgstat_vacuum_stat(void) |
| { |
| HTAB *htab; |
| PgStat_MsgTabpurge msg; |
| PgStat_MsgFuncpurge f_msg; |
| HASH_SEQ_STATUS hstat; |
| PgStat_StatDBEntry *dbentry; |
| PgStat_StatTabEntry *tabentry; |
| PgStat_StatFuncEntry *funcentry; |
| int len; |
| |
| if (pgStatSock < 0) |
| return; |
| |
| /* |
| * If not done for this transaction, read the statistics collector stats |
| * file into some hash tables. |
| */ |
| backend_read_statsfile(); |
| |
| /* |
| * Read pg_database and make a list of OIDs of all existing databases |
| */ |
| htab = pgstat_collect_oids(DatabaseRelationId); |
| |
| /* |
| * Search the database hash table for dead databases and tell the |
| * collector to drop them. |
| */ |
| hash_seq_init(&hstat, pgStatDBHash); |
| while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL) |
| { |
| Oid dbid = dbentry->databaseid; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| /* the DB entry for shared tables (with InvalidOid) is never dropped */ |
| if (OidIsValid(dbid) && |
| hash_search(htab, (void *) &dbid, HASH_FIND, NULL) == NULL) |
| pgstat_drop_database(dbid); |
| } |
| |
| /* Clean up */ |
| hash_destroy(htab); |
| |
| /* |
| * Lookup our own database entry; if not found, nothing more to do. |
| */ |
| dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash, |
| (void *) &MyDatabaseId, |
| HASH_FIND, NULL); |
| if (dbentry == NULL || dbentry->tables == NULL) |
| return; |
| |
| /* |
| * Similarly to above, make a list of all known relations in this DB. |
| */ |
| htab = pgstat_collect_oids(RelationRelationId); |
| |
| /* |
| * Initialize our messages table counter to zero |
| */ |
| msg.m_nentries = 0; |
| |
| /* |
| * Check for all tables listed in stats hashtable if they still exist. |
| */ |
| hash_seq_init(&hstat, dbentry->tables); |
| while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL) |
| { |
| Oid tabid = tabentry->tableid; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| if (hash_search(htab, (void *) &tabid, HASH_FIND, NULL) != NULL) |
| continue; |
| |
| /* |
| * Not there, so add this table's Oid to the message |
| */ |
| msg.m_tableid[msg.m_nentries++] = tabid; |
| |
| /* |
| * If the message is full, send it out and reinitialize to empty |
| */ |
| if (msg.m_nentries >= PGSTAT_NUM_TABPURGE) |
| { |
			len = offsetof(PgStat_MsgTabpurge, m_tableid[0]) +
				msg.m_nentries * sizeof(Oid);
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE); |
| msg.m_databaseid = MyDatabaseId; |
| pgstat_send(&msg, len); |
| |
| msg.m_nentries = 0; |
| } |
| } |
| |
| /* |
| * Send the rest |
| */ |
| if (msg.m_nentries > 0) |
| { |
		len = offsetof(PgStat_MsgTabpurge, m_tableid[0]) +
			msg.m_nentries * sizeof(Oid);
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE); |
| msg.m_databaseid = MyDatabaseId; |
| pgstat_send(&msg, len); |
| } |
| |
| /* Clean up */ |
| hash_destroy(htab); |
| |
| /* |
| * Now repeat the above steps for functions. However, we needn't bother |
| * in the common case where no function stats are being collected. |
| */ |
| if (dbentry->functions != NULL && |
| hash_get_num_entries(dbentry->functions) > 0) |
| { |
| htab = pgstat_collect_oids(ProcedureRelationId); |
| |
| pgstat_setheader(&f_msg.m_hdr, PGSTAT_MTYPE_FUNCPURGE); |
| f_msg.m_databaseid = MyDatabaseId; |
| f_msg.m_nentries = 0; |
| |
| hash_seq_init(&hstat, dbentry->functions); |
| while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&hstat)) != NULL) |
| { |
| Oid funcid = funcentry->functionid; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| if (hash_search(htab, (void *) &funcid, HASH_FIND, NULL) != NULL) |
| continue; |
| |
| /* |
| * Not there, so add this function's Oid to the message |
| */ |
| f_msg.m_functionid[f_msg.m_nentries++] = funcid; |
| |
| /* |
| * If the message is full, send it out and reinitialize to empty |
| */ |
| if (f_msg.m_nentries >= PGSTAT_NUM_FUNCPURGE) |
| { |
				len = offsetof(PgStat_MsgFuncpurge, m_functionid[0]) +
					f_msg.m_nentries * sizeof(Oid);
| |
| pgstat_send(&f_msg, len); |
| |
| f_msg.m_nentries = 0; |
| } |
| } |
| |
| /* |
| * Send the rest |
| */ |
| if (f_msg.m_nentries > 0) |
| { |
			len = offsetof(PgStat_MsgFuncpurge, m_functionid[0]) +
				f_msg.m_nentries * sizeof(Oid);
| |
| pgstat_send(&f_msg, len); |
| } |
| |
| hash_destroy(htab); |
| } |
| } |
| |
| |
| /* ---------- |
| * pgstat_collect_oids() - |
| * |
| * Collect the OIDs of all objects listed in the specified system catalog |
| * into a temporary hash table. Caller should hash_destroy the result |
| * when done with it. |
| * ---------- |
| */ |
| static HTAB * |
| pgstat_collect_oids(Oid catalogid) |
| { |
| HTAB *htab; |
| HASHCTL hash_ctl; |
| Relation rel; |
| HeapScanDesc scan; |
| HeapTuple tup; |
| |
| memset(&hash_ctl, 0, sizeof(hash_ctl)); |
| hash_ctl.keysize = sizeof(Oid); |
| hash_ctl.entrysize = sizeof(Oid); |
| hash_ctl.hash = oid_hash; |
| htab = hash_create("Temporary table of OIDs", |
| PGSTAT_TAB_HASH_SIZE, |
| &hash_ctl, |
| HASH_ELEM | HASH_FUNCTION); |
| |
| rel = heap_open(catalogid, AccessShareLock); |
| scan = heap_beginscan(rel, SnapshotNow, 0, NULL); |
| while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) |
| { |
| Oid thisoid = HeapTupleGetOid(tup); |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL); |
| } |
| heap_endscan(scan); |
| heap_close(rel, AccessShareLock); |
| |
| return htab; |
| } |
| |
| |
| /* ---------- |
| * pgstat_drop_database() - |
| * |
| * Tell the collector that we just dropped a database. |
| * (If the message gets lost, we will still clean the dead DB eventually |
| * via future invocations of pgstat_vacuum_stat().) |
| * ---------- |
| */ |
| void |
| pgstat_drop_database(Oid databaseid) |
| { |
| PgStat_MsgDropdb msg; |
| |
| if (pgStatSock < 0) |
| return; |
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DROPDB); |
| msg.m_databaseid = databaseid; |
| pgstat_send(&msg, sizeof(msg)); |
| } |
| |
| |
| /* ---------- |
| * pgstat_drop_relation() - |
| * |
| * Tell the collector that we just dropped a relation. |
| * (If the message gets lost, we will still clean the dead entry eventually |
| * via future invocations of pgstat_vacuum_stat().) |
| * |
| * Currently not used for lack of any good place to call it; we rely |
| * entirely on pgstat_vacuum_stat() to clean out stats for dead rels. |
| * ---------- |
| */ |
| #ifdef NOT_USED |
| void |
| pgstat_drop_relation(Oid relid) |
| { |
| PgStat_MsgTabpurge msg; |
| int len; |
| |
| if (pgStatSock < 0) |
| return; |
| |
| msg.m_tableid[0] = relid; |
| msg.m_nentries = 1; |
| |
	len = offsetof(PgStat_MsgTabpurge, m_tableid[0]) + sizeof(Oid);
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE); |
| msg.m_databaseid = MyDatabaseId; |
| pgstat_send(&msg, len); |
| } |
| #endif /* NOT_USED */ |
| |
| |
| /* ---------- |
| * pgstat_reset_counters() - |
| * |
| * Tell the statistics collector to reset counters for our database. |
| * ---------- |
| */ |
| void |
| pgstat_reset_counters(void) |
| { |
| PgStat_MsgResetcounter msg; |
| |
| if (pgStatSock < 0) |
| return; |
| |
| if (!superuser()) |
| ereport(ERROR, |
| (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), |
| errmsg("must be superuser to reset statistics counters"))); |
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETCOUNTER); |
| msg.m_databaseid = MyDatabaseId; |
| pgstat_send(&msg, sizeof(msg)); |
| } |
| |
| |
| /* ---------- |
| * pgstat_report_autovac() - |
| * |
| * Called from autovacuum.c to report startup of an autovacuum process. |
| * We are called before InitPostgres is done, so can't rely on MyDatabaseId; |
| * the db OID must be passed in, instead. |
| * ---------- |
| */ |
| void |
| pgstat_report_autovac(Oid dboid) |
| { |
| PgStat_MsgAutovacStart msg; |
| |
| if (pgStatSock < 0) |
| return; |
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_AUTOVAC_START); |
| msg.m_databaseid = dboid; |
| msg.m_start_time = GetCurrentTimestamp(); |
| |
| pgstat_send(&msg, sizeof(msg)); |
| } |
| |
| |
| /* --------- |
| * pgstat_report_vacuum() - |
| * |
| * Tell the collector about the table we just vacuumed. |
| * --------- |
| */ |
| void |
| pgstat_report_vacuum(Oid tableoid, bool shared, bool scanned_all, |
| bool analyze, PgStat_Counter tuples) |
| { |
| PgStat_MsgVacuum msg; |
| |
| if (pgStatSock < 0 || !pgstat_track_counts) |
| return; |
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM); |
| msg.m_databaseid = shared ? InvalidOid : MyDatabaseId; |
| msg.m_tableoid = tableoid; |
| msg.m_scanned_all = scanned_all; |
| msg.m_analyze = analyze; |
| msg.m_autovacuum = IsAutoVacuumProcess(); /* is this autovacuum? */ |
| msg.m_vacuumtime = GetCurrentTimestamp(); |
| msg.m_tuples = tuples; |
| pgstat_send(&msg, sizeof(msg)); |
| } |
| |
| /* -------- |
| * pgstat_report_analyze() - |
| * |
| * Tell the collector about the table we just analyzed. |
| * -------- |
| */ |
| void |
| pgstat_report_analyze(Relation rel, PgStat_Counter livetuples, |
| PgStat_Counter deadtuples) |
| { |
| PgStat_MsgAnalyze msg; |
| |
| if (pgStatSock < 0 || !pgstat_track_counts) |
| return; |
| |
| /* |
| * Unlike VACUUM, ANALYZE might be running inside a transaction that has |
| * already inserted and/or deleted rows in the target table. ANALYZE will |
| * have counted such rows as live or dead respectively. Because we will |
| * report our counts of such rows at transaction end, we should subtract |
| * off these counts from what we send to the collector now, else they'll |
| * be double-counted after commit. (This approach also ensures that the |
| * collector ends up with the right numbers if we abort instead of |
| * committing.) |
| */ |
| if (rel->pgstat_info != NULL) |
| { |
| PgStat_TableXactStatus *trans; |
| |
| for (trans = rel->pgstat_info->trans; trans; trans = trans->upper) |
| { |
| livetuples -= trans->tuples_inserted - trans->tuples_deleted; |
| deadtuples -= trans->tuples_deleted; |
| } |
| /* count stuff inserted by already-aborted subxacts, too */ |
| deadtuples -= rel->pgstat_info->t_counts.t_new_dead_tuples; |
| /* Since ANALYZE's counts are estimates, we could have underflowed */ |
| livetuples = Max(livetuples, 0); |
| deadtuples = Max(deadtuples, 0); |
| } |
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE); |
| msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId; |
| msg.m_tableoid = RelationGetRelid(rel); |
| msg.m_autovacuum = IsAutoVacuumProcess(); /* is this autovacuum? */ |
| msg.m_analyzetime = GetCurrentTimestamp(); |
| msg.m_live_tuples = livetuples; |
| msg.m_dead_tuples = deadtuples; |
| pgstat_send(&msg, sizeof(msg)); |
| } |
| |
| |
| /* ---------- |
| * pgstat_ping() - |
| * |
| * Send some junk data to the collector to increase traffic. |
| * ---------- |
| */ |
| void |
| pgstat_ping(void) |
| { |
| PgStat_MsgDummy msg; |
| |
| if (pgStatSock < 0) |
| return; |
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DUMMY); |
| pgstat_send(&msg, sizeof(msg)); |
| } |
| |
| /* ---------- |
| * pgstat_send_inquiry() - |
| * |
| * Notify collector that we need fresh data. |
| * ts specifies the minimum acceptable timestamp for the stats file. |
| * ---------- |
| */ |
| static void |
| pgstat_send_inquiry(TimestampTz ts) |
| { |
| PgStat_MsgInquiry msg; |
| |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY); |
| msg.inquiry_time = ts; |
| pgstat_send(&msg, sizeof(msg)); |
| } |
| |
| |
| /* |
| * Initialize function call usage data. |
| * Called by the executor before invoking a function. |
| */ |
| void |
| pgstat_init_function_usage(FunctionCallInfoData *fcinfo, |
| PgStat_FunctionCallUsage *fcu) |
| { |
| PgStat_BackendFunctionEntry *htabent; |
| bool found; |
| |
| if (pgstat_track_functions <= fcinfo->flinfo->fn_stats) |
| { |
| /* stats not wanted */ |
| fcu->fs = NULL; |
| return; |
| } |
| |
| if (!pgStatFunctions) |
| { |
| /* First time through - initialize function stat table */ |
| HASHCTL hash_ctl; |
| |
| memset(&hash_ctl, 0, sizeof(hash_ctl)); |
| hash_ctl.keysize = sizeof(Oid); |
| hash_ctl.entrysize = sizeof(PgStat_BackendFunctionEntry); |
| hash_ctl.hash = oid_hash; |
| pgStatFunctions = hash_create("Function stat entries", |
| PGSTAT_FUNCTION_HASH_SIZE, |
| &hash_ctl, |
| HASH_ELEM | HASH_FUNCTION); |
| } |
| |
| /* Get the stats entry for this function, create if necessary */ |
| htabent = hash_search(pgStatFunctions, &fcinfo->flinfo->fn_oid, |
| HASH_ENTER, &found); |
| if (!found) |
| MemSet(&htabent->f_counts, 0, sizeof(PgStat_FunctionCounts)); |
| |
| fcu->fs = &htabent->f_counts; |
| |
| /* save stats for this function, later used to compensate for recursion */ |
| fcu->save_f_time = htabent->f_counts.f_time; |
| |
| /* save current backend-wide total time */ |
| fcu->save_total = total_func_time; |
| |
| /* get clock time as of function start */ |
| INSTR_TIME_SET_CURRENT(fcu->f_start); |
| } |
| |
| /* |
| * Calculate function call usage and update stat counters. |
| * Called by the executor after invoking a function. |
| * |
| * In the case of a set-returning function that runs in value-per-call mode, |
| * we will see multiple pgstat_init_function_usage/pgstat_end_function_usage |
| * calls for what the user considers a single call of the function. The |
| * finalize flag should be TRUE on the last call. |
| */ |
| void |
| pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu, bool finalize) |
| { |
| PgStat_FunctionCounts *fs = fcu->fs; |
| instr_time f_total; |
| instr_time f_others; |
| instr_time f_self; |
| |
| /* stats not wanted? */ |
| if (fs == NULL) |
| return; |
| |
| /* total elapsed time in this function call */ |
| INSTR_TIME_SET_CURRENT(f_total); |
| INSTR_TIME_SUBTRACT(f_total, fcu->f_start); |
| |
| /* self usage: elapsed minus anything already charged to other calls */ |
| f_others = total_func_time; |
| INSTR_TIME_SUBTRACT(f_others, fcu->save_total); |
| f_self = f_total; |
| INSTR_TIME_SUBTRACT(f_self, f_others); |
| |
| /* update backend-wide total time */ |
| INSTR_TIME_ADD(total_func_time, f_self); |
| |
| /* |
| * Compute the new total f_time as the total elapsed time added to the |
| * pre-call value of f_time. This is necessary to avoid double-counting |
| * any time taken by recursive calls of myself. (We do not need any |
| * similar kluge for self time, since that already excludes any recursive |
| * calls.) |
| */ |
| INSTR_TIME_ADD(f_total, fcu->save_f_time); |
| |
| /* update counters in function stats table */ |
| if (finalize) |
| fs->f_numcalls++; |
| fs->f_time = f_total; |
| INSTR_TIME_ADD(fs->f_time_self, f_self); |
| |
| /* indicate that we have something to send */ |
| have_function_stats = true; |
| } |
| |
| |
| /* ---------- |
| * pgstat_initstats() - |
| * |
| * Initialize a relcache entry to count access statistics. |
| * Called whenever a relation is opened. |
| * |
| * We assume that a relcache entry's pgstat_info field is zeroed by |
| * relcache.c when the relcache entry is made; thereafter it is long-lived |
| * data. We can avoid repeated searches of the TabStatus arrays when the |
| * same relation is touched repeatedly within a transaction. |
| * ---------- |
| */ |
| void |
| pgstat_initstats(Relation rel) |
| { |
| Oid rel_id = rel->rd_id; |
| char relkind = rel->rd_rel->relkind; |
| |
| /* We only count stats for things that have storage */ |
| if (!(relkind == RELKIND_RELATION || |
| relkind == RELKIND_INDEX || |
| relkind == RELKIND_TOASTVALUE || |
| relkind == RELKIND_SEQUENCE)) |
| { |
| rel->pgstat_info = NULL; |
| return; |
| } |
| |
| if (pgStatSock < 0 || !pgstat_track_counts) |
| { |
| /* We're not counting at all */ |
| rel->pgstat_info = NULL; |
| return; |
| } |
| |
| /* |
| * If we already set up this relation in the current transaction, nothing |
| * to do. |
| */ |
| if (rel->pgstat_info != NULL && |
| rel->pgstat_info->t_id == rel_id) |
| return; |
| |
| /* Else find or make the PgStat_TableStatus entry, and update link */ |
| rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared); |
| } |
| |
| /* |
| * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel |
| */ |
| static PgStat_TableStatus * |
| get_tabstat_entry(Oid rel_id, bool isshared) |
| { |
| PgStat_TableStatus *entry; |
| TabStatusArray *tsa; |
| TabStatusArray *prev_tsa; |
| int i; |
| |
| /* |
| * Search the already-used tabstat slots for this relation. |
| */ |
| prev_tsa = NULL; |
| for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = tsa->tsa_next) |
| { |
| for (i = 0; i < tsa->tsa_used; i++) |
| { |
| entry = &tsa->tsa_entries[i]; |
| if (entry->t_id == rel_id) |
| return entry; |
| } |
| |
| if (tsa->tsa_used < TABSTAT_QUANTUM) |
| { |
| /* |
| * It must not be present, but we found a free slot instead. Fine, |
| * let's use this one. We assume the entry was already zeroed, |
| * either at creation or after last use. |
| */ |
| entry = &tsa->tsa_entries[tsa->tsa_used++]; |
| entry->t_id = rel_id; |
| entry->t_shared = isshared; |
| return entry; |
| } |
| } |
| |
| /* |
| * We ran out of tabstat slots, so allocate more. Be sure they're zeroed. |
| */ |
| tsa = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext, |
| sizeof(TabStatusArray)); |
| if (prev_tsa) |
| prev_tsa->tsa_next = tsa; |
| else |
| pgStatTabList = tsa; |
| |
| /* |
| * Use the first entry of the new TabStatusArray. |
| */ |
| entry = &tsa->tsa_entries[tsa->tsa_used++]; |
| entry->t_id = rel_id; |
| entry->t_shared = isshared; |
| return entry; |
| } |
| |
| /* |
| * get_tabstat_stack_level - add a new (sub)transaction stack entry if needed |
| */ |
| static PgStat_SubXactStatus * |
| get_tabstat_stack_level(int nest_level) |
| { |
| PgStat_SubXactStatus *xact_state; |
| |
| xact_state = pgStatXactStack; |
| if (xact_state == NULL || xact_state->nest_level != nest_level) |
| { |
| xact_state = (PgStat_SubXactStatus *) |
| MemoryContextAlloc(TopTransactionContext, |
| sizeof(PgStat_SubXactStatus)); |
| xact_state->nest_level = nest_level; |
| xact_state->prev = pgStatXactStack; |
| xact_state->first = NULL; |
| pgStatXactStack = xact_state; |
| } |
| return xact_state; |
| } |
| |
| /* |
| * add_tabstat_xact_level - add a new (sub)transaction state record |
| */ |
| static void |
| add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level) |
| { |
| PgStat_SubXactStatus *xact_state; |
| PgStat_TableXactStatus *trans; |
| |
| /* |
| * If this is the first rel to be modified at the current nest level, we |
| * first have to push a transaction stack entry. |
| */ |
| xact_state = get_tabstat_stack_level(nest_level); |
| |
| /* Now make a per-table stack entry */ |
| trans = (PgStat_TableXactStatus *) |
| MemoryContextAllocZero(TopTransactionContext, |
| sizeof(PgStat_TableXactStatus)); |
| trans->nest_level = nest_level; |
| trans->upper = pgstat_info->trans; |
| trans->parent = pgstat_info; |
| trans->next = xact_state->first; |
| xact_state->first = trans; |
| pgstat_info->trans = trans; |
| } |
| |
| /* |
| * pgstat_count_heap_insert - count a tuple insertion |
| */ |
| void |
| pgstat_count_heap_insert(Relation rel) |
| { |
| PgStat_TableStatus *pgstat_info = rel->pgstat_info; |
| |
| if (pgstat_track_counts && pgstat_info != NULL) |
| { |
| int nest_level = GetCurrentTransactionNestLevel(); |
| |
| /* t_tuples_inserted is nontransactional, so just advance it */ |
| pgstat_info->t_counts.t_tuples_inserted++; |
| |
| /* We have to log the transactional effect at the proper level */ |
| if (pgstat_info->trans == NULL || |
| pgstat_info->trans->nest_level != nest_level) |
| add_tabstat_xact_level(pgstat_info, nest_level); |
| |
| pgstat_info->trans->tuples_inserted++; |
| } |
| } |
| |
| /* |
| * pgstat_count_heap_update - count a tuple update |
| */ |
| void |
| pgstat_count_heap_update(Relation rel, bool hot) |
| { |
| PgStat_TableStatus *pgstat_info = rel->pgstat_info; |
| |
| if (pgstat_track_counts && pgstat_info != NULL) |
| { |
| int nest_level = GetCurrentTransactionNestLevel(); |
| |
| /* t_tuples_updated is nontransactional, so just advance it */ |
| pgstat_info->t_counts.t_tuples_updated++; |
| /* ditto for the hot_update counter */ |
| if (hot) |
| pgstat_info->t_counts.t_tuples_hot_updated++; |
| |
| /* We have to log the transactional effect at the proper level */ |
| if (pgstat_info->trans == NULL || |
| pgstat_info->trans->nest_level != nest_level) |
| add_tabstat_xact_level(pgstat_info, nest_level); |
| |
| /* An UPDATE both inserts a new tuple and deletes the old */ |
| pgstat_info->trans->tuples_inserted++; |
| pgstat_info->trans->tuples_deleted++; |
| } |
| } |
| |
| /* |
| * pgstat_count_heap_delete - count a tuple deletion |
| */ |
| void |
| pgstat_count_heap_delete(Relation rel) |
| { |
| PgStat_TableStatus *pgstat_info = rel->pgstat_info; |
| |
| if (pgstat_track_counts && pgstat_info != NULL) |
| { |
| int nest_level = GetCurrentTransactionNestLevel(); |
| |
| /* t_tuples_deleted is nontransactional, so just advance it */ |
| pgstat_info->t_counts.t_tuples_deleted++; |
| |
| /* We have to log the transactional effect at the proper level */ |
| if (pgstat_info->trans == NULL || |
| pgstat_info->trans->nest_level != nest_level) |
| add_tabstat_xact_level(pgstat_info, nest_level); |
| |
| pgstat_info->trans->tuples_deleted++; |
| } |
| } |
| |
| /* |
| * pgstat_update_heap_dead_tuples - update dead-tuples count |
| * |
| * The semantics of this are that we are reporting the nontransactional |
| * recovery of "delta" dead tuples; so t_new_dead_tuples decreases |
| * rather than increasing, and the change goes straight into the per-table |
| * counter, not into transactional state. |
| */ |
| void |
| pgstat_update_heap_dead_tuples(Relation rel, int delta) |
| { |
| PgStat_TableStatus *pgstat_info = rel->pgstat_info; |
| |
| if (pgstat_track_counts && pgstat_info != NULL) |
| pgstat_info->t_counts.t_new_dead_tuples -= delta; |
| } |
| |
| |
| /* ---------- |
| * AtEOXact_PgStat |
| * |
| * Called from access/transam/xact.c at top-level transaction commit/abort. |
| * ---------- |
| */ |
| void |
| AtEOXact_PgStat(bool isCommit) |
| { |
| PgStat_SubXactStatus *xact_state; |
| |
| /* |
| * Count transaction commit or abort. (We use counters, not just bools, |
| * in case the reporting message isn't sent right away.) |
| */ |
| if (isCommit) |
| pgStatXactCommit++; |
| else |
| pgStatXactRollback++; |
| |
| /* |
| * Transfer transactional insert/update counts into the base tabstat |
| * entries. We don't bother to free any of the transactional state, since |
| * it's all in TopTransactionContext and will go away anyway. |
| */ |
| xact_state = pgStatXactStack; |
| if (xact_state != NULL) |
| { |
| PgStat_TableXactStatus *trans; |
| |
| Assert(xact_state->nest_level == 1); |
| Assert(xact_state->prev == NULL); |
| for (trans = xact_state->first; trans != NULL; trans = trans->next) |
| { |
| PgStat_TableStatus *tabstat; |
| |
| Assert(trans->nest_level == 1); |
| Assert(trans->upper == NULL); |
| tabstat = trans->parent; |
| Assert(tabstat->trans == trans); |
| if (isCommit) |
| { |
| tabstat->t_counts.t_new_live_tuples += |
| trans->tuples_inserted - trans->tuples_deleted; |
| tabstat->t_counts.t_new_dead_tuples += trans->tuples_deleted; |
| } |
| else |
| { |
| /* inserted tuples are dead, deleted tuples are unaffected */ |
| tabstat->t_counts.t_new_dead_tuples += trans->tuples_inserted; |
| } |
| tabstat->trans = NULL; |
| } |
| } |
| pgStatXactStack = NULL; |
| |
| /* Make sure any stats snapshot is thrown away */ |
| pgstat_clear_snapshot(); |
| } |
| |
| /* ---------- |
| * AtEOSubXact_PgStat |
| * |
| * Called from access/transam/xact.c at subtransaction commit/abort. |
| * ---------- |
| */ |
| void |
| AtEOSubXact_PgStat(bool isCommit, int nestDepth) |
| { |
| PgStat_SubXactStatus *xact_state; |
| |
| /* |
| * Transfer transactional insert/update counts into the next higher |
| * subtransaction state. |
| */ |
| xact_state = pgStatXactStack; |
| if (xact_state != NULL && |
| xact_state->nest_level >= nestDepth) |
| { |
| PgStat_TableXactStatus *trans; |
| PgStat_TableXactStatus *next_trans; |
| |
| /* delink xact_state from stack immediately to simplify reuse case */ |
| pgStatXactStack = xact_state->prev; |
| |
| for (trans = xact_state->first; trans != NULL; trans = next_trans) |
| { |
| PgStat_TableStatus *tabstat; |
| |
| next_trans = trans->next; |
| Assert(trans->nest_level == nestDepth); |
| tabstat = trans->parent; |
| Assert(tabstat->trans == trans); |
| if (isCommit) |
| { |
| if (trans->upper && trans->upper->nest_level == nestDepth - 1) |
| { |
| trans->upper->tuples_inserted += trans->tuples_inserted; |
| trans->upper->tuples_deleted += trans->tuples_deleted; |
| tabstat->trans = trans->upper; |
| pfree(trans); |
| } |
| else |
| { |
| /* |
| * When there isn't an immediate parent state, we can just |
| * reuse the record instead of going through a |
| * palloc/pfree pushup (this works since it's all in |
| * TopTransactionContext anyway). We have to re-link it |
| * into the parent level, though, and that might mean |
| * pushing a new entry into the pgStatXactStack. |
| */ |
| PgStat_SubXactStatus *upper_xact_state; |
| |
| upper_xact_state = get_tabstat_stack_level(nestDepth - 1); |
| trans->next = upper_xact_state->first; |
| upper_xact_state->first = trans; |
| trans->nest_level = nestDepth - 1; |
| } |
| } |
| else |
| { |
| /* |
| * On abort, inserted tuples are dead (and can be bounced out |
| * to the top-level tabstat), deleted tuples are unaffected |
| */ |
| tabstat->t_counts.t_new_dead_tuples += trans->tuples_inserted; |
| tabstat->trans = trans->upper; |
| pfree(trans); |
| } |
| } |
| pfree(xact_state); |
| } |
| } |
| |
| |
| /* |
| * AtPrepare_PgStat |
| * Save the transactional stats state at 2PC transaction prepare. |
| * |
| * In this phase we just generate 2PC records for all the pending |
| * transaction-dependent stats work. |
| */ |
| void |
| AtPrepare_PgStat(void) |
| { |
| PgStat_SubXactStatus *xact_state; |
| |
| xact_state = pgStatXactStack; |
| if (xact_state != NULL) |
| { |
| PgStat_TableXactStatus *trans; |
| |
| Assert(xact_state->nest_level == 1); |
| Assert(xact_state->prev == NULL); |
| for (trans = xact_state->first; trans != NULL; trans = trans->next) |
| { |
| PgStat_TableStatus *tabstat; |
| TwoPhasePgStatRecord record; |
| |
| Assert(trans->nest_level == 1); |
| Assert(trans->upper == NULL); |
| tabstat = trans->parent; |
| Assert(tabstat->trans == trans); |
| |
| record.tuples_inserted = trans->tuples_inserted; |
| record.tuples_deleted = trans->tuples_deleted; |
| record.t_id = tabstat->t_id; |
| record.t_shared = tabstat->t_shared; |
| |
| RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0, |
| &record, sizeof(TwoPhasePgStatRecord)); |
| } |
| } |
| } |
| |
| /* |
| * PostPrepare_PgStat |
| * Clean up after successful PREPARE. |
| * |
| * All we need do here is unlink the transaction stats state from the |
| * nontransactional state. The nontransactional action counts will be |
| * reported to the stats collector immediately, while the effects on live |
| * and dead tuple counts are preserved in the 2PC state file. |
| * |
| * Note: AtEOXact_PgStat is not called during PREPARE. |
| */ |
| void |
| PostPrepare_PgStat(void) |
| { |
| PgStat_SubXactStatus *xact_state; |
| |
| /* |
| * We don't bother to free any of the transactional state, since it's all |
| * in TopTransactionContext and will go away anyway. |
| */ |
| xact_state = pgStatXactStack; |
| if (xact_state != NULL) |
| { |
| PgStat_TableXactStatus *trans; |
| |
| for (trans = xact_state->first; trans != NULL; trans = trans->next) |
| { |
| PgStat_TableStatus *tabstat; |
| |
| tabstat = trans->parent; |
| tabstat->trans = NULL; |
| } |
| } |
| pgStatXactStack = NULL; |
| |
| /* Make sure any stats snapshot is thrown away */ |
| pgstat_clear_snapshot(); |
| } |
| |
| /* |
| * 2PC processing routine for COMMIT PREPARED case. |
| * |
| * Load the saved counts into our local pgstats state. |
| */ |
| void |
| pgstat_twophase_postcommit(TransactionId xid, uint16 info, |
| void *recdata, uint32 len) |
| { |
| TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; |
| PgStat_TableStatus *pgstat_info; |
| |
| /* Find or create a tabstat entry for the rel */ |
| pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared); |
| |
| pgstat_info->t_counts.t_new_live_tuples += |
| rec->tuples_inserted - rec->tuples_deleted; |
| pgstat_info->t_counts.t_new_dead_tuples += rec->tuples_deleted; |
| } |
| |
| /* |
| * 2PC processing routine for ROLLBACK PREPARED case. |
| * |
| * Load the saved counts into our local pgstats state, but treat them |
| * as aborted. |
| */ |
| void |
| pgstat_twophase_postabort(TransactionId xid, uint16 info, |
| void *recdata, uint32 len) |
| { |
| TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; |
| PgStat_TableStatus *pgstat_info; |
| |
| /* Find or create a tabstat entry for the rel */ |
| pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared); |
| |
| /* inserted tuples are dead, deleted tuples are no-ops */ |
| pgstat_info->t_counts.t_new_dead_tuples += rec->tuples_inserted; |
| } |
| |
| |
| /* ---------- |
| * pgstat_fetch_stat_dbentry() - |
| * |
| * Support function for the SQL-callable pgstat* functions. Returns |
| * the collected statistics for one database or NULL. NULL doesn't mean |
| * that the database doesn't exist, it is just not yet known by the |
| * collector, so the caller is better off to report ZERO instead. |
| * ---------- |
| */ |
| PgStat_StatDBEntry * |
| pgstat_fetch_stat_dbentry(Oid dbid) |
| { |
| /* |
| * If not done for this transaction, read the statistics collector stats |
| * file into some hash tables. |
| */ |
| backend_read_statsfile(); |
| |
| /* |
| * Lookup the requested database; return NULL if not found |
| */ |
| return (PgStat_StatDBEntry *) hash_search(pgStatDBHash, |
| (void *) &dbid, |
| HASH_FIND, NULL); |
| } |
| |
| |
| /* ---------- |
| * pgstat_fetch_stat_tabentry() - |
| * |
| * Support function for the SQL-callable pgstat* functions. Returns |
| * the collected statistics for one table or NULL. NULL doesn't mean |
| * that the table doesn't exist, it is just not yet known by the |
| * collector, so the caller is better off to report ZERO instead. |
| * ---------- |
| */ |
| PgStat_StatTabEntry * |
| pgstat_fetch_stat_tabentry(Oid relid) |
| { |
| Oid dbid; |
| PgStat_StatDBEntry *dbentry; |
| PgStat_StatTabEntry *tabentry; |
| |
| /* |
| * If not done for this transaction, read the statistics collector stats |
| * file into some hash tables. |
| */ |
| backend_read_statsfile(); |
| |
| /* |
| * Lookup our database, then look in its table hash table. |
| */ |
| dbid = MyDatabaseId; |
| dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash, |
| (void *) &dbid, |
| HASH_FIND, NULL); |
| if (dbentry != NULL && dbentry->tables != NULL) |
| { |
| tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables, |
| (void *) &relid, |
| HASH_FIND, NULL); |
| if (tabentry) |
| return tabentry; |
| } |
| |
| /* |
| * If we didn't find it, maybe it's a shared table. |
| */ |
| dbid = InvalidOid; |
| dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash, |
| (void *) &dbid, |
| HASH_FIND, NULL); |
| if (dbentry != NULL && dbentry->tables != NULL) |
| { |
| tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables, |
| (void *) &relid, |
| HASH_FIND, NULL); |
| if (tabentry) |
| return tabentry; |
| } |
| |
| return NULL; |
| } |
| |
| |
| /* ---------- |
| * pgstat_fetch_stat_funcentry() - |
| * |
| * Support function for the SQL-callable pgstat* functions. Returns |
| * the collected statistics for one function or NULL. |
| * ---------- |
| */ |
| PgStat_StatFuncEntry * |
| pgstat_fetch_stat_funcentry(Oid func_id) |
| { |
| PgStat_StatDBEntry *dbentry; |
| PgStat_StatFuncEntry *funcentry = NULL; |
| |
| /* load the stats file if needed */ |
| backend_read_statsfile(); |
| |
| /* Lookup our database, then find the requested function. */ |
| dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId); |
| if (dbentry != NULL && dbentry->functions != NULL) |
| { |
| funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions, |
| (void *) &func_id, |
| HASH_FIND, NULL); |
| } |
| |
| return funcentry; |
| } |
| |
| |
| /* ---------- |
| * pgstat_fetch_stat_beentry() - |
| * |
| * Support function for the SQL-callable pgstat* functions. Returns |
| * our local copy of the current-activity entry for one backend. |
| * |
| * NB: caller is responsible for a check if the user is permitted to see |
| * this info (especially the querystring). |
| * ---------- |
| */ |
| PgBackendStatus * |
| pgstat_fetch_stat_beentry(int beid) |
| { |
| pgstat_read_current_status(); |
| |
| if (beid < 1 || beid > localNumBackends) |
| return NULL; |
| |
| return &localBackendStatusTable[beid - 1]; |
| } |
| |
| |
| /* ---------- |
| * pgstat_fetch_stat_numbackends() - |
| * |
| * Support function for the SQL-callable pgstat* functions. Returns |
| * the maximum current backend id. |
| * ---------- |
| */ |
| int |
| pgstat_fetch_stat_numbackends(void) |
| { |
| pgstat_read_current_status(); |
| |
| return localNumBackends; |
| } |
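|  |
| /* |
| * Illustrative sketch (not part of the original code): a SQL-callable |
| * stats function would typically combine the two fetch functions above to |
| * scan all active backends, e.g. |
| * |
| *		int		num = pgstat_fetch_stat_numbackends(); |
| *		int		i; |
| * |
| *		for (i = 1; i <= num; i++) |
| *		{ |
| *			PgBackendStatus *be = pgstat_fetch_stat_beentry(i); |
| * |
| *			if (be != NULL) |
| *				... inspect be->st_procpid, be->st_activity, etc ... |
| *		} |
| * |
| * Both calls read the same transaction-local snapshot (built by |
| * pgstat_read_current_status), so the loop sees a consistent view until |
| * pgstat_clear_snapshot() discards it. |
| */ |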
| |
| /* |
| * --------- |
| * pgstat_fetch_global() - |
| * |
| * Support function for the SQL-callable pgstat* functions. Returns |
| * a pointer to the global statistics struct. |
| * --------- |
| */ |
| PgStat_GlobalStats * |
| pgstat_fetch_global(void) |
| { |
| backend_read_statsfile(); |
| |
| return &globalStats; |
| } |
| |
| |
| /* ------------------------------------------------------------ |
| * Functions for management of the shared-memory PgBackendStatus array |
| * ------------------------------------------------------------ |
| */ |
| |
| static PgBackendStatus *BackendStatusArray = NULL; |
| static PgBackendStatus *MyBEEntry = NULL; |
| static char *BackendAppnameBuffer = NULL; |
| static char *BackendActivityBuffer = NULL; |
| |
| |
| /* |
| * Report shared-memory space needed by CreateSharedBackendStatus. |
| */ |
| Size |
| BackendStatusShmemSize(void) |
| { |
| Size size; |
| |
| size = mul_size(sizeof(PgBackendStatus), MaxBackends); |
| size = add_size(size, |
| mul_size(NAMEDATALEN, MaxBackends)); |
| size = add_size(size, |
| mul_size(pgstat_track_activity_query_size, MaxBackends)); |
| return size; |
| } |
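|  |
| /* |
| * Sketch of the layout implied above and built by CreateSharedBackendStatus |
| * below: one PgBackendStatus slot per backend, plus one NAMEDATALEN-sized |
| * appname slice and one pgstat_track_activity_query_size-sized activity |
| * slice per backend; each slot's st_appname/st_activity pointers are aimed |
| * at its own slices of those two string buffers. |
| */ |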
| |
| /* |
| * Initialize the shared status array and activity/appname string buffers |
| * during postmaster startup. |
| */ |
| void |
| CreateSharedBackendStatus(void) |
| { |
| Size size; |
| bool found; |
| int i; |
| char *buffer; |
| |
| /* Create or attach to the shared array */ |
| size = mul_size(sizeof(PgBackendStatus), MaxBackends); |
| BackendStatusArray = (PgBackendStatus *) |
| ShmemInitStruct("Backend Status Array", size, &found); |
| |
| if (!found) |
| { |
| /* |
| * We're the first - initialize. |
| */ |
| MemSet(BackendStatusArray, 0, size); |
| } |
| |
| /* Create or attach to the shared appname buffer */ |
| size = mul_size(NAMEDATALEN, MaxBackends); |
| BackendAppnameBuffer = (char *) |
| ShmemInitStruct("Backend Application Name Buffer", size, &found); |
| |
| if (!found) |
| { |
| MemSet(BackendAppnameBuffer, 0, size); |
| |
| /* Initialize st_appname pointers. */ |
| buffer = BackendAppnameBuffer; |
| for (i = 0; i < MaxBackends; i++) |
| { |
| BackendStatusArray[i].st_appname = buffer; |
| buffer += NAMEDATALEN; |
| } |
| } |
| |
| /* Create or attach to the shared activity buffer */ |
| size = mul_size(pgstat_track_activity_query_size, MaxBackends); |
| BackendActivityBuffer = (char *) |
| ShmemInitStruct("Backend Activity Buffer", size, &found); |
| |
| if (!found) |
| { |
| MemSet(BackendActivityBuffer, 0, size); |
| |
| /* Initialize st_activity pointers. */ |
| buffer = BackendActivityBuffer; |
| for (i = 0; i < MaxBackends; i++) |
| { |
| BackendStatusArray[i].st_activity = buffer; |
| buffer += pgstat_track_activity_query_size; |
| } |
| } |
| } |
| |
| |
| /* ---------- |
| * pgstat_initialize() - |
| * |
| * Initialize pgstats state, and set up our on-proc-exit hook. |
| * Called from InitPostgres. MyBackendId must be set, |
| * but we must not have started any transaction yet (since the |
| * exit hook must run after the last transaction exit). |
| * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful. |
| * ---------- |
| */ |
| void |
| pgstat_initialize(void) |
| { |
| /* Initialize MyBEEntry */ |
| Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends); |
| MyBEEntry = &BackendStatusArray[MyBackendId - 1]; |
| |
| /* Set up a process-exit hook to clean up */ |
| on_shmem_exit(pgstat_beshutdown_hook, 0); |
| } |
| |
| /* ---------- |
| * pgstat_bestart() - |
| * |
| * Initialize this backend's entry in the PgBackendStatus array. |
| * Called from InitPostgres. |
| * MyDatabaseId, session userid, and application_name must be set |
| * (hence, this cannot be combined with pgstat_initialize). |
| * ---------- |
| */ |
| void |
| pgstat_bestart(void) |
| { |
| TimestampTz proc_start_timestamp; |
| Oid userid; |
| SockAddr clientaddr; |
| volatile PgBackendStatus *beentry; |
| |
| /* |
| * To minimize the time spent modifying the PgBackendStatus entry, fetch |
| * all the needed data first. |
| * |
| * If we have a MyProcPort, use its session start time (for consistency, |
| * and to save a kernel call). |
| */ |
| if (MyProcPort) |
| proc_start_timestamp = MyProcPort->SessionStartTime; |
| else |
| proc_start_timestamp = GetCurrentTimestamp(); |
| userid = GetSessionUserId(); |
| |
| /* |
| * We may not have a MyProcPort (eg, if this is the autovacuum process). |
| * If so, use all-zeroes client address, which is dealt with specially in |
| * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port. |
| */ |
| if (MyProcPort) |
| memcpy(&clientaddr, &MyProcPort->raddr, sizeof(clientaddr)); |
| else |
| MemSet(&clientaddr, 0, sizeof(clientaddr)); |
| |
| /* |
| * Initialize my status entry, following the protocol of bumping |
| * st_changecount before and after; and make sure it's even afterwards. We |
| * use a volatile pointer here to ensure the compiler doesn't try to get |
| * cute. |
| */ |
| beentry = MyBEEntry; |
| do |
| { |
| beentry->st_changecount++; |
| } while ((beentry->st_changecount & 1) == 0); |
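| /* st_changecount is now odd: concurrent readers treat the entry as in flux */ |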
| |
| beentry->st_procpid = MyProcPid; |
| beentry->st_proc_start_timestamp = proc_start_timestamp; |
| beentry->st_activity_start_timestamp = 0; |
| beentry->st_xact_start_timestamp = 0; |
| beentry->st_databaseid = MyDatabaseId; |
| beentry->st_userid = userid; |
| beentry->st_session_id = gp_session_id; /* GPDB only */ |
| beentry->st_clientaddr = clientaddr; |
| beentry->st_waiting = false; |
| beentry->st_appname[0] = '\0'; |
| beentry->st_activity[0] = '\0'; |
| /* Also make sure the last byte in each string area is always 0 */ |
| beentry->st_appname[NAMEDATALEN - 1] = '\0'; |
| beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0'; |
| beentry->st_waiting_resource = false; |
| |
| beentry->st_changecount++; |
| Assert((beentry->st_changecount & 1) == 0); |
| |
| /* |
| * GPDB: Initialize per-portal statistics hash for resource queues. |
| */ |
| pgstat_init_localportalhash(); |
| |
| |
| /* |
| * GPDB: Set up a process-exit hook to clean up. |
| */ |
| on_shmem_exit(pgstat_beshutdown_hook, 0); |
| |
| |
| /* Update app name to current GUC setting */ |
| if (application_name) |
| pgstat_report_appname(application_name); |
| } |
| |
| /* |
| * Shut down a single backend's statistics reporting at process exit. |
| * |
| * Flush any remaining statistics counts out to the collector. |
| * Without this, operations triggered during backend exit (such as |
| * temp table deletions) won't be counted. |
| * |
| * Lastly, clear out our entry in the PgBackendStatus array. |
| */ |
| static void |
| pgstat_beshutdown_hook(int code, Datum arg) |
| { |
| volatile PgBackendStatus *beentry = MyBEEntry; |
| |
| /* |
| * If we got as far as discovering our own database ID, we can report what |
| * we did to the collector. Otherwise, we'd be sending an invalid |
| * database ID, so forget it. (This means that accesses to pg_database |
| * during failed backend starts might never get counted.) |
| */ |
| if (OidIsValid(MyDatabaseId)) |
| pgstat_report_stat(true); |
| |
| /* |
| * Clear my status entry, following the protocol of bumping st_changecount |
| * before and after. We use a volatile pointer here to ensure the |
| * compiler doesn't try to get cute. |
| */ |
| beentry->st_changecount++; |
| |
| beentry->st_procpid = 0; /* mark invalid */ |
| beentry->st_session_id = 0; |
| |
| beentry->st_changecount++; |
| Assert((beentry->st_changecount & 1) == 0); |
| } |
| |
| |
| /* ---------- |
| * pgstat_report_activity() - |
| * |
| * Called from tcop/postgres.c to report what the backend is actually doing |
| * (usually "<IDLE>" or the start of the query to be executed). |
| * ---------- |
| */ |
| void |
| pgstat_report_activity(const char *cmd_str) |
| { |
| volatile PgBackendStatus *beentry = MyBEEntry; |
| TimestampTz start_timestamp; |
| int len; |
| |
| /* TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str); (probe disabled here) */ |
| |
| if (!pgstat_track_activities || !beentry) |
| return; |
| |
| /* |
| * To minimize the time spent modifying the entry, fetch all the needed |
| * data first. |
| */ |
| start_timestamp = GetCurrentStatementStartTimestamp(); |
| |
| len = strlen(cmd_str); |
| len = pg_mbcliplen(cmd_str, len, pgstat_track_activity_query_size - 1); |
| |
| /* |
| * Update my status entry, following the protocol of bumping |
| * st_changecount before and after. We use a volatile pointer here to |
| * ensure the compiler doesn't try to get cute. |
| */ |
| beentry->st_changecount++; |
| |
| beentry->st_activity_start_timestamp = start_timestamp; |
| memcpy((char *) beentry->st_activity, cmd_str, len); |
| beentry->st_activity[len] = '\0'; |
| |
| beentry->st_changecount++; |
| Assert((beentry->st_changecount & 1) == 0); |
| } |
| |
| /* ---------- |
| * pgstat_report_appname() - |
| * |
| * Called to update our application name. |
| * ---------- |
| */ |
| void |
| pgstat_report_appname(const char *appname) |
| { |
| volatile PgBackendStatus *beentry = MyBEEntry; |
| int len; |
| |
| if (!beentry) |
| return; |
| |
| /* This should be unnecessary if GUC did its job, but be safe */ |
| len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1); |
| |
| /* |
| * Update my status entry, following the protocol of bumping |
| * st_changecount before and after. We use a volatile pointer here to |
| * ensure the compiler doesn't try to get cute. |
| */ |
| beentry->st_changecount++; |
| |
| memcpy((char *) beentry->st_appname, appname, len); |
| beentry->st_appname[len] = '\0'; |
| |
| beentry->st_changecount++; |
| Assert((beentry->st_changecount & 1) == 0); |
| } |
| |
| /* |
| * Report current transaction start timestamp as the specified value. |
| * Zero means there is no active transaction. |
| */ |
| void |
| pgstat_report_xact_timestamp(TimestampTz tstamp) |
| { |
| volatile PgBackendStatus *beentry = MyBEEntry; |
| |
| if (!pgstat_track_activities || !beentry) |
| return; |
| |
| /* |
| * Update my status entry, following the protocol of bumping |
| * st_changecount before and after. We use a volatile pointer here to |
| * ensure the compiler doesn't try to get cute. |
| */ |
| beentry->st_changecount++; |
| beentry->st_xact_start_timestamp = tstamp; |
| beentry->st_changecount++; |
| Assert((beentry->st_changecount & 1) == 0); |
| } |
| |
| /* ---------- |
| * pgstat_report_waiting() - |
| * |
| * Called from lock manager to report beginning or end of a lock wait. |
| * |
| * NB: this *must* be able to survive being called before MyBEEntry has been |
| * initialized. |
| * ---------- |
| */ |
| void |
| pgstat_report_waiting(bool waiting) |
| { |
| volatile PgBackendStatus *beentry = MyBEEntry; |
| |
| if (!pgstat_track_activities || !beentry) |
| return; |
| |
| /* |
| * Since this is a single-byte field in a struct that only this process |
| * may modify, there seems no need to bother with the st_changecount |
| * protocol. The update must appear atomic in any case. |
| */ |
| beentry->st_waiting = waiting; |
| } |
| |
| /* ---------- |
| * pgstat_report_waiting_resource() - |
| * |
| * Report the beginning or end of a resource wait. |
| * |
| * NB: this *must* be able to survive being called before MyBEEntry has been |
| * initialized. |
| * ---------- |
| */ |
| void |
| pgstat_report_waiting_resource(bool waiting) |
| { |
| volatile PgBackendStatus *beentry = MyBEEntry; |
| |
| if (!pgstat_track_activities || !beentry) |
| return; |
| |
| /* |
| * Since this is a single-byte field in a struct that only this process |
| * may modify, there seems no need to bother with the st_changecount |
| * protocol. The update must appear atomic in any case. |
| */ |
| beentry->st_waiting_resource = waiting; |
| } |
| |
| /* ---------- |
| * pgstat_read_current_status() - |
| * |
| * Copy the current contents of the PgBackendStatus array to local memory, |
| * if not already done in this transaction. |
| * ---------- |
| */ |
| static void |
| pgstat_read_current_status(void) |
| { |
| volatile PgBackendStatus *beentry; |
| PgBackendStatus *localtable; |
| PgBackendStatus *localentry; |
| char *localappname; |
| char *localactivity; |
| int i; |
| |
| Assert(!pgStatRunningInCollector); |
| if (localBackendStatusTable) |
| return; /* already done */ |
| |
| pgstat_setup_memcxt(); |
| |
| localtable = (PgBackendStatus *) |
| MemoryContextAlloc(pgStatLocalContext, |
| sizeof(PgBackendStatus) * MaxBackends); |
| localappname = (char *) |
| MemoryContextAlloc(pgStatLocalContext, |
| NAMEDATALEN * MaxBackends); |
| localactivity = (char *) |
| MemoryContextAlloc(pgStatLocalContext, |
| pgstat_track_activity_query_size * MaxBackends); |
| localNumBackends = 0; |
| |
| beentry = BackendStatusArray; |
| localentry = localtable; |
| for (i = 1; i <= MaxBackends; i++) |
| { |
| /* |
| * Follow the protocol of retrying if st_changecount changes while we |
| * copy the entry, or if it's odd. (The check for odd is needed to |
| * cover the case where we are able to completely copy the entry while |
| * the source backend is between increment steps.) We use a volatile |
| * pointer here to ensure the compiler doesn't try to get cute. |
| */ |
| for (;;) |
| { |
| int save_changecount = beentry->st_changecount; |
| |
| localentry->st_procpid = beentry->st_procpid; |
| if (localentry->st_procpid > 0) |
| { |
| memcpy(localentry, (char *) beentry, sizeof(PgBackendStatus)); |
| |
| /* |
| * strcpy is safe even if the string is modified concurrently, |
| * because there's always a \0 at the end of the buffer. |
| */ |
| strcpy(localappname, (char *) beentry->st_appname); |
| localentry->st_appname = localappname; |
| strcpy(localactivity, (char *) beentry->st_activity); |
| localentry->st_activity = localactivity; |
| } |
| |
| if (save_changecount == beentry->st_changecount && |
| (save_changecount & 1) == 0) |
| break; |
| |
| /* Make sure we can break out of loop if stuck... */ |
| CHECK_FOR_INTERRUPTS(); |
| } |
| |
| beentry++; |
| /* Only valid entries get included into the local array */ |
| if (localentry->st_procpid > 0) |
| { |
| localentry++; |
| localappname += NAMEDATALEN; |
| localactivity += pgstat_track_activity_query_size; |
| localNumBackends++; |
| } |
| } |
| |
| /* Set the pointer only after completion of a valid table */ |
| localBackendStatusTable = localtable; |
| } |
| |
| |
| /* ---------- |
| * pgstat_get_backend_current_activity() - |
| * |
| * Return a string representing the current activity of the backend with |
| * the specified PID. This looks directly at the BackendStatusArray, |
| * and so will provide current information regardless of the age of our |
| * transaction's snapshot of the status array. |
| * |
| * It is the caller's responsibility to invoke this only for backends whose |
| * state is expected to remain stable while the result is in use. The |
| * only current use is in deadlock reporting, where we can expect that |
| * the target backend is blocked on a lock. (There are corner cases |
| * where the target's wait could get aborted while we are looking at it, |
| * but the very worst consequence is to return a pointer to a string |
| * that's been changed, so we won't worry too much.) |
| * |
| * Note: return strings for special cases match pg_stat_get_backend_activity. |
| * ---------- |
| */ |
| const char * |
| pgstat_get_backend_current_activity(int pid, bool checkUser) |
| { |
| PgBackendStatus *beentry; |
| int i; |
| |
| beentry = BackendStatusArray; |
| for (i = 1; i <= MaxBackends; i++) |
| { |
| /* |
| * Although we expect the target backend's entry to be stable, that |
| * doesn't imply that anyone else's is. To avoid identifying the |
| * wrong backend, while we check for a match to the desired PID we |
| * must follow the protocol of retrying if st_changecount changes |
| * while we examine the entry, or if it's odd. (This might be |
| * unnecessary, since fetching or storing an int is almost certainly |
| * atomic, but let's play it safe.) We use a volatile pointer here to |
| * ensure the compiler doesn't try to get cute. |
| */ |
| volatile PgBackendStatus *vbeentry = beentry; |
| bool found; |
| |
| for (;;) |
| { |
| int save_changecount = vbeentry->st_changecount; |
| |
| found = (vbeentry->st_procpid == pid); |
| |
| if (save_changecount == vbeentry->st_changecount && |
| (save_changecount & 1) == 0) |
| break; |
| |
| /* Make sure we can break out of loop if stuck... */ |
| CHECK_FOR_INTERRUPTS(); |
| } |
| |
| if (found) |
| { |
| /* Now it is safe to use the non-volatile pointer */ |
| if (checkUser && !superuser() && beentry->st_userid != GetUserId()) |
| return "<insufficient privilege>"; |
| else if (*(beentry->st_activity) == '\0') |
| return "<command string not enabled>"; |
| else |
| return beentry->st_activity; |
| } |
| |
| beentry++; |
| } |
| |
| /* If we get here, caller is in error ... */ |
| return "<backend information not available>"; |
| } |
| |
| |
| /* ------------------------------------------------------------ |
| * Local support functions follow |
| * ------------------------------------------------------------ |
| */ |
| |
| |
| /* ---------- |
| * pgstat_setheader() - |
| * |
| * Set common header fields in a statistics message |
| * ---------- |
| */ |
| static void |
| pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype) |
| { |
| hdr->m_type = mtype; |
| } |
| |
| |
| /* ---------- |
| * pgstat_send() - |
| * |
| * Send out one statistics message to the collector |
| * ---------- |
| */ |
| static void |
| pgstat_send(void *msg, int len) |
| { |
| int rc; |
| |
| if (pgStatSock == PGINVALID_SOCKET) |
| return; |
| |
| ((PgStat_MsgHdr *) msg)->m_size = len; |
| |
| /* We'll retry after EINTR, but ignore all other failures */ |
| do |
| { |
| rc = send(pgStatSock, msg, len, 0); |
| } while (rc < 0 && errno == EINTR); |
| |
| #ifdef USE_ASSERT_CHECKING |
| /* In debug builds, log send failures ... */ |
| if (rc < 0) |
| elog(LOG, "could not send to statistics collector: %m"); |
| #endif |
| } |
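|  |
| /* |
| * Illustrative sketch (PgStat_MsgDummy is assumed from pgstat.h): the |
| * smallest possible use of the two helpers above is the keep-alive dummy |
| * message, which carries nothing beyond the common header: |
| * |
| *		PgStat_MsgDummy msg; |
| * |
| *		pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DUMMY); |
| *		pgstat_send(&msg, sizeof(msg)); |
| * |
| * Every sender in this file follows the same pattern: fill in the payload, |
| * set the header, then hand the struct to pgstat_send(), which stamps |
| * m_size and writes the bytes to the collector's socket. |
| */ |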
| |
| /* ---------- |
| * pgstat_send_bgwriter() - |
| * |
| * Send bgwriter statistics to the collector |
| * ---------- |
| */ |
| void |
| pgstat_send_bgwriter(void) |
| { |
| /* We assume this initializes to zeroes */ |
| static const PgStat_MsgBgWriter all_zeroes; |
| |
| /* |
| * This function can be called even if nothing at all has happened. In |
| * this case, avoid sending a completely empty message to the stats |
| * collector. |
| */ |
| if (memcmp(&BgWriterStats, &all_zeroes, sizeof(PgStat_MsgBgWriter)) == 0) |
| return; |
| |
| /* |
| * Prepare and send the message |
| */ |
| pgstat_setheader(&BgWriterStats.m_hdr, PGSTAT_MTYPE_BGWRITER); |
| pgstat_send(&BgWriterStats, sizeof(BgWriterStats)); |
| |
| /* |
| * Clear out the statistics buffer, so it can be re-used. |
| */ |
| MemSet(&BgWriterStats, 0, sizeof(BgWriterStats)); |
| } |
| |
| |
| /* ---------- |
| * PgstatCollectorMain() - |
| * |
| * Start up the statistics collector process. This is the body of the |
| * postmaster child process. |
| * |
| * The argc/argv parameters are valid only in EXEC_BACKEND case. |
| * ---------- |
| */ |
| NON_EXEC_STATIC void |
| PgstatCollectorMain(int argc, char *argv[]) |
| { |
| int len; |
| PgStat_Msg msg; |
| |
| #ifndef WIN32 |
| #ifdef HAVE_POLL |
| struct pollfd input_fd; |
| #else |
| struct timeval sel_timeout; |
| fd_set rfds; |
| #endif |
| #endif |
| |
| IsUnderPostmaster = true; /* we are a postmaster subprocess now */ |
| |
| MyProcPid = getpid(); /* reset MyProcPid */ |
| |
| MyStartTime = time(NULL); /* record Start Time for logging */ |
| |
| /* |
| * If possible, make this process a group leader, so that the postmaster |
| * can signal any child processes too. (pgstat probably never has any |
| * child processes, but for consistency we make all postmaster child |
| * processes do this.) |
| */ |
| #ifdef HAVE_SETSID |
| if (setsid() < 0) |
| elog(FATAL, "setsid() failed: %m"); |
| #endif |
| |
| /* |
| * Ignore all signals usually bound to some action in the postmaster, |
| * except SIGQUIT. |
| */ |
| pqsignal(SIGHUP, pgstat_sighup_handler); |
| pqsignal(SIGINT, SIG_IGN); |
| pqsignal(SIGTERM, SIG_IGN); |
| pqsignal(SIGQUIT, pgstat_exit); |
| pqsignal(SIGALRM, SIG_IGN); |
| pqsignal(SIGPIPE, SIG_IGN); |
| pqsignal(SIGUSR1, SIG_IGN); |
| pqsignal(SIGUSR2, SIG_IGN); |
| pqsignal(SIGCHLD, SIG_DFL); |
| pqsignal(SIGTTIN, SIG_DFL); |
| pqsignal(SIGTTOU, SIG_DFL); |
| pqsignal(SIGCONT, SIG_DFL); |
| pqsignal(SIGWINCH, SIG_DFL); |
| PG_SETMASK(&UnBlockSig); |
| |
| /* |
| * Identify myself via ps |
| */ |
| init_ps_display("stats collector process", "", "", ""); |
| |
| /* |
| * Arrange to write the initial status file right away |
| */ |
| last_statrequest = GetCurrentTimestamp(); |
| last_statwrite = last_statrequest - 1; |
| |
| /* |
| * Read in an existing statistics file or initialize the stats to zero. |
| */ |
| pgStatRunningInCollector = true; |
| pgStatDBHash = pgstat_read_statsfile(InvalidOid, true); |
| |
| /* |
| * Set up the descriptor set for select(2). Since only one bit in the set |
| * ever changes, we need not repeat FD_ZERO each time. |
| */ |
| #if !defined(HAVE_POLL) && !defined(WIN32) |
| FD_ZERO(&rfds); |
| #endif |
| |
| /* |
| * Loop to process messages until we get SIGQUIT or detect ungraceful |
| * death of our parent postmaster. |
| * |
| * For performance reasons, we don't want to do a PostmasterIsAlive() test |
| * after every message; instead, do it only when select()/poll() is |
| * interrupted by timeout. In essence, we'll stay alive as long as |
| * backends keep sending us stuff often, even if the postmaster is gone. |
| */ |
| for (;;) |
| { |
| int got_data; |
| |
| /* |
| * Quit if we get SIGQUIT from the postmaster. |
| */ |
| if (need_exit) |
| break; |
| |
| /* |
| * Reload configuration if we got SIGHUP from the postmaster. |
| */ |
| if (got_SIGHUP) |
| { |
| ProcessConfigFile(PGC_SIGHUP); |
| got_SIGHUP = false; |
| } |
| |
| /* |
| * Write the stats file if a new request has arrived that is not |
| * satisfied by the existing file. |
| */ |
| if (last_statwrite < last_statrequest) |
| pgstat_write_statsfile(false); |
| |
| /* |
| * Wait for a message to arrive; but not for more than |
| * PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will |
| * shut down after an ungraceful postmaster termination; so it needn't |
| * be very fast. However, on some systems SIGQUIT won't interrupt the |
| * poll/select call, so this also limits speed of response to SIGQUIT, |
| * which is more important.) |
| * |
| * We use poll(2) if available, otherwise select(2). Win32 has its own |
| * implementation. |
| */ |
| #ifndef WIN32 |
| #ifdef HAVE_POLL |
| input_fd.fd = pgStatSock; |
| input_fd.events = POLLIN | POLLERR; |
| input_fd.revents = 0; |
| |
| if (poll(&input_fd, 1, PGSTAT_SELECT_TIMEOUT * 1000) < 0) |
| { |
| if (errno == EINTR) |
| continue; |
| ereport(ERROR, |
| (errcode_for_socket_access(), |
| errmsg("poll() failed in statistics collector: %m"))); |
| } |
| |
| got_data = (input_fd.revents != 0); |
| #else /* !HAVE_POLL */ |
| |
| FD_SET(pgStatSock, &rfds); |
| |
| /* |
| * timeout struct is modified by select() on some operating systems, |
| * so re-fill it each time. |
| */ |
| sel_timeout.tv_sec = PGSTAT_SELECT_TIMEOUT; |
| sel_timeout.tv_usec = 0; |
| |
| if (select(pgStatSock + 1, &rfds, NULL, NULL, &sel_timeout) < 0) |
| { |
| if (errno == EINTR) |
| continue; |
| ereport(ERROR, |
| (errcode_for_socket_access(), |
| errmsg("select() failed in statistics collector: %m"))); |
| } |
| |
| got_data = FD_ISSET(pgStatSock, &rfds); |
| #endif /* HAVE_POLL */ |
| #else /* WIN32 */ |
| got_data = pgwin32_waitforsinglesocket(pgStatSock, FD_READ, |
| PGSTAT_SELECT_TIMEOUT*1000); |
| #endif |
| |
| /* |
| * If there is a message on the socket, read it and check for |
| * validity. |
| */ |
| if (got_data) |
| { |
| len = recv(pgStatSock, (char *) &msg, |
| sizeof(PgStat_Msg), 0); |
| if (len < 0) |
| { |
| if (errno == EINTR) |
| continue; |
| ereport(ERROR, |
| (errcode_for_socket_access(), |
| errmsg("could not read statistics message: %m"))); |
| } |
| |
| /* |
| * We ignore messages that are smaller than our common header |
| */ |
| if (len < sizeof(PgStat_MsgHdr)) |
| continue; |
| |
| /* |
| * The received length must match the length in the header |
| */ |
| if (msg.msg_hdr.m_size != len) |
| continue; |
| |
| /* |
| * O.K. - we accept this message. Process it. |
| */ |
| switch (msg.msg_hdr.m_type) |
| { |
| case PGSTAT_MTYPE_DUMMY: |
| break; |
| |
| case PGSTAT_MTYPE_INQUIRY: |
| pgstat_recv_inquiry((PgStat_MsgInquiry *) &msg, len); |
| break; |
| |
| case PGSTAT_MTYPE_TABSTAT: |
| pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, len); |
| break; |
| |
| case PGSTAT_MTYPE_TABPURGE: |
| pgstat_recv_tabpurge((PgStat_MsgTabpurge *) &msg, len); |
| break; |
| |
| case PGSTAT_MTYPE_DROPDB: |
| pgstat_recv_dropdb((PgStat_MsgDropdb *) &msg, len); |
| break; |
| |
| case PGSTAT_MTYPE_RESETCOUNTER: |
| pgstat_recv_resetcounter((PgStat_MsgResetcounter *) &msg, |
| len); |
| break; |
| |
| case PGSTAT_MTYPE_AUTOVAC_START: |
| pgstat_recv_autovac((PgStat_MsgAutovacStart *) &msg, len); |
| break; |
| |
| case PGSTAT_MTYPE_VACUUM: |
| pgstat_recv_vacuum((PgStat_MsgVacuum *) &msg, len); |
| break; |
| |
| case PGSTAT_MTYPE_ANALYZE: |
| pgstat_recv_analyze((PgStat_MsgAnalyze *) &msg, len); |
| break; |
| |
| case PGSTAT_MTYPE_BGWRITER: |
| pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len); |
| break; |
| |
| case PGSTAT_MTYPE_QUEUESTAT: /* GPDB */ |
| pgstat_recv_queuestat((PgStat_MsgQueuestat *) &msg, len); |
| break; |
| |
| case PGSTAT_MTYPE_FUNCSTAT: |
| pgstat_recv_funcstat((PgStat_MsgFuncstat *) &msg, len); |
| break; |
| |
| case PGSTAT_MTYPE_FUNCPURGE: |
| pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len); |
| break; |
| |
| default: |
| break; |
| } |
| } |
| else |
| { |
| /* |
| * We can only get here if the select/poll timeout elapsed. Check |
| * for postmaster death. |
| */ |
| if (!PostmasterIsAlive(true)) |
| break; |
| } |
| } /* end of message-processing loop */ |
| |
| /* |
| * Save the final stats to reuse at next startup. |
| */ |
| pgstat_write_statsfile(true); |
| |
| exit(0); |
| } |
| |
| |
| /* SIGQUIT signal handler for collector process */ |
| static void |
| pgstat_exit(SIGNAL_ARGS) |
| { |
| need_exit = true; |
| } |
| |
| /* SIGHUP handler for collector process */ |
| static void |
| pgstat_sighup_handler(SIGNAL_ARGS) |
| { |
| got_SIGHUP = true; |
| } |
| |
| |
| /* |
| * Look up the hash table entry for the specified database. If no entry |
| * exists and the create parameter is true, initialize a new one; |
| * otherwise return NULL. |
| */ |
| static PgStat_StatDBEntry * |
| pgstat_get_db_entry(Oid databaseid, bool create) |
| { |
| PgStat_StatDBEntry *result; |
| bool found; |
| HASHACTION action = (create ? HASH_ENTER : HASH_FIND); |
| |
| /* Lookup or create the hash table entry for this database */ |
| result = (PgStat_StatDBEntry *) hash_search(pgStatDBHash, |
| &databaseid, |
| action, &found); |
| |
| if (!create && !found) |
| return NULL; |
| |
| /* If not found, initialize the new one. */ |
| if (!found) |
| { |
| HASHCTL hash_ctl; |
| |
| result->tables = NULL; |
| result->functions = NULL; |
| result->n_xact_commit = 0; |
| result->n_xact_rollback = 0; |
| result->n_blocks_fetched = 0; |
| result->n_blocks_hit = 0; |
| result->n_tuples_returned = 0; |
| result->n_tuples_fetched = 0; |
| result->n_tuples_inserted = 0; |
| result->n_tuples_updated = 0; |
| result->n_tuples_deleted = 0; |
| result->last_autovac_time = 0; |
| |
| memset(&hash_ctl, 0, sizeof(hash_ctl)); |
| hash_ctl.keysize = sizeof(Oid); |
| hash_ctl.entrysize = sizeof(PgStat_StatTabEntry); |
| hash_ctl.hash = oid_hash; |
| result->tables = hash_create("Per-database table", |
| PGSTAT_TAB_HASH_SIZE, |
| &hash_ctl, |
| HASH_ELEM | HASH_FUNCTION); |
| |
| hash_ctl.keysize = sizeof(Oid); |
| hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry); |
| hash_ctl.hash = oid_hash; |
| result->functions = hash_create("Per-database function", |
| PGSTAT_FUNCTION_HASH_SIZE, |
| &hash_ctl, |
| HASH_ELEM | HASH_FUNCTION); |
| } |
| |
| return result; |
| } |
| |
| |
| /* ---------- |
| * pgstat_write_statsfile() - |
| * |
| * Tell the news. |
| * If writing to the permanent file (which happens only when the collector |
| * is shutting down), remove the temporary file so that backends |
| * starting up under a new postmaster can't read the old data before |
| * the new collector is ready. |
| * ---------- |
| */ |
| static void |
| pgstat_write_statsfile(bool permanent) |
| { |
| HASH_SEQ_STATUS hstat; |
| HASH_SEQ_STATUS tstat; |
| HASH_SEQ_STATUS fstat; |
| PgStat_StatDBEntry *dbentry; |
| PgStat_StatTabEntry *tabentry; |
| PgStat_StatFuncEntry *funcentry; |
| FILE *fpout; |
| int32 format_id; |
| const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname; |
| const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; |
| |
| /* |
| * Open the statistics temp file to write out the current values. |
| */ |
| fpout = AllocateFile(tmpfile, PG_BINARY_W); |
| if (fpout == NULL) |
| { |
| ereport(LOG, |
| (errcode_for_file_access(), |
| errmsg("could not open temporary statistics file \"%s\": %m", |
| tmpfile))); |
| return; |
| } |
| |
| /* |
| * Set the timestamp of the stats file. |
| */ |
| globalStats.stats_timestamp = GetCurrentTimestamp(); |
| |
| /* |
| * Write the file header --- currently just a format ID. |
| */ |
| format_id = PGSTAT_FILE_FORMAT_ID; |
| fwrite(&format_id, sizeof(format_id), 1, fpout); |
| |
| /* |
| * Write global stats struct |
| */ |
| fwrite(&globalStats, sizeof(globalStats), 1, fpout); |
| |
| /* |
| * Walk through the database table. |
| */ |
| hash_seq_init(&hstat, pgStatDBHash); |
| while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL) |
| { |
| /* |
| * Write out the DB entry including the number of live backends. We |
| * don't write the tables or functions pointers, since they're of no |
| * use to any other process. |
| */ |
| fputc('D', fpout); |
| fwrite(dbentry, offsetof(PgStat_StatDBEntry, tables), 1, fpout); |
| |
| /* |
| * Walk through the database's access stats per table. |
| */ |
| hash_seq_init(&tstat, dbentry->tables); |
| while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL) |
| { |
| fputc('T', fpout); |
| fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout); |
| } |
| |
| /* |
| * Walk through the database's function stats table. |
| */ |
| hash_seq_init(&fstat, dbentry->functions); |
| while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL) |
| { |
| fputc('F', fpout); |
| fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout); |
| } |
| |
| /* |
| * Mark the end of this DB |
| */ |
| fputc('d', fpout); |
| } |
| |
| /* |
| * No more output to be done. Close the temp file and replace the old |
| * pgstat.stat with it. The ferror() check replaces testing for error |
| * after each individual fputc or fwrite above. |
| */ |
| fputc('E', fpout); |
| |
| if (ferror(fpout)) |
| { |
| ereport(LOG, |
| (errcode_for_file_access(), |
| errmsg("could not write temporary statistics file \"%s\": %m", |
| tmpfile))); |
| FreeFile(fpout); |
| unlink(tmpfile); |
| } |
| else if (FreeFile(fpout) < 0) |
| { |
| ereport(LOG, |
| (errcode_for_file_access(), |
| errmsg("could not close temporary statistics file \"%s\": %m", |
| tmpfile))); |
| unlink(tmpfile); |
| } |
| else if (rename(tmpfile, statfile) < 0) |
| { |
| ereport(LOG, |
| (errcode_for_file_access(), |
| errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m", |
| tmpfile, statfile))); |
| unlink(tmpfile); |
| } |
| else |
| { |
| /* |
| * Successful write, so update last_statwrite. |
| */ |
| last_statwrite = globalStats.stats_timestamp; |
| |
| /* |
| * It's not entirely clear whether there could be clock skew between |
| * backends and the collector; but just in case someone manages to |
| * send us a stats request time that's far in the future, reset it. |
| * This ensures that no inquiry message can cause more than one stats |
| * file write to occur. |
| */ |
| last_statrequest = last_statwrite; |
| } |
| |
| if (permanent) |
| unlink(pgstat_stat_filename); |
| } |
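|  |
| /* |
| * On-disk layout produced above, summarized as a grammar: |
| * |
| *		format_id (int32) |
| *		globalStats (PgStat_GlobalStats) |
| *		{ 'D' dbentry, then zero or more 'T'/'F' records, then 'd' }... |
| *		'E' |
| * |
| * The reader below additionally accepts GPDB 'Q' (PgStat_StatQueueEntry) |
| * records for resource queues, which this writer does not emit. |
| */ |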
| |
| |
| /* ---------- |
| * pgstat_read_statsfile() - |
| * |
| * Reads in an existing statistics collector file and initializes the |
| * databases' hash table (whose entries point to the tables' hash tables). |
| * ---------- |
| */ |
| static HTAB * |
| pgstat_read_statsfile(Oid onlydb, bool permanent) |
| { |
| PgStat_StatDBEntry *dbentry; |
| PgStat_StatDBEntry dbbuf; |
| PgStat_StatTabEntry *tabentry; |
| PgStat_StatTabEntry tabbuf; |
| PgStat_StatFuncEntry funcbuf; |
| PgStat_StatFuncEntry *funcentry; |
| PgStat_StatQueueEntry queuebuf; /* GPDB */ |
| PgStat_StatQueueEntry *queueentry; /* GPDB */ |
| HASHCTL hash_ctl; |
| HTAB *dbhash; |
| HTAB *tabhash = NULL; |
| HTAB *funchash = NULL; |
| HTAB *queuehash = NULL; /* GPDB */ |
| FILE *fpin; |
| int32 format_id; |
| bool found; |
| const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; |
| |
| /* |
| * The tables will live in pgStatLocalContext. |
| */ |
| pgstat_setup_memcxt(); |
| |
| /* |
| * Create the DB hashtable |
| */ |
| memset(&hash_ctl, 0, sizeof(hash_ctl)); |
| hash_ctl.keysize = sizeof(Oid); |
| hash_ctl.entrysize = sizeof(PgStat_StatDBEntry); |
| hash_ctl.hash = oid_hash; |
| hash_ctl.hcxt = pgStatLocalContext; |
| dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl, |
| HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); |
| |
| /* |
| * Clear out global statistics so they start from zero in case we can't |
| * load an existing statsfile. |
| */ |
| memset(&globalStats, 0, sizeof(globalStats)); |
| |
| |
| /* |
| * Create the Queue hashtable (GPDB) |
| */ |
| memset(&hash_ctl, 0, sizeof(hash_ctl)); |
| hash_ctl.keysize = sizeof(Oid); |
| hash_ctl.entrysize = sizeof(PgStat_StatQueueEntry); |
| hash_ctl.hash = oid_hash; |
| hash_ctl.hcxt = pgStatLocalContext; |
| queuehash = hash_create("Queues hash", PGSTAT_QUEUE_HASH_SIZE, &hash_ctl, |
| HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); |
| pgStatQueueHash = queuehash; |
| |
| /* |
| * Try to open the status file. If it doesn't exist, the backends simply |
| * return zero for anything and the collector simply starts from scratch |
| * with empty counters. |
| */ |
| if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL) |
| return dbhash; |
| |
| /* |
| * Verify it's of the expected format. |
| */ |
| if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) |
| || format_id != PGSTAT_FILE_FORMAT_ID) |
| { |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| |
| /* |
| * Read global stats struct |
| */ |
| if (fread(&globalStats, 1, sizeof(globalStats), fpin) != sizeof(globalStats)) |
| { |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| |
| /* |
| * We found an existing collector stats file. Read it and put all the |
| * hashtable entries into place. |
| */ |
| for (;;) |
| { |
| switch (fgetc(fpin)) |
| { |
| /* |
| * 'D' A PgStat_StatDBEntry struct describing a database |
| * follows. Subsequently, zero to many 'T' and 'F' entries |
| * will follow until a 'd' is encountered. |
| */ |
| case 'D': |
| if (fread(&dbbuf, 1, offsetof(PgStat_StatDBEntry, tables), |
| fpin) != offsetof(PgStat_StatDBEntry, tables)) |
| { |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| |
| /* |
| * Add to the DB hash |
| */ |
| dbentry = (PgStat_StatDBEntry *) hash_search(dbhash, |
| (void *) &dbbuf.databaseid, |
| HASH_ENTER, |
| &found); |
| if (found) |
| { |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| |
| memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry)); |
| dbentry->tables = NULL; |
| dbentry->functions = NULL; |
| |
| /* |
| * Don't collect tables if not the requested DB (or the |
| * shared-table info) |
| */ |
| if (onlydb != InvalidOid) |
| { |
| if (dbbuf.databaseid != onlydb && |
| dbbuf.databaseid != InvalidOid) |
| break; |
| } |
| |
| memset(&hash_ctl, 0, sizeof(hash_ctl)); |
| hash_ctl.keysize = sizeof(Oid); |
| hash_ctl.entrysize = sizeof(PgStat_StatTabEntry); |
| hash_ctl.hash = oid_hash; |
| hash_ctl.hcxt = pgStatLocalContext; |
| dbentry->tables = hash_create("Per-database table", |
| PGSTAT_TAB_HASH_SIZE, |
| &hash_ctl, |
| HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); |
| |
| memset(&hash_ctl, 0, sizeof(hash_ctl)); |
| hash_ctl.keysize = sizeof(Oid); |
| hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry); |
| hash_ctl.hash = oid_hash; |
| hash_ctl.hcxt = pgStatLocalContext; |
| dbentry->functions = hash_create("Per-database function", |
| PGSTAT_FUNCTION_HASH_SIZE, |
| &hash_ctl, |
| HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); |
| |
| /* |
| * Arrange that following records add entries to this |
| * database's hash tables. |
| */ |
| tabhash = dbentry->tables; |
| funchash = dbentry->functions; |
| break; |
| |
| /* |
| * 'd' End of this database. |
| */ |
| case 'd': |
| tabhash = NULL; |
| funchash = NULL; |
| break; |
| |
| /* |
| * 'T' A PgStat_StatTabEntry follows. |
| */ |
| case 'T': |
| if (fread(&tabbuf, 1, sizeof(PgStat_StatTabEntry), |
| fpin) != sizeof(PgStat_StatTabEntry)) |
| { |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| |
| /* |
| * Skip if the table belongs to a database we were not asked about. |
| */ |
| if (tabhash == NULL) |
| break; |
| |
| tabentry = (PgStat_StatTabEntry *) hash_search(tabhash, |
| (void *) &tabbuf.tableid, |
| HASH_ENTER, &found); |
| |
| if (found) |
| { |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| |
| memcpy(tabentry, &tabbuf, sizeof(tabbuf)); |
| break; |
| |
| /* |
| * 'F' A PgStat_StatFuncEntry follows. |
| */ |
| case 'F': |
| if (fread(&funcbuf, 1, sizeof(PgStat_StatFuncEntry), |
| fpin) != sizeof(PgStat_StatFuncEntry)) |
| { |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| |
| /* |
| * Skip if the function belongs to a database we were not asked about. |
| */ |
| if (funchash == NULL) |
| break; |
| |
| funcentry = (PgStat_StatFuncEntry *) hash_search(funchash, |
| (void *) &funcbuf.functionid, |
| HASH_ENTER, &found); |
| |
| if (found) |
| { |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| |
| memcpy(funcentry, &funcbuf, sizeof(funcbuf)); |
| break; |
| /* |
| * 'Q' A PgStat_StatQueueEntry follows. (GPDB) |
| */ |
| case 'Q': |
| if (fread(&queuebuf, 1, sizeof(PgStat_StatQueueEntry), |
| fpin) != sizeof(PgStat_StatQueueEntry)) |
| { |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| |
| if (queuehash == NULL) |
| break; |
| |
| /* |
| * Add it to the queue hash. |
| */ |
| queueentry = (PgStat_StatQueueEntry *) hash_search(queuehash, |
| (void *) &queuebuf.queueid, |
| HASH_ENTER, &found); |
| |
| if (found) |
| { |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| |
| memcpy(queueentry, &queuebuf, sizeof(PgStat_StatQueueEntry)); |
| break; |
| |
| /* |
| * 'E' The EOF marker of a complete stats file. |
| */ |
| case 'E': |
| goto done; |
| |
| default: |
| ereport(pgStatRunningInCollector ? LOG : WARNING, |
| (errmsg("corrupted pgstat.stat file"))); |
| goto done; |
| } |
| } |
| |
| done: |
| FreeFile(fpin); |
| |
| if (permanent) |
| unlink(PGSTAT_STAT_PERMANENT_FILENAME); |
| |
| return dbhash; |
| } |
| |
| /* ---------- |
| * pgstat_read_statsfile_timestamp() - |
| * |
| * Attempt to fetch the timestamp of an existing stats file. |
| * Returns TRUE if successful (timestamp is stored at *ts). |
| * ---------- |
| */ |
| static bool |
| pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts) |
| { |
| PgStat_GlobalStats myGlobalStats; |
| FILE *fpin; |
| int32 format_id; |
| const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; |
| |
| /* |
| * Try to open the status file. |
| */ |
| if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL) |
| return false; |
| |
| /* |
| * Verify it's of the expected format. |
| */ |
| if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) |
| || format_id != PGSTAT_FILE_FORMAT_ID) |
| { |
| FreeFile(fpin); |
| return false; |
| } |
| |
| /* |
| * Read global stats struct |
| */ |
| if (fread(&myGlobalStats, 1, sizeof(myGlobalStats), fpin) != sizeof(myGlobalStats)) |
| { |
| FreeFile(fpin); |
| return false; |
| } |
| |
| *ts = myGlobalStats.stats_timestamp; |
| |
| FreeFile(fpin); |
| return true; |
| } |
| |
| /* |
| * If not already done, read the statistics collector stats file into |
| * some hash tables. The results will be kept until pgstat_clear_snapshot() |
| * is called (typically, at end of transaction). |
| */ |
| static void |
| backend_read_statsfile(void) |
| { |
| TimestampTz min_ts; |
| int count; |
| |
| /* already read it? */ |
| if (pgStatDBHash) |
| return; |
| Assert(!pgStatRunningInCollector); |
| |
| /* |
| * We set the minimum acceptable timestamp to PGSTAT_STAT_INTERVAL msec |
| * before now. This indirectly ensures that the collector needn't write |
| * the file more often than PGSTAT_STAT_INTERVAL. In an autovacuum |
| * worker, however, we want a lower delay to avoid using stale data, so we |
| * use PGSTAT_RETRY_DELAY (since the number of workers is low, this |
| * shouldn't be a problem). |
| * |
| * Note that we don't recompute min_ts after sleeping; so we might end up |
| * accepting a file a bit older than PGSTAT_STAT_INTERVAL. In practice |
| * that shouldn't happen, though, as long as the sleep time is less than |
| * PGSTAT_STAT_INTERVAL; and we don't want to lie to the collector about |
| * what our cutoff time really is. |
| */ |
| /* GPDB: IsAutoVacuumProcess() replaces IsAutoVacuumWorkerProcess() here */ |
| if (IsAutoVacuumProcess()) |
| min_ts = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), |
| -PGSTAT_RETRY_DELAY); |
| else |
| min_ts = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), |
| -PGSTAT_STAT_INTERVAL); |
| |
| /* |
| * Loop until a fresh-enough stats file is available or we run out of |
| * time. The stats inquiry message is sent repeatedly in case the |
| * collector drops it. |
| */ |
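| /* |
| * Each retry sleeps PGSTAT_RETRY_DELAY ms, so the total wait is bounded |
| * by PGSTAT_POLL_LOOP_COUNT * PGSTAT_RETRY_DELAY ms before we give up |
| * with the "pgstat wait timeout" warning below. |
| */ |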
| for (count = 0; count < PGSTAT_POLL_LOOP_COUNT; count++) |
| { |
| TimestampTz file_ts = 0; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| if (pgstat_read_statsfile_timestamp(false, &file_ts) && |
| file_ts >= min_ts) |
| break; |
| |
| /* Not there or too old, so kick the collector and wait a bit */ |
| pgstat_send_inquiry(min_ts); |
| pg_usleep(PGSTAT_RETRY_DELAY * 1000L); |
| } |
| |
| if (count >= PGSTAT_POLL_LOOP_COUNT) |
| elog(WARNING, "pgstat wait timeout"); |
| |
| /* |
| * Read stats for all databases. (The upstream variant that reads only |
| * MyDatabaseId for ordinary backends is disabled in this fork.) |
| */ |
| pgStatDBHash = pgstat_read_statsfile(InvalidOid, false); |
| } |
| |
| |
| /* ---------- |
| * pgstat_setup_memcxt() - |
| * |
| * Create pgStatLocalContext, if not already done. |
| * ---------- |
| */ |
| static void |
| pgstat_setup_memcxt(void) |
| { |
| if (!pgStatLocalContext) |
| pgStatLocalContext = AllocSetContextCreate(TopMemoryContext, |
| "Statistics snapshot", |
| ALLOCSET_SMALL_MINSIZE, |
| ALLOCSET_SMALL_INITSIZE, |
| ALLOCSET_SMALL_MAXSIZE); |
| } |
| |
| |
| /* ---------- |
| * pgstat_clear_snapshot() - |
| * |
| * Discard any data collected in the current transaction. Any subsequent |
| * request will cause new snapshots to be read. |
| * |
| * This is also invoked during transaction commit or abort to discard |
| * the no-longer-wanted snapshot. |
| * ---------- |
| */ |
| void |
| pgstat_clear_snapshot(void) |
| { |
| /* Release memory, if any was allocated */ |
| if (pgStatLocalContext) |
| MemoryContextDelete(pgStatLocalContext); |
| |
| /* Reset variables */ |
| pgStatLocalContext = NULL; |
| pgStatDBHash = NULL; |
| localBackendStatusTable = NULL; |
| localNumBackends = 0; |
| } |
| |
| |
| /* ---------- |
| * pgstat_recv_inquiry() - |
| * |
| * Process stat inquiry requests. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len) |
| { |
| if (msg->inquiry_time > last_statrequest) |
| last_statrequest = msg->inquiry_time; |
| } |
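|  |
| /* |
| * Together with the last_statwrite < last_statrequest test in the |
| * collector's main loop, this implements the inquiry handshake: a backend |
| * sends the cutoff time it needs (see backend_read_statsfile), and the |
| * collector rewrites the stats file only when its last write predates an |
| * outstanding request. |
| */ |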
| |
| |
| /* ---------- |
| * pgstat_recv_tabstat() - |
| * |
| * Count what the backend has done. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len) |
| { |
| PgStat_TableEntry *tabmsg = &(msg->m_entry[0]); |
| PgStat_StatDBEntry *dbentry; |
| PgStat_StatTabEntry *tabentry; |
| int i; |
| bool found; |
| |
| dbentry = pgstat_get_db_entry(msg->m_databaseid, true); |
| |
| /* |
| * Update database-wide stats. |
| */ |
| dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit); |
| dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback); |
| |
| /* |
| * Process all table entries in the message. |
| */ |
| for (i = 0; i < msg->m_nentries; i++) |
| { |
| tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables, |
| (void *) &(tabmsg[i].t_id), |
| HASH_ENTER, &found); |
| |
| if (!found) |
| { |
| /* |
| * If it's a new table entry, initialize counters to the values we |
| * just got. |
| */ |
| tabentry->numscans = tabmsg[i].t_counts.t_numscans; |
| tabentry->tuples_returned = tabmsg[i].t_counts.t_tuples_returned; |
| tabentry->tuples_fetched = tabmsg[i].t_counts.t_tuples_fetched; |
| tabentry->tuples_inserted = tabmsg[i].t_counts.t_tuples_inserted; |
| tabentry->tuples_updated = tabmsg[i].t_counts.t_tuples_updated; |
| tabentry->tuples_deleted = tabmsg[i].t_counts.t_tuples_deleted; |
| tabentry->tuples_hot_updated = tabmsg[i].t_counts.t_tuples_hot_updated; |
| tabentry->n_live_tuples = tabmsg[i].t_counts.t_new_live_tuples; |
| tabentry->n_dead_tuples = tabmsg[i].t_counts.t_new_dead_tuples; |
| tabentry->blocks_fetched = tabmsg[i].t_counts.t_blocks_fetched; |
| tabentry->blocks_hit = tabmsg[i].t_counts.t_blocks_hit; |
| |
| tabentry->last_anl_tuples = 0; |
| tabentry->vacuum_timestamp = 0; |
| tabentry->autovac_vacuum_timestamp = 0; |
| tabentry->analyze_timestamp = 0; |
| tabentry->autovac_analyze_timestamp = 0; |
| } |
| else |
| { |
| /* |
| * Otherwise add the values to the existing entry. |
| */ |
| tabentry->numscans += tabmsg[i].t_counts.t_numscans; |
| tabentry->tuples_returned += tabmsg[i].t_counts.t_tuples_returned; |
| tabentry->tuples_fetched += tabmsg[i].t_counts.t_tuples_fetched; |
| tabentry->tuples_inserted += tabmsg[i].t_counts.t_tuples_inserted; |
| tabentry->tuples_updated += tabmsg[i].t_counts.t_tuples_updated; |
| tabentry->tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted; |
| tabentry->tuples_hot_updated += tabmsg[i].t_counts.t_tuples_hot_updated; |
| tabentry->n_live_tuples += tabmsg[i].t_counts.t_new_live_tuples; |
| tabentry->n_dead_tuples += tabmsg[i].t_counts.t_new_dead_tuples; |
| tabentry->blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched; |
| tabentry->blocks_hit += tabmsg[i].t_counts.t_blocks_hit; |
| } |
| |
| /* Clamp n_live_tuples in case of negative new_live_tuples */ |
| tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0); |
| /* Likewise for n_dead_tuples */ |
| tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0); |
| |
| /* |
| * Add per-table stats to the per-database entry, too. |
| */ |
| dbentry->n_tuples_returned += tabmsg[i].t_counts.t_tuples_returned; |
| dbentry->n_tuples_fetched += tabmsg[i].t_counts.t_tuples_fetched; |
| dbentry->n_tuples_inserted += tabmsg[i].t_counts.t_tuples_inserted; |
| dbentry->n_tuples_updated += tabmsg[i].t_counts.t_tuples_updated; |
| dbentry->n_tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted; |
| dbentry->n_blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched; |
| dbentry->n_blocks_hit += tabmsg[i].t_counts.t_blocks_hit; |
| } |
| } |
| |
| |
| /* ---------- |
| * pgstat_recv_tabpurge() - |
| * |
| * Arrange for dead table removal. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len) |
| { |
| PgStat_StatDBEntry *dbentry; |
| int i; |
| |
| dbentry = pgstat_get_db_entry(msg->m_databaseid, false); |
| |
| /* |
| * No need to purge if we don't even know the database. |
| */ |
| if (!dbentry || !dbentry->tables) |
| return; |
| |
| /* |
| * Process all table entries in the message. |
| */ |
| for (i = 0; i < msg->m_nentries; i++) |
| { |
| /* Remove from hashtable if present; we don't care if it's not. */ |
| (void) hash_search(dbentry->tables, |
| (void *) &(msg->m_tableid[i]), |
| HASH_REMOVE, NULL); |
| } |
| } |
| |
| |
| /* ---------- |
| * pgstat_recv_dropdb() - |
| * |
| * Arrange for dead database removal |
| * ---------- |
| */ |
| static void |
| pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len) |
| { |
| PgStat_StatDBEntry *dbentry; |
| |
| /* |
| * Lookup the database in the hashtable. |
| */ |
| dbentry = pgstat_get_db_entry(msg->m_databaseid, false); |
| |
| /* |
| * If found, remove it. |
| */ |
| if (dbentry) |
| { |
| if (dbentry->tables != NULL) |
| hash_destroy(dbentry->tables); |
| if (dbentry->functions != NULL) |
| hash_destroy(dbentry->functions); |
| |
| if (hash_search(pgStatDBHash, |
| (void *) &(dbentry->databaseid), |
| HASH_REMOVE, NULL) == NULL) |
| ereport(ERROR, |
| (errmsg("database hash table corrupted " |
| "during cleanup --- abort"))); |
| } |
| } |
| |
| |
| /* ---------- |
| * pgstat_recv_resetcounter() - |
| * |
| * Reset the statistics for the specified database. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len) |
| { |
| HASHCTL hash_ctl; |
| PgStat_StatDBEntry *dbentry; |
| |
| /* |
| * Lookup the database in the hashtable. Nothing to do if not there. |
| */ |
| dbentry = pgstat_get_db_entry(msg->m_databaseid, false); |
| |
| if (!dbentry) |
| return; |
| |
| /* |
| * We simply throw away all the database's table and function entries by |
| * destroying the old hash tables and creating fresh ones. |
| */ |
| if (dbentry->tables != NULL) |
| hash_destroy(dbentry->tables); |
| if (dbentry->functions != NULL) |
| hash_destroy(dbentry->functions); |
| |
| dbentry->tables = NULL; |
| dbentry->functions = NULL; |
| dbentry->n_xact_commit = 0; |
| dbentry->n_xact_rollback = 0; |
| dbentry->n_blocks_fetched = 0; |
| dbentry->n_blocks_hit = 0; |
| |
| memset(&hash_ctl, 0, sizeof(hash_ctl)); |
| hash_ctl.keysize = sizeof(Oid); |
| hash_ctl.entrysize = sizeof(PgStat_StatTabEntry); |
| hash_ctl.hash = oid_hash; |
| dbentry->tables = hash_create("Per-database table", |
| PGSTAT_TAB_HASH_SIZE, |
| &hash_ctl, |
| HASH_ELEM | HASH_FUNCTION); |
| |
| hash_ctl.keysize = sizeof(Oid); |
| hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry); |
| hash_ctl.hash = oid_hash; |
| dbentry->functions = hash_create("Per-database function", |
| PGSTAT_FUNCTION_HASH_SIZE, |
| &hash_ctl, |
| HASH_ELEM | HASH_FUNCTION); |
| } |
| |
| /* ---------- |
| * pgstat_recv_autovac() - |
| * |
| * Process an autovacuum signalling message. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len) |
| { |
| PgStat_StatDBEntry *dbentry; |
| |
| /* |
| * Lookup the database in the hashtable. Don't create the entry if it |
| * doesn't exist, because autovacuum may be processing a template |
| * database. If this isn't the case, the database is most likely to have |
| * an entry already. (If it doesn't, not much harm is done anyway -- |
| * it'll get created as soon as somebody actually uses the database.) |
| */ |
| dbentry = pgstat_get_db_entry(msg->m_databaseid, false); |
| if (dbentry == NULL) |
| return; |
| |
| /* |
| * Store the last autovacuum time in the database entry. |
| */ |
| dbentry->last_autovac_time = msg->m_start_time; |
| } |
| |
| /* ---------- |
| * pgstat_recv_vacuum() - |
| * |
| * Process a VACUUM message. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len) |
| { |
| PgStat_StatDBEntry *dbentry; |
| PgStat_StatTabEntry *tabentry; |
| |
| /* |
| * Don't create either the database or table entry if it doesn't already |
| * exist. This avoids bloating the stats with entries for stuff that is |
| * only touched by vacuum and not by live operations. |
| */ |
| dbentry = pgstat_get_db_entry(msg->m_databaseid, false); |
| if (dbentry == NULL) |
| return; |
| |
| tabentry = hash_search(dbentry->tables, &(msg->m_tableoid), |
| HASH_FIND, NULL); |
| if (tabentry == NULL) |
| return; |
| |
| if (msg->m_autovacuum) |
| tabentry->autovac_vacuum_timestamp = msg->m_vacuumtime; |
| else |
| tabentry->vacuum_timestamp = msg->m_vacuumtime; |
| if (msg->m_scanned_all) |
| tabentry->n_live_tuples = msg->m_tuples; |
| /* Resetting dead_tuples to 0 is an approximation ... */ |
| tabentry->n_dead_tuples = 0; |
| if (msg->m_analyze) |
| { |
| if (msg->m_scanned_all) |
| tabentry->last_anl_tuples = msg->m_tuples; |
| else |
| { |
| /* |
| * last_anl_tuples must never exceed n_live_tuples + n_dead_tuples; |
| * n_dead_tuples was just reset to zero above, so clamp to n_live_tuples. |
| */ |
| tabentry->last_anl_tuples = Min(tabentry->last_anl_tuples, |
| tabentry->n_live_tuples); |
| } |
| |
| if (msg->m_autovacuum) |
| tabentry->autovac_analyze_timestamp = msg->m_vacuumtime; |
| else |
| tabentry->analyze_timestamp = msg->m_vacuumtime; |
| } |
| else |
| { |
| 		/* |
| 		 * last_anl_tuples must never exceed n_live_tuples + |
| 		 * n_dead_tuples; n_dead_tuples was just reset to zero above, |
| 		 * so clamp to n_live_tuples. |
| 		 */ |
| tabentry->last_anl_tuples = Min(tabentry->last_anl_tuples, |
| tabentry->n_live_tuples); |
| } |
| } |
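| |
| /* |
|  * Worked example (illustrative numbers): a full-scan VACUUM that counted |
|  * 900 live tuples sets n_live_tuples = 900 and zeroes n_dead_tuples, and |
|  * if no ANALYZE ran, last_anl_tuples is clamped to at most 900. A partial |
|  * VACUUM leaves n_live_tuples alone but still zeroes n_dead_tuples as an |
|  * approximation, per the comment above. |
|  */ |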
| |
| /* ---------- |
| * pgstat_recv_analyze() - |
| * |
| * Process an ANALYZE message. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len) |
| { |
| PgStat_StatDBEntry *dbentry; |
| PgStat_StatTabEntry *tabentry; |
| |
| /* |
| * Don't create either the database or table entry if it doesn't already |
| * exist. This avoids bloating the stats with entries for stuff that is |
| * only touched by analyze and not by live operations. |
| */ |
| dbentry = pgstat_get_db_entry(msg->m_databaseid, false); |
| if (dbentry == NULL) |
| return; |
| |
| tabentry = hash_search(dbentry->tables, &(msg->m_tableoid), |
| HASH_FIND, NULL); |
| if (tabentry == NULL) |
| return; |
| |
| if (msg->m_autovacuum) |
| tabentry->autovac_analyze_timestamp = msg->m_analyzetime; |
| else |
| tabentry->analyze_timestamp = msg->m_analyzetime; |
| tabentry->n_live_tuples = msg->m_live_tuples; |
| tabentry->n_dead_tuples = msg->m_dead_tuples; |
| tabentry->last_anl_tuples = msg->m_live_tuples + msg->m_dead_tuples; |
| } |
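| |
| /* |
|  * Worked example (illustrative numbers): an ANALYZE that sampled 1000 |
|  * live and 50 dead tuples leaves n_live_tuples = 1000, n_dead_tuples = 50 |
|  * and last_anl_tuples = 1050. Autovacuum can then compare later totals |
|  * against that baseline to decide when enough has changed to warrant |
|  * another analyze. |
|  */ |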
| |
| |
| /* ---------- |
| * pgstat_recv_bgwriter() - |
| * |
| * Process a BGWRITER message. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len) |
| { |
| globalStats.timed_checkpoints += msg->m_timed_checkpoints; |
| globalStats.requested_checkpoints += msg->m_requested_checkpoints; |
| globalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints; |
| globalStats.buf_written_clean += msg->m_buf_written_clean; |
| globalStats.maxwritten_clean += msg->m_maxwritten_clean; |
| globalStats.buf_written_backend += msg->m_buf_written_backend; |
| globalStats.buf_alloc += msg->m_buf_alloc; |
| } |
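| |
| /* |
|  * Sketch of the sending side, for orientation (the real sender is |
|  * pgstat_send_bgwriter(); BgWriterStats is the bgwriter's local |
|  * PgStat_MsgBgWriter): counters are bumped locally and shipped in one |
|  * message, so this handler only has to add message totals into |
|  * globalStats. Roughly: |
|  * |
|  *		BgWriterStats.m_buf_written_clean++; |
|  *		... |
|  *		pgstat_send_bgwriter(); |
|  */ |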
| |
| |
| /* |
|  * GPDB: Look up the hash table entry for the specified resource queue. |
|  * If no entry exists and the create parameter is true, create and |
|  * initialize one; otherwise return NULL. |
|  */ |
| static PgStat_StatQueueEntry * |
| pgstat_get_queue_entry(Oid queueid, bool create) |
| { |
| PgStat_StatQueueEntry *result; |
| bool found; |
| HASHACTION action = (create ? HASH_ENTER : HASH_FIND); |
| |
| /* Lookup or create the hash table entry for this queue */ |
| result = (PgStat_StatQueueEntry *) hash_search(pgStatQueueHash, |
| &queueid, |
| action, &found); |
| |
| if (!create && !found) |
| return NULL; |
| |
| /* If not found, initialize the new one. */ |
| if (!found) |
| { |
| result->queueid = queueid; |
| result->n_queries_exec = 0; |
| result->n_queries_wait = 0; |
| result->elapsed_exec = 0; |
| result->elapsed_wait = 0; |
| } |
| |
| return result; |
| } |
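| |
| /* |
|  * Usage sketch (hypothetical call sites): writers that must accumulate |
|  * into a queue pass create = true; read-only paths pass create = false |
|  * and treat NULL as "no stats known for this queue yet": |
|  * |
|  *		PgStat_StatQueueEntry *q = pgstat_get_queue_entry(queueid, false); |
|  * |
|  *		if (q == NULL) |
|  *			return;				(nothing recorded so far) |
|  */ |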
| |
| /* ---------- |
| * pgstat_recv_queuestat() - |
| * |
| * Process resource queue activity for a backend. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_queuestat(PgStat_MsgQueuestat *msg, int len) |
| { |
| PgStat_StatQueueEntry *queueentry; |
| |
| /* Get or create an entry for this resource queue. */ |
| queueentry = pgstat_get_queue_entry(msg->m_queueid, true); |
| |
| /* Update the metrics. */ |
| queueentry->n_queries_exec += msg->m_queries_exec; |
| queueentry->n_queries_wait += msg->m_queries_wait; |
| queueentry->elapsed_exec += msg->m_elapsed_exec; |
| 	queueentry->elapsed_wait += msg->m_elapsed_wait; |
| } |
| |
| |
| /* ---------- |
| * pgstat_init_localportalhash() - |
| * |
|  *	Initialize the backend-local hash table that caches per-portal |
|  *	resource queue statistics. |
| * ---------- |
| */ |
| void |
| pgstat_init_localportalhash(void) |
| { |
| 	HASHCTL		info; |
| 	int			hash_flags; |
| |
| 	/* Zero all fields, as this file does for its other HASHCTL setups. */ |
| 	memset(&info, 0, sizeof(info)); |
| 	info.keysize = sizeof(uint32); |
| 	info.entrysize = sizeof(PgStat_StatPortalEntry); |
| 	info.hash = tag_hash; |
| 	hash_flags = (HASH_ELEM | HASH_FUNCTION); |
| |
| 	localStatPortalHash = hash_create("Local Stat Portal Hash", |
| 									  1, |
| 									  &info, |
| 									  hash_flags); |
| } |
| |
| |
| /* ---------- |
| * pgstat_getportalentry() - |
| * |
| * Return the (PgStat_StatPortalEntry *) for a given portal (and backend). |
| * ---------- |
| */ |
| PgStat_StatPortalEntry * |
| pgstat_getportalentry(uint32 portalid, Oid queueid) |
| { |
| 	PgStat_StatPortalEntry *portalentry; |
| 	bool		found; |
| |
| portalentry = hash_search(localStatPortalHash, |
| (void *) &portalid, |
| HASH_ENTER, &found); |
| |
| Assert(portalentry != NULL); |
| |
| 	/* Initialize the entry if we have not seen this portal before. */ |
| if (!found) |
| { |
| portalentry->portalid = portalid; |
| portalentry->queueentry.queueid = queueid; |
| portalentry->queueentry.n_queries_exec = 0; |
| portalentry->queueentry.n_queries_wait = 0; |
| portalentry->queueentry.elapsed_exec = 0; |
| portalentry->queueentry.elapsed_wait = 0; |
| } |
| |
| return portalentry; |
| } |
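| |
| /* |
|  * Usage sketch (hypothetical; portalId, queueId and elapsed are stand-ins |
|  * for the caller's values): executor code accumulates into the local |
|  * cache, and nothing reaches the collector until |
|  * pgstat_report_queuestat() below sends it: |
|  * |
|  *		PgStat_StatPortalEntry *pentry; |
|  * |
|  *		pentry = pgstat_getportalentry(portalId, queueId); |
|  *		pentry->queueentry.n_queries_exec++; |
|  *		pentry->queueentry.elapsed_exec += elapsed; |
|  */ |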
| |
| |
| /* ---------- |
| * pgstat_report_queuestat() - |
| * |
|  *	Called from tcop/postgres.c to send the so-far-collected |
|  *	per-resource-queue statistics to the collector. |
| * ---------- |
| */ |
| void |
| pgstat_report_queuestat(void) |
| { |
| HASH_SEQ_STATUS hstat; |
| PgStat_StatPortalEntry *pentry; |
| PgStat_MsgQueuestat msg; |
| |
| 	if (pgStatSock < 0 || !pgstat_collect_queuelevel) |
| 		return;					/* not collecting queue stats, or collector disabled */ |
| |
| 	/* Do a sequential scan through the local portal/queue hash. */ |
| hash_seq_init(&hstat, localStatPortalHash); |
| while ((pentry = (PgStat_StatPortalEntry *) hash_seq_search(&hstat)) != NULL) |
| { |
| /* Skip if message payload will be trivial. */ |
| if (pentry->queueentry.n_queries_exec == 0 && |
| pentry->queueentry.n_queries_wait == 0 && |
| pentry->queueentry.elapsed_exec == 0 && |
| pentry->queueentry.elapsed_wait == 0) |
| continue; |
| |
| /* Initialize a message to send to the collector. */ |
| pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_QUEUESTAT); |
| msg.m_queueid = pentry->queueentry.queueid; |
| msg.m_queries_exec = pentry->queueentry.n_queries_exec; |
| msg.m_queries_wait = pentry->queueentry.n_queries_wait; |
| msg.m_elapsed_exec = pentry->queueentry.elapsed_exec; |
| msg.m_elapsed_wait = pentry->queueentry.elapsed_wait; |
| |
| /* Reset the counters for this entry. */ |
| pentry->queueentry.n_queries_exec = 0; |
| pentry->queueentry.n_queries_wait = 0; |
| pentry->queueentry.elapsed_exec = 0; |
| pentry->queueentry.elapsed_wait = 0; |
| |
| 		pgstat_send(&msg, sizeof(msg)); |
| 	} |
| } |
| |
| |
| /* ---------- |
| * pgstat_fetch_stat_queueentry() - |
| * |
|  *	Support function for the SQL-callable pgstat* functions. Returns |
|  *	the collected statistics for one resource queue, or NULL. NULL doesn't |
|  *	mean that the queue doesn't exist; it is merely not yet known to the |
|  *	collector, so the caller is better off reporting ZERO instead. |
| * ---------- |
| */ |
| PgStat_StatQueueEntry * |
| pgstat_fetch_stat_queueentry(Oid queueid) |
| { |
| /* |
| * If not done for this transaction, read the statistics collector stats |
| * file into some hash tables. |
| */ |
| backend_read_statsfile(); |
| |
| 	/* |
| 	 * Lookup the requested queue; return NULL if not found |
| 	 */ |
| return (PgStat_StatQueueEntry *) hash_search(pgStatQueueHash, |
| (void *) &queueid, |
| HASH_FIND, NULL); |
| } |
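| |
| /* |
|  * Caller sketch (hypothetical SQL-callable wrapper, following the pattern |
|  * of the pg_stat_* support functions): per the comment above, a NULL |
|  * result is reported as zero rather than SQL NULL: |
|  * |
|  *		PgStat_StatQueueEntry *queueentry; |
|  * |
|  *		queueentry = pgstat_fetch_stat_queueentry(queueid); |
|  *		PG_RETURN_INT64(queueentry ? queueentry->n_queries_exec : 0); |
|  */ |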
| |
| |
| /* ---------- |
| * pgstat_recv_funcstat() - |
| * |
| * Count what the backend has done. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len) |
| { |
| PgStat_FunctionEntry *funcmsg = &(msg->m_entry[0]); |
| PgStat_StatDBEntry *dbentry; |
| PgStat_StatFuncEntry *funcentry; |
| int i; |
| bool found; |
| |
| dbentry = pgstat_get_db_entry(msg->m_databaseid, true); |
| |
| /* |
| * Process all function entries in the message. |
| */ |
| for (i = 0; i < msg->m_nentries; i++, funcmsg++) |
| { |
| funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions, |
| (void *) &(funcmsg->f_id), |
| HASH_ENTER, &found); |
| |
| if (!found) |
| { |
| /* |
| * If it's a new function entry, initialize counters to the values |
| * we just got. |
| */ |
| funcentry->f_numcalls = funcmsg->f_numcalls; |
| funcentry->f_time = funcmsg->f_time; |
| funcentry->f_time_self = funcmsg->f_time_self; |
| } |
| else |
| { |
| /* |
| * Otherwise add the values to the existing entry. |
| */ |
| funcentry->f_numcalls += funcmsg->f_numcalls; |
| funcentry->f_time += funcmsg->f_time; |
| funcentry->f_time_self += funcmsg->f_time_self; |
| } |
| } |
| } |
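| |
| /* |
|  * Note on the wire format: the sender (pgstat_send_funcstats) batches up |
|  * to PGSTAT_NUM_FUNCENTRIES per-function deltas into m_entry[] and sets |
|  * m_nentries, so one datagram can carry many functions' counters and the |
|  * loop above folds each one into the per-database hash table. |
|  */ |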
| |
| /* ---------- |
| * pgstat_recv_funcpurge() - |
| * |
| * Arrange for dead function removal. |
| * ---------- |
| */ |
| static void |
| pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len) |
| { |
| PgStat_StatDBEntry *dbentry; |
| int i; |
| |
| dbentry = pgstat_get_db_entry(msg->m_databaseid, false); |
| |
| /* |
| * No need to purge if we don't even know the database. |
| */ |
| if (!dbentry || !dbentry->functions) |
| return; |
| |
| /* |
| * Process all function entries in the message. |
| */ |
| for (i = 0; i < msg->m_nentries; i++) |
| { |
| /* Remove from hashtable if present; we don't care if it's not. */ |
| (void) hash_search(dbentry->functions, |
| (void *) &(msg->m_functionid[i]), |
| HASH_REMOVE, NULL); |
| } |
| } |
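| |
| /* |
|  * Sketch of the sending side, for orientation (cf. pgstat_vacuum_stat(), |
|  * which batches dead function OIDs and flushes whenever the message |
|  * fills up), roughly: |
|  * |
|  *		msg.m_functionid[msg.m_nentries++] = funcid; |
|  *		if (msg.m_nentries >= PGSTAT_NUM_FUNCPURGE) |
|  *		{ |
|  *			len = offsetof(PgStat_MsgFuncpurge, m_functionid[0]) |
|  *				+ msg.m_nentries * sizeof(Oid); |
|  *			pgstat_send(&msg, len); |
|  *			msg.m_nentries = 0; |
|  *		} |
|  */ |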