| /* ------------------------------------------------------------------------- |
| * |
| * pgstat_io.c |
| * Implementation of IO statistics. |
| * |
| * This file contains the implementation of IO statistics. It is kept separate |
| * from pgstat.c to enforce the line between the statistics access / storage |
| * implementation and the details about individual types of statistics. |
| * |
| * Copyright (c) 2021-2023, PostgreSQL Global Development Group |
| * |
| * IDENTIFICATION |
| * src/backend/utils/activity/pgstat_io.c |
| * ------------------------------------------------------------------------- |
| */ |
| |
| #include "postgres.h" |
| |
| #include "executor/instrument.h" |
| #include "storage/bufmgr.h" |
| #include "utils/pgstat_internal.h" |
| |
| |
| typedef struct PgStat_PendingIO |
| { |
| PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; |
| instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; |
| } PgStat_PendingIO; |
| |
| |
| static PgStat_PendingIO PendingIOStats; |
| bool have_iostats = false; |
| |
| |
| /* |
| * Check that stats have not been counted for any combination of IOObject, |
| * IOContext, and IOOp which are not tracked for the passed-in BackendType. If |
| * stats are tracked for this combination and IO times are non-zero, counts |
| * should be non-zero. |
| * |
| * The passed-in PgStat_BktypeIO must contain stats from the BackendType |
| * specified by the second parameter. Caller is responsible for locking the |
| * passed-in PgStat_BktypeIO, if needed. |
| */ |
| bool |
| pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, |
| BackendType bktype) |
| { |
| for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++) |
| { |
| for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++) |
| { |
| for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++) |
| { |
| /* we do track it */ |
| if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op)) |
| { |
| /* ensure that if IO times are non-zero, counts are > 0 */ |
| if (backend_io->times[io_object][io_context][io_op] != 0 && |
| backend_io->counts[io_object][io_context][io_op] <= 0) |
| return false; |
| |
| continue; |
| } |
| |
| /* we don't track it, and it is not 0 */ |
| if (backend_io->counts[io_object][io_context][io_op] != 0) |
| return false; |
| } |
| } |
| } |
| |
| return true; |
| } |
| |
| void |
| pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op) |
| { |
| pgstat_count_io_op_n(io_object, io_context, io_op, 1); |
| } |
| |
| void |
| pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt) |
| { |
| Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES); |
| Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES); |
| Assert((unsigned int) io_op < IOOP_NUM_TYPES); |
| Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); |
| |
| PendingIOStats.counts[io_object][io_context][io_op] += cnt; |
| |
| have_iostats = true; |
| } |
| |
| instr_time |
| pgstat_prepare_io_time(void) |
| { |
| instr_time io_start; |
| |
| if (track_io_timing) |
| INSTR_TIME_SET_CURRENT(io_start); |
| else |
| INSTR_TIME_SET_ZERO(io_start); |
| |
| return io_start; |
| } |
| |
| /* |
| * Like pgstat_count_io_op_n() except it also accumulates time. |
| */ |
| void |
| pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, |
| instr_time start_time, uint32 cnt) |
| { |
| if (track_io_timing) |
| { |
| instr_time io_time; |
| |
| INSTR_TIME_SET_CURRENT(io_time); |
| INSTR_TIME_SUBTRACT(io_time, start_time); |
| |
| if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND) |
| { |
| pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); |
| if (io_object == IOOBJECT_RELATION) |
| INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); |
| } |
| else if (io_op == IOOP_READ) |
| { |
| pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time)); |
| if (io_object == IOOBJECT_RELATION) |
| INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time); |
| } |
| |
| INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op], |
| io_time); |
| } |
| |
| pgstat_count_io_op_n(io_object, io_context, io_op, cnt); |
| } |
| |
| PgStat_IO * |
| pgstat_fetch_stat_io(void) |
| { |
| pgstat_snapshot_fixed(PGSTAT_KIND_IO); |
| |
| return &pgStatLocal.snapshot.io; |
| } |
| |
| /* |
| * Flush out locally pending IO statistics |
| * |
| * If no stats have been recorded, this function returns false. |
| * |
| * If nowait is true, this function returns true if the lock could not be |
| * acquired. Otherwise, return false. |
| */ |
| bool |
| pgstat_flush_io(bool nowait) |
| { |
| LWLock *bktype_lock; |
| PgStat_BktypeIO *bktype_shstats; |
| |
| if (!have_iostats) |
| return false; |
| |
| bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType]; |
| bktype_shstats = |
| &pgStatLocal.shmem->io.stats.stats[MyBackendType]; |
| |
| if (!nowait) |
| LWLockAcquire(bktype_lock, LW_EXCLUSIVE); |
| else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE)) |
| return true; |
| |
| for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++) |
| { |
| for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++) |
| { |
| for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++) |
| { |
| instr_time time; |
| |
| bktype_shstats->counts[io_object][io_context][io_op] += |
| PendingIOStats.counts[io_object][io_context][io_op]; |
| |
| time = PendingIOStats.pending_times[io_object][io_context][io_op]; |
| |
| bktype_shstats->times[io_object][io_context][io_op] += |
| INSTR_TIME_GET_MICROSEC(time); |
| } |
| } |
| } |
| |
| Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType)); |
| |
| LWLockRelease(bktype_lock); |
| |
| memset(&PendingIOStats, 0, sizeof(PendingIOStats)); |
| |
| have_iostats = false; |
| |
| return false; |
| } |
| |
| const char * |
| pgstat_get_io_context_name(IOContext io_context) |
| { |
| switch (io_context) |
| { |
| case IOCONTEXT_BULKREAD: |
| return "bulkread"; |
| case IOCONTEXT_BULKWRITE: |
| return "bulkwrite"; |
| case IOCONTEXT_NORMAL: |
| return "normal"; |
| case IOCONTEXT_VACUUM: |
| return "vacuum"; |
| } |
| |
| elog(ERROR, "unrecognized IOContext value: %d", io_context); |
| pg_unreachable(); |
| } |
| |
| const char * |
| pgstat_get_io_object_name(IOObject io_object) |
| { |
| switch (io_object) |
| { |
| case IOOBJECT_RELATION: |
| return "relation"; |
| case IOOBJECT_TEMP_RELATION: |
| return "temp relation"; |
| } |
| |
| elog(ERROR, "unrecognized IOObject value: %d", io_object); |
| pg_unreachable(); |
| } |
| |
| void |
| pgstat_io_reset_all_cb(TimestampTz ts) |
| { |
| for (int i = 0; i < BACKEND_NUM_TYPES; i++) |
| { |
| LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i]; |
| PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i]; |
| |
| LWLockAcquire(bktype_lock, LW_EXCLUSIVE); |
| |
| /* |
| * Use the lock in the first BackendType's PgStat_BktypeIO to protect |
| * the reset timestamp as well. |
| */ |
| if (i == 0) |
| pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts; |
| |
| memset(bktype_shstats, 0, sizeof(*bktype_shstats)); |
| LWLockRelease(bktype_lock); |
| } |
| } |
| |
| void |
| pgstat_io_snapshot_cb(void) |
| { |
| for (int i = 0; i < BACKEND_NUM_TYPES; i++) |
| { |
| LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i]; |
| PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i]; |
| PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i]; |
| |
| LWLockAcquire(bktype_lock, LW_SHARED); |
| |
| /* |
| * Use the lock in the first BackendType's PgStat_BktypeIO to protect |
| * the reset timestamp as well. |
| */ |
| if (i == 0) |
| pgStatLocal.snapshot.io.stat_reset_timestamp = |
| pgStatLocal.shmem->io.stats.stat_reset_timestamp; |
| |
| /* using struct assignment due to better type safety */ |
| *bktype_snap = *bktype_shstats; |
| LWLockRelease(bktype_lock); |
| } |
| } |
| |
| /* |
| * IO statistics are not collected for all BackendTypes. |
| * |
| * The following BackendTypes do not participate in the cumulative stats |
| * subsystem or do not perform IO on which we currently track: |
| * - Syslogger because it is not connected to shared memory |
| * - Archiver because most relevant archiving IO is delegated to a |
| * specialized command or module |
| * - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now |
| * |
| * Function returns true if BackendType participates in the cumulative stats |
| * subsystem for IO and false if it does not. |
| * |
| * When adding a new BackendType, also consider adding relevant restrictions to |
| * pgstat_tracks_io_object() and pgstat_tracks_io_op(). |
| */ |
| bool |
| pgstat_tracks_io_bktype(BackendType bktype) |
| { |
| /* |
| * List every type so that new backend types trigger a warning about |
| * needing to adjust this switch. |
| */ |
| switch (bktype) |
| { |
| case B_INVALID: |
| case B_ARCHIVER: |
| case B_LOGGER: |
| case B_WAL_RECEIVER: |
| case B_WAL_WRITER: |
| case B_LOGIN_MONITOR: |
| case B_LOGIN_MONITOR_WORKER: |
| return false; |
| |
| case B_AUTOVAC_LAUNCHER: |
| case B_AUTOVAC_WORKER: |
| case B_BACKEND: |
| case B_BG_WORKER: |
| case B_BG_WRITER: |
| case B_CHECKPOINTER: |
| case B_STANDALONE_BACKEND: |
| case B_STARTUP: |
| case B_WAL_SENDER: |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* |
| * Some BackendTypes do not perform IO on certain IOObjects or in certain |
| * IOContexts. Some IOObjects are never operated on in some IOContexts. Check |
| * that the given BackendType is expected to do IO in the given IOContext and |
| * on the given IOObject and that the given IOObject is expected to be operated |
| * on in the given IOContext. |
| */ |
| bool |
| pgstat_tracks_io_object(BackendType bktype, IOObject io_object, |
| IOContext io_context) |
| { |
| bool no_temp_rel; |
| |
| /* |
| * Some BackendTypes should never track IO statistics. |
| */ |
| if (!pgstat_tracks_io_bktype(bktype)) |
| return false; |
| |
| /* |
| * Currently, IO on temporary relations can only occur in the |
| * IOCONTEXT_NORMAL IOContext. |
| */ |
| if (io_context != IOCONTEXT_NORMAL && |
| io_object == IOOBJECT_TEMP_RELATION) |
| return false; |
| |
| /* |
| * In core Postgres, only regular backends and WAL Sender processes |
| * executing queries will use local buffers and operate on temporary |
| * relations. Parallel workers will not use local buffers (see |
| * InitLocalBuffers()); however, extensions leveraging background workers |
| * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for |
| * BackendType B_BG_WORKER. |
| */ |
| no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || |
| bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER || |
| bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP; |
| |
| if (no_temp_rel && io_context == IOCONTEXT_NORMAL && |
| io_object == IOOBJECT_TEMP_RELATION) |
| return false; |
| |
| /* |
| * Some BackendTypes do not currently perform any IO in certain |
| * IOContexts, and, while it may not be inherently incorrect for them to |
| * do so, excluding those rows from the view makes the view easier to use. |
| */ |
| if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) && |
| (io_context == IOCONTEXT_BULKREAD || |
| io_context == IOCONTEXT_BULKWRITE || |
| io_context == IOCONTEXT_VACUUM)) |
| return false; |
| |
| if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM) |
| return false; |
| |
| if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) && |
| io_context == IOCONTEXT_BULKWRITE) |
| return false; |
| |
| return true; |
| } |
| |
| /* |
| * Some BackendTypes will never do certain IOOps and some IOOps should not |
| * occur in certain IOContexts or on certain IOObjects. Check that the given |
| * IOOp is valid for the given BackendType in the given IOContext and on the |
| * given IOObject. Note that there are currently no cases of an IOOp being |
| * invalid for a particular BackendType only within a certain IOContext and/or |
| * only on a certain IOObject. |
| */ |
| bool |
| pgstat_tracks_io_op(BackendType bktype, IOObject io_object, |
| IOContext io_context, IOOp io_op) |
| { |
| bool strategy_io_context; |
| |
| /* if (io_context, io_object) will never collect stats, we're done */ |
| if (!pgstat_tracks_io_object(bktype, io_object, io_context)) |
| return false; |
| |
| /* |
| * Some BackendTypes will not do certain IOOps. |
| */ |
| if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && |
| (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT)) |
| return false; |
| |
| if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || |
| bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND) |
| return false; |
| |
| /* |
| * Temporary tables are not logged and thus do not require fsync'ing. |
| * Writeback is not requested for temporary tables. |
| */ |
| if (io_object == IOOBJECT_TEMP_RELATION && |
| (io_op == IOOP_FSYNC || io_op == IOOP_WRITEBACK)) |
| return false; |
| |
| /* |
| * Some IOOps are not valid in certain IOContexts and some IOOps are only |
| * valid in certain contexts. |
| */ |
| if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND) |
| return false; |
| |
| strategy_io_context = io_context == IOCONTEXT_BULKREAD || |
| io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM; |
| |
| /* |
| * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use. |
| */ |
| if (!strategy_io_context && io_op == IOOP_REUSE) |
| return false; |
| |
| /* |
| * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are |
| * counted in the IOCONTEXT_NORMAL IOContext. See comment in |
| * register_dirty_segment() for more details. |
| */ |
| if (strategy_io_context && io_op == IOOP_FSYNC) |
| return false; |
| |
| |
| return true; |
| } |