| /*------------------------------------------------------------------------- |
| * |
| * pgstatapprox.c |
| * Bloat estimation functions |
| * |
| * Copyright (c) 2014-2023, PostgreSQL Global Development Group |
| * |
| * IDENTIFICATION |
| * contrib/pgstattuple/pgstatapprox.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "access/heapam.h" |
| #include "access/htup_details.h" |
| #include "access/multixact.h" |
| #include "access/relation.h" |
| #include "access/transam.h" |
| #include "access/visibilitymap.h" |
| #include "access/xact.h" |
| #include "catalog/namespace.h" |
| #include "catalog/pg_am_d.h" |
| #include "commands/vacuum.h" |
| #include "funcapi.h" |
| #include "miscadmin.h" |
| #include "storage/bufmgr.h" |
| #include "storage/freespace.h" |
| #include "storage/lmgr.h" |
| #include "storage/procarray.h" |
| #include "utils/builtins.h" |
| |
| PG_FUNCTION_INFO_V1(pgstattuple_approx); |
| PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5); |
| |
| Datum pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo); |
| |
| typedef struct output_type |
| { |
| uint64 table_len; |
| double scanned_percent; |
| uint64 tuple_count; |
| uint64 tuple_len; |
| double tuple_percent; |
| uint64 dead_tuple_count; |
| uint64 dead_tuple_len; |
| double dead_tuple_percent; |
| uint64 free_space; |
| double free_percent; |
| } output_type; |
| |
| #define NUM_OUTPUT_COLUMNS 10 |
| |
| /* |
| * This function takes an already open relation and scans its pages, |
| * skipping those that have the corresponding visibility map bit set. |
| * For pages we skip, we find the free space from the free space map |
| * and approximate tuple_len on that basis. For the others, we count |
| * the exact number of dead tuples etc. |
| * |
| * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but |
| * we do not try to avoid skipping single pages. |
| */ |
| static void |
| statapprox_heap(Relation rel, output_type *stat) |
| { |
| BlockNumber scanned, |
| nblocks, |
| blkno; |
| Buffer vmbuffer = InvalidBuffer; |
| BufferAccessStrategy bstrategy; |
| TransactionId OldestXmin; |
| |
| OldestXmin = GetOldestNonRemovableTransactionId(rel); |
| bstrategy = GetAccessStrategy(BAS_BULKREAD); |
| |
| nblocks = RelationGetNumberOfBlocks(rel); |
| scanned = 0; |
| |
| for (blkno = 0; blkno < nblocks; blkno++) |
| { |
| Buffer buf; |
| Page page; |
| OffsetNumber offnum, |
| maxoff; |
| Size freespace; |
| |
| CHECK_FOR_INTERRUPTS(); |
| |
| /* |
| * If the page has only visible tuples, then we can find out the free |
| * space from the FSM and move on. |
| */ |
| if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer)) |
| { |
| freespace = GetRecordedFreeSpace(rel, blkno); |
| stat->tuple_len += BLCKSZ - freespace; |
| stat->free_space += freespace; |
| continue; |
| } |
| |
| buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, |
| RBM_NORMAL, bstrategy); |
| |
| LockBuffer(buf, BUFFER_LOCK_SHARE); |
| |
| page = BufferGetPage(buf); |
| |
| /* |
| * It's not safe to call PageGetHeapFreeSpace() on new pages, so we |
| * treat them as being free space for our purposes. |
| */ |
| if (!PageIsNew(page)) |
| stat->free_space += PageGetHeapFreeSpace(page); |
| else |
| stat->free_space += BLCKSZ - SizeOfPageHeaderData; |
| |
| /* We may count the page as scanned even if it's new/empty */ |
| scanned++; |
| |
| if (PageIsNew(page) || PageIsEmpty(page)) |
| { |
| UnlockReleaseBuffer(buf); |
| continue; |
| } |
| |
| /* |
| * Look at each tuple on the page and decide whether it's live or |
| * dead, then count it and its size. Unlike lazy_scan_heap, we can |
| * afford to ignore problems and special cases. |
| */ |
| maxoff = PageGetMaxOffsetNumber(page); |
| |
| for (offnum = FirstOffsetNumber; |
| offnum <= maxoff; |
| offnum = OffsetNumberNext(offnum)) |
| { |
| ItemId itemid; |
| HeapTupleData tuple; |
| |
| itemid = PageGetItemId(page, offnum); |
| |
| if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) || |
| ItemIdIsDead(itemid)) |
| { |
| continue; |
| } |
| |
| Assert(ItemIdIsNormal(itemid)); |
| |
| ItemPointerSet(&(tuple.t_self), blkno, offnum); |
| |
| tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); |
| tuple.t_len = ItemIdGetLength(itemid); |
| tuple.t_tableOid = RelationGetRelid(rel); |
| |
| /* |
| * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples |
| * as "dead" while DELETE_IN_PROGRESS tuples are "live". We don't |
| * bother distinguishing tuples inserted/deleted by our own |
| * transaction. |
| */ |
| switch (HeapTupleSatisfiesVacuum(rel, &tuple, OldestXmin, buf)) |
| { |
| case HEAPTUPLE_LIVE: |
| case HEAPTUPLE_DELETE_IN_PROGRESS: |
| stat->tuple_len += tuple.t_len; |
| stat->tuple_count++; |
| break; |
| case HEAPTUPLE_DEAD: |
| case HEAPTUPLE_RECENTLY_DEAD: |
| case HEAPTUPLE_INSERT_IN_PROGRESS: |
| stat->dead_tuple_len += tuple.t_len; |
| stat->dead_tuple_count++; |
| break; |
| default: |
| elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); |
| break; |
| } |
| } |
| |
| UnlockReleaseBuffer(buf); |
| } |
| |
| stat->table_len = (uint64) nblocks * BLCKSZ; |
| |
| /* |
| * We don't know how many tuples are in the pages we didn't scan, so |
| * extrapolate the live-tuple count to the whole table in the same way |
| * that VACUUM does. (Like VACUUM, we're not taking a random sample, so |
| * just extrapolating linearly seems unsafe.) There should be no dead |
| * tuples in all-visible pages, so no correction is needed for that, and |
| * we already accounted for the space in those pages, too. |
| */ |
| stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned, |
| stat->tuple_count); |
| |
| /* It's not clear if we could get -1 here, but be safe. */ |
| stat->tuple_count = Max(stat->tuple_count, 0); |
| |
| /* |
| * Calculate percentages if the relation has one or more pages. |
| */ |
| if (nblocks != 0) |
| { |
| stat->scanned_percent = 100.0 * scanned / nblocks; |
| stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len; |
| stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len; |
| stat->free_percent = 100.0 * stat->free_space / stat->table_len; |
| } |
| |
| if (BufferIsValid(vmbuffer)) |
| { |
| ReleaseBuffer(vmbuffer); |
| vmbuffer = InvalidBuffer; |
| } |
| } |
| |
| /* |
| * Returns estimated live/dead tuple statistics for the given relid. |
| * |
| * The superuser() check here must be kept as the library might be upgraded |
| * without the extension being upgraded, meaning that in pre-1.5 installations |
| * these functions could be called by any user. |
| */ |
| Datum |
| pgstattuple_approx(PG_FUNCTION_ARGS) |
| { |
| Oid relid = PG_GETARG_OID(0); |
| |
| if (!superuser()) |
| ereport(ERROR, |
| (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), |
| errmsg("must be superuser to use pgstattuple functions"))); |
| |
| PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo)); |
| } |
| |
| /* |
| * As of pgstattuple version 1.5, we no longer need to check if the user |
| * is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC. |
| * Users can then grant access to it based on their policies. |
| * |
| * Otherwise identical to pgstattuple_approx (above). |
| */ |
| Datum |
| pgstattuple_approx_v1_5(PG_FUNCTION_ARGS) |
| { |
| Oid relid = PG_GETARG_OID(0); |
| |
| PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo)); |
| } |
| |
| Datum |
| pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo) |
| { |
| Relation rel; |
| output_type stat = {0}; |
| TupleDesc tupdesc; |
| bool nulls[NUM_OUTPUT_COLUMNS]; |
| Datum values[NUM_OUTPUT_COLUMNS]; |
| HeapTuple ret; |
| int i = 0; |
| |
| if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) |
| elog(ERROR, "return type must be a row type"); |
| |
| if (tupdesc->natts != NUM_OUTPUT_COLUMNS) |
| elog(ERROR, "incorrect number of output arguments"); |
| |
| rel = relation_open(relid, AccessShareLock); |
| |
| /* |
| * Reject attempts to read non-local temporary relations; we would be |
| * likely to get wrong data since we have no visibility into the owning |
| * session's local buffers. |
| */ |
| if (RELATION_IS_OTHER_TEMP(rel)) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot access temporary tables of other sessions"))); |
| |
| /* |
| * We support only relation kinds with a visibility map and a free space |
| * map. |
| */ |
| if (!(rel->rd_rel->relkind == RELKIND_RELATION || |
| rel->rd_rel->relkind == RELKIND_AOSEGMENTS || |
| rel->rd_rel->relkind == RELKIND_AOBLOCKDIR || |
| rel->rd_rel->relkind == RELKIND_AOVISIMAP || |
| rel->rd_rel->relkind == RELKIND_MATVIEW || |
| rel->rd_rel->relkind == RELKIND_TOASTVALUE || |
| rel->rd_rel->relkind == RELKIND_DIRECTORY_TABLE)) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("relation \"%s\" is of wrong relation kind", |
| RelationGetRelationName(rel)), |
| errdetail_relkind_not_supported(rel->rd_rel->relkind))); |
| |
| if (rel->rd_rel->relam != HEAP_TABLE_AM_OID) |
| ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("only heap AM is supported"))); |
| |
| statapprox_heap(rel, &stat); |
| |
| relation_close(rel, AccessShareLock); |
| |
| memset(nulls, 0, sizeof(nulls)); |
| |
| values[i++] = Int64GetDatum(stat.table_len); |
| values[i++] = Float8GetDatum(stat.scanned_percent); |
| values[i++] = Int64GetDatum(stat.tuple_count); |
| values[i++] = Int64GetDatum(stat.tuple_len); |
| values[i++] = Float8GetDatum(stat.tuple_percent); |
| values[i++] = Int64GetDatum(stat.dead_tuple_count); |
| values[i++] = Int64GetDatum(stat.dead_tuple_len); |
| values[i++] = Float8GetDatum(stat.dead_tuple_percent); |
| values[i++] = Int64GetDatum(stat.free_space); |
| values[i++] = Float8GetDatum(stat.free_percent); |
| |
| ret = heap_form_tuple(tupdesc, values, nulls); |
| return HeapTupleGetDatum(ret); |
| } |