| /*------------------------------------------------------------------------- |
| * |
| * pg_buffercache_pages.c |
| * display some contents of the buffer cache |
| * |
| * contrib/pg_buffercache/pg_buffercache_pages.c |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "access/htup_details.h" |
| #include "catalog/pg_type.h" |
| #include "funcapi.h" |
| #include "storage/buf_internals.h" |
| #include "storage/bufmgr.h" |
| |
| |
/*
 * Result-row column counts used by the functions in this file.
 *
 * pg_buffercache_pages() accepts a result row type having anywhere from
 * NUM_BUFFERCACHE_PAGES_MIN_ELEM to NUM_BUFFERCACHE_PAGES_ELEM columns; the
 * ninth column, when present, is pinning_backends.
 */
#define NUM_BUFFERCACHE_PAGES_MIN_ELEM 8
#define NUM_BUFFERCACHE_PAGES_ELEM 9
/* Length of the values/nulls arrays built by pg_buffercache_summary(). */
#define NUM_BUFFERCACHE_SUMMARY_ELEM 5
/* Length of the values/nulls arrays built by pg_buffercache_usage_counts(). */
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4

/*
 * NOTE(review): presumably the standard PostgreSQL loadable-module magic
 * block (see fmgr.h) -- confirm it appears only once in the module.
 */
PG_MODULE_MAGIC;
| |
| /* |
| * Record structure holding the to be exposed cache data. |
| */ |
| typedef struct |
| { |
| uint32 bufferid; |
| Oid relfilenode; |
| Oid reltablespace; |
| Oid reldatabase; |
| ForkNumber forknum; |
| BlockNumber blocknum; |
| bool isvalid; |
| bool isdirty; |
| uint16 usagecount; |
| |
| /* |
| * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from |
| * being pinned by too many backends and each backend will only pin once |
| * because of bufmgr.c's PrivateRefCount infrastructure. |
| */ |
| int32 pinning_backends; |
| } BufferCachePagesRec; |
| |
| |
| /* |
| * Function context for data persisting over repeated calls. |
| */ |
| typedef struct |
| { |
| TupleDesc tupdesc; |
| BufferCachePagesRec *record; |
| } BufferCachePagesContext; |
| |
| |
/*
 * SQL-callable functions defined in this file:
 *
 * pg_buffercache_pages			one row per shared buffer: buffer number,
 *								relation node/tablespace/database, fork,
 *								block number, dirty flag, usage count and
 *								(optionally) the number of pinning backends
 * pg_buffercache_summary		a single row of counters aggregated over
 *								all buffer headers
 * pg_buffercache_usage_counts	one row per possible usage count value
 */
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_summary);
PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
| |
| Datum |
| pg_buffercache_pages(PG_FUNCTION_ARGS) |
| { |
| FuncCallContext *funcctx; |
| Datum result; |
| MemoryContext oldcontext; |
| BufferCachePagesContext *fctx; /* User function context. */ |
| TupleDesc tupledesc; |
| TupleDesc expected_tupledesc; |
| HeapTuple tuple; |
| |
| if (SRF_IS_FIRSTCALL()) |
| { |
| int i; |
| |
| funcctx = SRF_FIRSTCALL_INIT(); |
| |
| /* Switch context when allocating stuff to be used in later calls */ |
| oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); |
| |
| /* Create a user function context for cross-call persistence */ |
| fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext)); |
| |
| /* |
| * To smoothly support upgrades from version 1.0 of this extension |
| * transparently handle the (non-)existence of the pinning_backends |
| * column. We unfortunately have to get the result type for that... - |
| * we can't use the result type determined by the function definition |
| * without potentially crashing when somebody uses the old (or even |
| * wrong) function definition though. |
| */ |
| if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE) |
| elog(ERROR, "return type must be a row type"); |
| |
| if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM || |
| expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM) |
| elog(ERROR, "incorrect number of output arguments"); |
| |
| /* Construct a tuple descriptor for the result rows. */ |
| tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts); |
| TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid", |
| INT4OID, -1, 0); |
| TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode", |
| OIDOID, -1, 0); |
| TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace", |
| OIDOID, -1, 0); |
| TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase", |
| OIDOID, -1, 0); |
| TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber", |
| INT2OID, -1, 0); |
| TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber", |
| INT8OID, -1, 0); |
| TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty", |
| BOOLOID, -1, 0); |
| TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count", |
| INT2OID, -1, 0); |
| |
| if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM) |
| TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends", |
| INT4OID, -1, 0); |
| |
| fctx->tupdesc = BlessTupleDesc(tupledesc); |
| |
| /* Allocate NBuffers worth of BufferCachePagesRec records. */ |
| fctx->record = (BufferCachePagesRec *) |
| MemoryContextAllocHuge(CurrentMemoryContext, |
| sizeof(BufferCachePagesRec) * NBuffers); |
| |
| /* Set max calls and remember the user function context. */ |
| funcctx->max_calls = NBuffers; |
| funcctx->user_fctx = fctx; |
| |
| /* Return to original context when allocating transient memory */ |
| MemoryContextSwitchTo(oldcontext); |
| |
| /* |
| * Scan through all the buffers, saving the relevant fields in the |
| * fctx->record structure. |
| * |
| * We don't hold the partition locks, so we don't get a consistent |
| * snapshot across all buffers, but we do grab the buffer header |
| * locks, so the information of each buffer is self-consistent. |
| */ |
| for (i = 0; i < NBuffers; i++) |
| { |
| BufferDesc *bufHdr; |
| uint32 buf_state; |
| |
| bufHdr = GetBufferDescriptor(i); |
| /* Lock each buffer header before inspecting. */ |
| buf_state = LockBufHdr(bufHdr); |
| |
| fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr); |
| fctx->record[i].relfilenode = bufHdr->tag.rnode.relNode; |
| fctx->record[i].reltablespace = bufHdr->tag.rnode.spcNode; |
| fctx->record[i].reldatabase = bufHdr->tag.rnode.dbNode; |
| fctx->record[i].forknum = bufHdr->tag.forkNum; |
| fctx->record[i].blocknum = bufHdr->tag.blockNum; |
| fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state); |
| fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state); |
| |
| if (buf_state & BM_DIRTY) |
| fctx->record[i].isdirty = true; |
| else |
| fctx->record[i].isdirty = false; |
| |
| /* Note if the buffer is valid, and has storage created */ |
| if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID)) |
| fctx->record[i].isvalid = true; |
| else |
| fctx->record[i].isvalid = false; |
| |
| UnlockBufHdr(bufHdr, buf_state); |
| } |
| } |
| |
| funcctx = SRF_PERCALL_SETUP(); |
| |
| /* Get the saved state */ |
| fctx = funcctx->user_fctx; |
| |
| if (funcctx->call_cntr < funcctx->max_calls) |
| { |
| uint32 i = funcctx->call_cntr; |
| Datum values[NUM_BUFFERCACHE_PAGES_ELEM]; |
| bool nulls[NUM_BUFFERCACHE_PAGES_ELEM]; |
| |
| values[0] = Int32GetDatum(fctx->record[i].bufferid); |
| nulls[0] = false; |
| |
| /* |
| * Set all fields except the bufferid to null if the buffer is unused |
| * or not valid. |
| */ |
| if (fctx->record[i].blocknum == InvalidBlockNumber || |
| fctx->record[i].isvalid == false) |
| { |
| nulls[1] = true; |
| nulls[2] = true; |
| nulls[3] = true; |
| nulls[4] = true; |
| nulls[5] = true; |
| nulls[6] = true; |
| nulls[7] = true; |
| /* unused for v1.0 callers, but the array is always long enough */ |
| nulls[8] = true; |
| } |
| else |
| { |
| values[1] = ObjectIdGetDatum(fctx->record[i].relfilenode); |
| nulls[1] = false; |
| values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace); |
| nulls[2] = false; |
| values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase); |
| nulls[3] = false; |
| values[4] = ObjectIdGetDatum(fctx->record[i].forknum); |
| nulls[4] = false; |
| values[5] = Int64GetDatum((int64) fctx->record[i].blocknum); |
| nulls[5] = false; |
| values[6] = BoolGetDatum(fctx->record[i].isdirty); |
| nulls[6] = false; |
| values[7] = Int16GetDatum(fctx->record[i].usagecount); |
| nulls[7] = false; |
| /* unused for v1.0 callers, but the array is always long enough */ |
| values[8] = Int32GetDatum(fctx->record[i].pinning_backends); |
| nulls[8] = false; |
| } |
| |
| /* Build and return the tuple. */ |
| tuple = heap_form_tuple(fctx->tupdesc, values, nulls); |
| result = HeapTupleGetDatum(tuple); |
| |
| SRF_RETURN_NEXT(funcctx, result); |
| } |
| else |
| SRF_RETURN_DONE(funcctx); |
| } |
| |
| Datum |
| pg_buffercache_summary(PG_FUNCTION_ARGS) |
| { |
| Datum result; |
| TupleDesc tupledesc; |
| HeapTuple tuple; |
| Datum values[NUM_BUFFERCACHE_SUMMARY_ELEM]; |
| bool nulls[NUM_BUFFERCACHE_SUMMARY_ELEM]; |
| |
| int32 buffers_used = 0; |
| int32 buffers_unused = 0; |
| int32 buffers_dirty = 0; |
| int32 buffers_pinned = 0; |
| int64 usagecount_total = 0; |
| |
| if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE) |
| elog(ERROR, "return type must be a row type"); |
| |
| for (int i = 0; i < NBuffers; i++) |
| { |
| BufferDesc *bufHdr; |
| uint32 buf_state; |
| |
| /* |
| * This function summarizes the state of all headers. Locking the |
| * buffer headers wouldn't provide an improved result as the state of |
| * the buffer can still change after we release the lock and it'd |
| * noticeably increase the cost of the function. |
| */ |
| bufHdr = GetBufferDescriptor(i); |
| buf_state = pg_atomic_read_u32(&bufHdr->state); |
| |
| if (buf_state & BM_VALID) |
| { |
| buffers_used++; |
| usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state); |
| |
| if (buf_state & BM_DIRTY) |
| buffers_dirty++; |
| } |
| else |
| buffers_unused++; |
| |
| if (BUF_STATE_GET_REFCOUNT(buf_state) > 0) |
| buffers_pinned++; |
| } |
| |
| memset(nulls, 0, sizeof(nulls)); |
| values[0] = Int32GetDatum(buffers_used); |
| values[1] = Int32GetDatum(buffers_unused); |
| values[2] = Int32GetDatum(buffers_dirty); |
| values[3] = Int32GetDatum(buffers_pinned); |
| |
| if (buffers_used != 0) |
| values[4] = Float8GetDatum((double) usagecount_total / buffers_used); |
| else |
| nulls[4] = true; |
| |
| /* Build and return the tuple. */ |
| tuple = heap_form_tuple(tupledesc, values, nulls); |
| result = HeapTupleGetDatum(tuple); |
| |
| PG_RETURN_DATUM(result); |
| } |
| |
| Datum |
| pg_buffercache_usage_counts(PG_FUNCTION_ARGS) |
| { |
| ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; |
| int usage_counts[BM_MAX_USAGE_COUNT + 1] = {0}; |
| int dirty[BM_MAX_USAGE_COUNT + 1] = {0}; |
| int pinned[BM_MAX_USAGE_COUNT + 1] = {0}; |
| Datum values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM]; |
| bool nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0}; |
| |
| InitMaterializedSRF(fcinfo, 0); |
| |
| for (int i = 0; i < NBuffers; i++) |
| { |
| BufferDesc *bufHdr = GetBufferDescriptor(i); |
| uint32 buf_state = pg_atomic_read_u32(&bufHdr->state); |
| int usage_count; |
| |
| usage_count = BUF_STATE_GET_USAGECOUNT(buf_state); |
| usage_counts[usage_count]++; |
| |
| if (buf_state & BM_DIRTY) |
| dirty[usage_count]++; |
| |
| if (BUF_STATE_GET_REFCOUNT(buf_state) > 0) |
| pinned[usage_count]++; |
| } |
| |
| for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++) |
| { |
| values[0] = Int32GetDatum(i); |
| values[1] = Int32GetDatum(usage_counts[i]); |
| values[2] = Int32GetDatum(dirty[i]); |
| values[3] = Int32GetDatum(pinned[i]); |
| |
| tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); |
| } |
| |
| return (Datum) 0; |
| } |