blob: 89273f24356c79f21c93297502a455d56460c391 [file] [log] [blame]
/*
* brinfuncs.c
* Functions to investigate BRIN indexes
*
* Copyright (c) 2014-2023, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/pageinspect/brinfuncs.c
*/
#include "postgres.h"
#include "access/brin.h"
#include "access/brin_internal.h"
#include "access/brin_page.h"
#include "access/brin_revmap.h"
#include "access/brin_tuple.h"
#include "access/htup_details.h"
#include "catalog/index.h"
#include "catalog/pg_am_d.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "pageinspect.h"
#include "storage/bufmgr.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "miscadmin.h"
PG_FUNCTION_INFO_V1(brin_page_type);
PG_FUNCTION_INFO_V1(brin_page_items);
PG_FUNCTION_INFO_V1(brin_metapage_info);
PG_FUNCTION_INFO_V1(brin_revmap_data);
/* GPDB specific */
PG_FUNCTION_INFO_V1(brin_revmap_chain);
#define IS_BRIN(r) ((r)->rd_rel->relam == BRIN_AM_OID)
typedef struct brin_column_state
{
int nstored;
FmgrInfo outputFn[FLEXIBLE_ARRAY_MEMBER];
} brin_column_state;
static Page verify_brin_page(bytea *raw_page, uint16 type,
const char *strtype);
Datum
brin_page_type(PG_FUNCTION_ARGS)
{
bytea *raw_page = PG_GETARG_BYTEA_P(0);
Page page;
char *type;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser to use raw page functions")));
page = get_page_from_raw(raw_page);
if (PageIsNew(page))
PG_RETURN_NULL();
/* verify the special space has the expected size */
if (PageGetSpecialSize(page) != MAXALIGN(sizeof(BrinSpecialSpace)))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("input page is not a valid %s page", "BRIN"),
errdetail("Expected special size %d, got %d.",
(int) MAXALIGN(sizeof(BrinSpecialSpace)),
(int) PageGetSpecialSize(page))));
switch (BrinPageType(page))
{
case BRIN_PAGETYPE_META:
type = "meta";
break;
case BRIN_PAGETYPE_REVMAP:
type = "revmap";
break;
case BRIN_PAGETYPE_REGULAR:
type = "regular";
break;
default:
type = psprintf("unknown (%02x)", BrinPageType(page));
break;
}
PG_RETURN_TEXT_P(cstring_to_text(type));
}
/*
* Verify that the given bytea contains a BRIN page of the indicated page
* type, or die in the attempt. A pointer to the page is returned.
*/
static Page
verify_brin_page(bytea *raw_page, uint16 type, const char *strtype)
{
Page page = get_page_from_raw(raw_page);
if (PageIsNew(page))
return page;
/* verify the special space has the expected size */
if (PageGetSpecialSize(page) != MAXALIGN(sizeof(BrinSpecialSpace)))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("input page is not a valid %s page", "BRIN"),
errdetail("Expected special size %d, got %d.",
(int) MAXALIGN(sizeof(BrinSpecialSpace)),
(int) PageGetSpecialSize(page))));
/* verify the special space says this page is what we want */
if (BrinPageType(page) != type)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("page is not a BRIN page of type \"%s\"", strtype),
errdetail("Expected special type %08x, got %08x.",
type, BrinPageType(page))));
return page;
}
/*
* Extract all item values from a BRIN index page
*
* Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass);
*/
Datum
brin_page_items(PG_FUNCTION_ARGS)
{
bytea *raw_page = PG_GETARG_BYTEA_P(0);
Oid indexRelid = PG_GETARG_OID(1);
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
Relation indexRel;
brin_column_state **columns;
BrinDesc *bdesc;
BrinMemTuple *dtup;
Page page;
OffsetNumber offset;
AttrNumber attno;
bool unusedItem;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser to use raw page functions")));
InitMaterializedSRF(fcinfo, 0);
indexRel = index_open(indexRelid, AccessShareLock);
if (!IS_BRIN(indexRel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a %s index",
RelationGetRelationName(indexRel), "BRIN")));
bdesc = brin_build_desc(indexRel);
/* minimally verify the page we got */
page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular");
if (PageIsNew(page))
{
brin_free_desc(bdesc);
index_close(indexRel, AccessShareLock);
PG_RETURN_NULL();
}
/*
* Initialize output functions for all indexed datatypes; simplifies
* calling them later.
*/
columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts);
for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
{
Oid output;
bool isVarlena;
BrinOpcInfo *opcinfo;
int i;
brin_column_state *column;
opcinfo = bdesc->bd_info[attno - 1];
column = palloc(offsetof(brin_column_state, outputFn) +
sizeof(FmgrInfo) * opcinfo->oi_nstored);
column->nstored = opcinfo->oi_nstored;
for (i = 0; i < opcinfo->oi_nstored; i++)
{
getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
fmgr_info(output, &column->outputFn[i]);
}
columns[attno - 1] = column;
}
offset = FirstOffsetNumber;
unusedItem = false;
dtup = NULL;
for (;;)
{
Datum values[8];
bool nulls[8] = {0};
/*
* This loop is called once for every attribute of every tuple in the
* page. At the start of a tuple, we get a NULL dtup; that's our
* signal for obtaining and decoding the next one. If that's not the
* case, we output the next attribute.
*/
if (dtup == NULL)
{
ItemId itemId;
/* verify item status: if there's no data, we can't decode */
itemId = PageGetItemId(page, offset);
if (ItemIdIsUsed(itemId))
{
dtup = brin_deform_tuple(bdesc,
(BrinTuple *) PageGetItem(page, itemId),
NULL);
attno = 1;
unusedItem = false;
}
else
unusedItem = true;
}
else
attno++;
if (unusedItem)
{
values[0] = UInt16GetDatum(offset);
nulls[1] = true;
nulls[2] = true;
nulls[3] = true;
nulls[4] = true;
nulls[5] = true;
nulls[6] = true;
nulls[7] = true;
}
else
{
int att = attno - 1;
values[0] = UInt16GetDatum(offset);
switch (TupleDescAttr(rsinfo->setDesc, 1)->atttypid)
{
case INT8OID:
values[1] = Int64GetDatum((int64) dtup->bt_blkno);
break;
case INT4OID:
/* support for old extension version */
values[1] = UInt32GetDatum(dtup->bt_blkno);
break;
default:
elog(ERROR, "incorrect output types");
}
values[2] = UInt16GetDatum(attno);
values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls);
values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls);
values[5] = BoolGetDatum(dtup->bt_placeholder);
values[6] = BoolGetDatum(dtup->bt_empty_range);
if (!dtup->bt_columns[att].bv_allnulls)
{
BrinValues *bvalues = &dtup->bt_columns[att];
StringInfoData s;
bool first;
int i;
initStringInfo(&s);
appendStringInfoChar(&s, '{');
first = true;
for (i = 0; i < columns[att]->nstored; i++)
{
char *val;
if (!first)
appendStringInfoString(&s, " .. ");
first = false;
val = OutputFunctionCall(&columns[att]->outputFn[i],
bvalues->bv_values[i]);
appendStringInfoString(&s, val);
pfree(val);
}
appendStringInfoChar(&s, '}');
values[7] = CStringGetTextDatum(s.data);
pfree(s.data);
}
else
{
nulls[7] = true;
}
}
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
/*
* If the item was unused, jump straight to the next one; otherwise,
* the only cleanup needed here is to set our signal to go to the next
* tuple in the following iteration, by freeing the current one.
*/
if (unusedItem)
offset = OffsetNumberNext(offset);
else if (attno >= bdesc->bd_tupdesc->natts)
{
pfree(dtup);
dtup = NULL;
offset = OffsetNumberNext(offset);
}
/*
* If we're beyond the end of the page, we're done.
*/
if (offset > PageGetMaxOffsetNumber(page))
break;
}
brin_free_desc(bdesc);
index_close(indexRel, AccessShareLock);
return (Datum) 0;
}
Datum
brin_metapage_info(PG_FUNCTION_ARGS)
{
bytea *raw_page = PG_GETARG_BYTEA_P(0);
Page page;
BrinMetaPageData *meta;
TupleDesc tupdesc;
Datum values[8];
bool nulls[8];
Datum *firstrevmappages;
Datum *lastrevmappages;
Datum *lastrevmappagenums;
HeapTuple htup;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser to use raw page functions")));
page = verify_brin_page(raw_page, BRIN_PAGETYPE_META, "metapage");
if (PageIsNew(page))
PG_RETURN_NULL();
/* Build a tuple descriptor for our result type */
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
tupdesc = BlessTupleDesc(tupdesc);
/* Extract values from the metapage */
meta = (BrinMetaPageData *) PageGetContents(page);
values[0] = CStringGetTextDatum(psprintf("0x%08X", meta->brinMagic));
values[1] = Int32GetDatum(meta->brinVersion);
values[2] = Int32GetDatum(meta->pagesPerRange);
values[3] = Int64GetDatum(meta->lastRevmapPage);
/* GPDB specific fields */
values[4] = Int64GetDatum(meta->isAO);
if (!meta->isAO)
{
nulls[5] = true;
nulls[6] = true;
nulls[7] = true;
}
else
{
firstrevmappages = palloc(sizeof(Datum) * MAX_AOREL_CONCURRENCY);
lastrevmappages = palloc(sizeof(Datum) * MAX_AOREL_CONCURRENCY);
lastrevmappagenums = palloc(sizeof(Datum) * MAX_AOREL_CONCURRENCY);
for (int i = 0; i < MAX_AOREL_CONCURRENCY; i++)
{
/*
* We project these with Int32Get, so we can represent
* InvalidBlockNumber as (-1), for brevity.
*/
firstrevmappages[i] = Int32GetDatum(meta->aoChainInfo[i].firstPage);
lastrevmappages[i] = Int32GetDatum(meta->aoChainInfo[i].lastPage);
lastrevmappagenums[i] = UInt32GetDatum(meta->aoChainInfo[i].lastLogicalPageNum);
}
values[5] = PointerGetDatum(construct_array(firstrevmappages,
MAX_AOREL_CONCURRENCY,
INT8OID,
sizeof(int64), true, 'i'));
values[6] = PointerGetDatum(construct_array(lastrevmappages,
MAX_AOREL_CONCURRENCY,
INT8OID,
sizeof(int64), true, 'i'));
values[7] = PointerGetDatum(construct_array(lastrevmappagenums,
MAX_AOREL_CONCURRENCY,
INT8OID,
sizeof(int64), true, 'i'));
}
htup = heap_form_tuple(tupdesc, values, nulls);
PG_RETURN_DATUM(HeapTupleGetDatum(htup));
}
/*
* Return the TID array stored in a BRIN revmap page
*/
Datum
brin_revmap_data(PG_FUNCTION_ARGS)
{
struct
{
ItemPointerData *tids;
int idx;
} *state;
FuncCallContext *fctx;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser to use raw page functions")));
if (SRF_IS_FIRSTCALL())
{
bytea *raw_page = PG_GETARG_BYTEA_P(0);
MemoryContext mctx;
Page page;
/* create a function context for cross-call persistence */
fctx = SRF_FIRSTCALL_INIT();
/* switch to memory context appropriate for multiple function calls */
mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
/* minimally verify the page we got */
page = verify_brin_page(raw_page, BRIN_PAGETYPE_REVMAP, "revmap");
if (PageIsNew(page))
{
MemoryContextSwitchTo(mctx);
PG_RETURN_NULL();
}
state = palloc(sizeof(*state));
state->tids = ((RevmapContents *) PageGetContents(page))->rm_tids;
state->idx = 0;
fctx->user_fctx = state;
MemoryContextSwitchTo(mctx);
}
fctx = SRF_PERCALL_SETUP();
state = fctx->user_fctx;
if (state->idx < REVMAP_PAGE_MAXITEMS)
SRF_RETURN_NEXT(fctx, PointerGetDatum(&state->tids[state->idx++]));
SRF_RETURN_DONE(fctx);
}
/*
* GPDB: Returns the chain of revmap block numbers for a given segno (aka block
* sequence).
*/
Datum
brin_revmap_chain(PG_FUNCTION_ARGS)
{
Oid indexRelid = PG_GETARG_OID(0);
int segno = PG_GETARG_UINT32(1);
Buffer metabuf;
Page metapage;
BrinMetaPageData *meta;
ArrayBuildState *astate = NULL;
BlockNumber currRevmapBlk;
Relation indexRel = index_open(indexRelid, AccessShareLock);
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser to use raw page functions"))));
if (!IS_BRIN(indexRel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a %s index",
RelationGetRelationName(indexRel), "BRIN")));
if (segno < 0 || segno > AOTupleId_MaxSegmentFileNum)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("\"%u\" is not a valid segno value (valid values are in [0,127])",
segno)));
metabuf = ReadBuffer(indexRel, BRIN_METAPAGE_BLKNO);
metapage = BufferGetPage(metabuf);
if (PageIsNew(metapage))
{
ReleaseBuffer(metabuf);
index_close(indexRel, AccessShareLock);
PG_RETURN_NULL();
}
meta = (BrinMetaPageData *) PageGetContents(metapage);
currRevmapBlk = meta->aoChainInfo[segno].firstPage;
while (currRevmapBlk != InvalidBlockNumber)
{
/* Look at the chain link to see what the next revmap blknum is */
Buffer curr;
astate = accumArrayResult(astate, UInt32GetDatum(currRevmapBlk), false,
INT8OID, CurrentMemoryContext);
curr = ReadBuffer(indexRel, currRevmapBlk);
currRevmapBlk = BrinNextRevmapPage(BufferGetPage(curr));
ReleaseBuffer(curr);
}
ReleaseBuffer(metabuf);
index_close(indexRel, AccessShareLock);
if (astate)
PG_RETURN_DATUM(makeArrayResult(astate,
CurrentMemoryContext));
else
PG_RETURN_NULL();
}