blob: bde7874c142e5cd7f12c00cd32bf8d28812170a2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*-------------------------------------------------------------------------
*
* cdbpersistentbuild.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "utils/palloc.h"
#include "storage/fd.h"
#include "storage/relfilenode.h"
#include "catalog/catalog.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_filespace.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_database.h"
#include "catalog/gp_persistent.h"
#include "access/persistentfilesysobjname.h"
#include "cdb/cdbdirectopen.h"
#include "cdb/cdbdispatchedtablespaceinfo.h"
#include "cdb/cdbpersistentstore.h"
#include "cdb/cdbpersistentfilesysobj.h"
#include "cdb/cdbpersistentdatabase.h"
#include "cdb/cdbpersistenttablespace.h"
#include "cdb/cdbpersistentfilespace.h"
#include "cdb/cdbpersistentrelfile.h"
#include "storage/itemptr.h"
#include "utils/hsearch.h"
#include "storage/shmem.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/transam.h"
#include "catalog/heap.h"
#include "utils/guc.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "cdb/cdbdatabaseinfo.h"
#include "utils/syscache.h"
#include "access/nbtree.h"
#include "storage/bufpage.h"
#include "commands/tablespace.h"
#include "cdb/cdbvars.h"
/*
 * PersistentBuild_NonTransactionTruncate
 *
 * Truncate the relation file identified by relFileNode down to zero blocks
 * through the storage manager.  The truncate is requested with isTemp = true
 * and without a persistent TID / serial number.
 */
static void
PersistentBuild_NonTransactionTruncate(RelFileNode *relFileNode)
{
	PersistentFileSysObjName objName;
	SMgrRelation reln;

	/* Build the object name; it is only consumed by the debug message. */
	PersistentFileSysObjName_SetRelationFile(&objName,
											 relFileNode,
											 0,		/* segmentFileNum */
											 is_tablespace_shared);

	if (Debug_persistent_print)
	{
		elog(Persistent_DebugPrintLevel(),
			 "Non-transaction truncate of '%s'",
			 PersistentFileSysObjName_ObjectName(&objName));
	}

	reln = smgropen(*relFileNode);
	smgrtruncate(reln,
				 0,			/* new number of blocks */
				 true,		/* isTemp */
				 false,		/* isLocalBuf */
				 NULL,		/* persistentTid */
				 0);		/* persistentSerialNum */
	smgrclose(reln);
}
/*
 * PersistentBuild_ScanGpPersistentRelationNodeForGlobal
 *
 * Walk the persistent relation-file store and, for every non-free entry that
 * lives in the global tablespace, insert a matching tuple into the supplied
 * gp_relation_node relation.  *count is incremented once per inserted tuple.
 *
 * Raises an ERROR when a global-tablespace entry is not Buffer Pool managed.
 */
static void PersistentBuild_ScanGpPersistentRelationNodeForGlobal(
	Relation	gp_relation_node,
	int64	   *count)
{
	PersistentFileSysObjData *objData;
	PersistentFileSysObjSharedData *objSharedData;
	PersistentStoreScan scan;
	Datum		values[Natts_gp_persistent_relfile_node];
	ItemPointerData tid;
	int64		tidSerialNum;

	PersistentFileSysObj_GetDataPtrs(PersistentFsObjType_RelationFile,
									 &objData,
									 &objSharedData);

	PersistentStore_BeginScan(&objData->storeData,
							  &objSharedData->storeSharedData,
							  &scan);

	for (;;)
	{
		RelFileNode node;
		int32		segmentFileNum;
		PersistentFileSysRelStorageMgr storageMgr;
		PersistentFileSysState state;
		PersistentFileSysRelBufpoolKind bufpoolKind;
		TransactionId parentXid;
		int64		serialNum;
		ItemPointerData previousFreeTid;
		PersistentFileSysObjName objName;
		bool		sharedStorage;

		if (!PersistentStore_GetNext(&scan, values, &tid, &tidSerialNum))
			break;

		GpPersistentRelfileNode_GetValues(values,
										  &node.spcNode,
										  &node.dbNode,
										  &node.relNode,
										  &segmentFileNum,
										  &storageMgr,
										  &state,
										  &bufpoolKind,
										  &parentXid,
										  &serialNum,
										  &previousFreeTid,
										  &sharedStorage);

		/* Free entries carry no relation file. */
		if (state == PersistentFileSysState_Free)
			continue;

		PersistentFileSysObjName_SetRelationFile(&objName,
												 &node,
												 segmentFileNum,
												 NULL);
		objName.hasInited = true;
		objName.sharedStorage = sharedStorage;

		/* Only entries in the global tablespace are copied. */
		if (node.spcNode != GLOBALTABLESPACE_OID)
			continue;

		if (storageMgr != PersistentFileSysRelStorageMgr_BufferPool)
			elog(ERROR, "Only expecting global tables to be Buffer Pool managed");

		/* The relfilenode is used for both the pg_class OID and relfilenode. */
		InsertGpRelfileNodeTuple(gp_relation_node,
								 node.relNode,	/* pg_class OID */
								 NULL,			/* relationName (optional) */
								 node.relNode,	/* pg_class relfilenode */
								 0,				/* segmentFileNum */
								 false,			/* updateIndex */
								 &tid,
								 tidSerialNum);
		(*count)++;
	}

	PersistentStore_EndScan(&scan);
}
/*
 * PersistentBuild_PopulateGpRelationNode
 *
 * Rebuild the persistent relation-file entries and the gp_relfile_node
 * mapping table of one database from the collected DatabaseInfo, then rebuild
 * the gp_relfile_node index from scratch.
 *
 * info              - collected relations of the database, with their
 *                     physical segment files and append-only catalog info
 * defaultTablespace - default tablespace of that database
 * count             - incremented once per relation processed here; also
 *                     passed on to the global-table scan for databases other
 *                     than template1
 *
 * Raises an ERROR when an append-only catalog entry has a non-zero logical
 * EOF but no physical file, or when the gp_relfile_node index was not among
 * the collected relations.
 */
static void PersistentBuild_PopulateGpRelationNode(
	DatabaseInfo *info,
	Oid			defaultTablespace,
	int64	   *count)
{
	Relation	gp_relfile_node;
	int			r;
	RelFileNode indexRelFileNode;
	bool		indexFound;
	Relation	gp_relfile_node_index;
	struct IndexInfo *indexInfo;

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "PersistentBuild_PopulateGpRelationNode: Enter for dbOid %u",
			 info->database);

	MemSet(&indexRelFileNode, 0, sizeof(RelFileNode));
	indexFound = false;

	gp_relfile_node =
		DirectOpen_GpRelfileNodeOpen(
			defaultTablespace,
			info->database);

	for (r = 0; r < info->dbInfoRelArrayCount; r++)
	{
		DbInfoRel  *dbInfoRel = &info->dbInfoRelArray[r];
		RelFileNode relFileNode;
		PersistentFileSysRelStorageMgr relStorageMgr;
		ItemPointerData persistentTid;
		int64		persistentSerialNum;
		Relation	rd;

		/* Global-tablespace relations are only handled for template1. */
		if (dbInfoRel->reltablespace == GLOBALTABLESPACE_OID &&
			info->database != TemplateDbOid)
			continue;

		relFileNode.spcNode = dbInfoRel->reltablespace;
		relFileNode.dbNode =
			(dbInfoRel->reltablespace == GLOBALTABLESPACE_OID ?
			 0 : info->database);
		relFileNode.relNode = dbInfoRel->relfilenodeOid;

		/* Remember where the index lives so it can be rebuilt below. */
		if (dbInfoRel->relationOid == GpRelfileNodeOidIndexId)
		{
			indexRelFileNode = relFileNode;
			indexFound = true;
		}

		relStorageMgr = (
			(dbInfoRel->relstorage == RELSTORAGE_AOROWS ||
			 dbInfoRel->relstorage == RELSTORAGE_PARQUET) ?
			PersistentFileSysRelStorageMgr_AppendOnly :
			PersistentFileSysRelStorageMgr_BufferPool);

		/*
		 * The gp_relation_node mapping table is empty, so use the physical
		 * files as the guide.
		 */
		if (relStorageMgr == PersistentFileSysRelStorageMgr_BufferPool)
		{
			PersistentFileSysRelStorageMgr localRelStorageMgr;
			PersistentFileSysRelBufpoolKind relBufpoolKind;

			GpPersistentRelfileNode_GetRelfileInfo(
				dbInfoRel->relkind,
				dbInfoRel->relstorage,
				dbInfoRel->relam,
				&localRelStorageMgr,
				&relBufpoolKind);
			Assert(localRelStorageMgr == PersistentFileSysRelStorageMgr_BufferPool);

			/*
			 * Heap tables only ever add a single segment_file_num=0 entry to
			 * gp_persistent_relation regardless of how many segment files
			 * there really are.
			 */
			PersistentRelfile_AddCreated(
				&relFileNode,
				/* segmentFileNum */ 0,
				relStorageMgr,
				relBufpoolKind,
				dbInfoRel->relname,
				&persistentTid,
				&persistentSerialNum,
				/* flushToXLog */ false);

			InsertGpRelfileNodeTuple(
				gp_relfile_node,
				dbInfoRel->relationOid,		/* pg_class OID */
				dbInfoRel->relname,
				relFileNode.relNode,		/* pg_class relfilenode */
				/* segmentFileNum */ 0,
				/* updateIndex */ false,
				&persistentTid,
				persistentSerialNum);
		}
		else
		{
			int			a;
			int			p;

			/*
			 * Append-Only.
			 *
			 * No check is made here that physical segment file #0 exists.
			 */

			/* Rebuild gp_persistent_relation_node for AO tables. */
			PersistentRelation_AddCreated(
				&relFileNode,
				&persistentTid,
				&persistentSerialNum,
				false);

			/*
			 * Merge physical file existence and ao[cs]seg catalog logical
			 * EOFs.  Both arrays are walked in step; 'a' is the cursor into
			 * the catalog array and is never rewound.
			 */
			a = 0;
			for (p = 0; p < dbInfoRel->physicalSegmentFilesCount; p++)
			{
				int			physicalSegmentFileNum = dbInfoRel->physicalSegmentFiles[p].segmentFileNum;
				bool		haveCatalogInfo;
				int64		logicalEof;

				/*
				 * There is mostly a 1:1 matching of physical files and
				 * logical files and we just have to match them up correctly.
				 * However there are several cases where this can diverge that
				 * we have to be able to handle.
				 *
				 * 1) Segment file 0 always exists as a physical file, but is
				 * only cataloged when it actually contains data - this only
				 * occurs for ao when data is inserted in utility mode.
				 *
				 * 2) Files created in aborted transactions where an initial
				 * frozen tuple never made it to disk may have a physical file
				 * with no logical file.
				 * XXX - These are leaked files that should probably be
				 * cleaned up at some point.
				 *
				 * 3) It is possible to have files that logically exist with a
				 * logical EOF of 0 but not exist in the filesystem.
				 * XXX - how does this happen, is it really safe?
				 */
				logicalEof = 0;
				haveCatalogInfo = false;

				/* If we exhaust the loop then we are in case 2 */
				while (a < dbInfoRel->appendOnlyCatalogSegmentInfoCount)
				{
					DbInfoAppendOnlyCatalogSegmentInfo *logicalSegInfo =
						&dbInfoRel->appendOnlyCatalogSegmentInfo[a];

					/* Normal Case: both exist */
					if (logicalSegInfo->segmentFileNum == physicalSegmentFileNum)
					{
						logicalEof = logicalSegInfo->logicalEof;
						haveCatalogInfo = true;
						a++;
						break;	/* found */
					}
					/* case 1 or case 2 */
					else if (logicalSegInfo->segmentFileNum > physicalSegmentFileNum)
					{
						logicalEof = 0;
						haveCatalogInfo = false;
						break;	/* not found */
					}
					/* case 3 - skip over logical segments w/o physical files */
					else if (logicalSegInfo->logicalEof == 0)
					{
						a++;
						continue;	/* keep looking */
					}
					/* otherwise it is an error */
					else
					{
						elog(ERROR, "logical EOF greater than zero (" INT64_FORMAT ") for segment file #%d in relation '%s' but physical file is missing",
							 logicalSegInfo->logicalEof,
							 logicalSegInfo->segmentFileNum,
							 dbInfoRel->relname);
					}

					/* unreachable */
					Assert(false);
				}

				/*
				 * case 2) Ignore segment file left over from pre-Release 4.0
				 * aborted transaction whose initial frozen ao[cs]seg tuple
				 * never made it to disk.  This will be a file that can result
				 * in an upgrade complaint...
				 */
				if (physicalSegmentFileNum > 0 && !haveCatalogInfo)
					continue;

				PersistentRelfile_AddCreated(
					&relFileNode,
					physicalSegmentFileNum,
					relStorageMgr,
					PersistentFileSysRelBufpoolKind_None,
					dbInfoRel->relname,
					&persistentTid,
					&persistentSerialNum,
					/* flushToXLog */ false);

				InsertGpRelfileNodeTuple(
					gp_relfile_node,
					dbInfoRel->relationOid,		/* pg_class OID */
					dbInfoRel->relname,
					relFileNode.relNode,		/* pg_class relfilenode */
					physicalSegmentFileNum,
					/* updateIndex */ false,
					&persistentTid,
					persistentSerialNum);
			}
		}

		/*
		 * Reset Relation->rd_relationnodeinfo.isPresent, so that next time
		 * the persistent TID and serial number can be refetched.
		 *
		 * The relation cache is keyed by pg_class OID, so look the relation
		 * up by its relation OID rather than by its relfilenode; the two
		 * differ once a relation has been given a new relfilenode (e.g. by
		 * REINDEX).
		 */
		rd = RelationIdGetRelation(dbInfoRel->relationOid);
		if (RelationIsValid(rd))
		{
			rd->rd_relationnodeinfo.isPresent = false;
			RelationClose(rd);
		}

		(*count)++;
	}

	if (info->database != TemplateDbOid)
	{
		PersistentBuild_ScanGpPersistentRelationNodeForGlobal(
			gp_relfile_node,
			count);
	}

	/*
	 * Build the index for gp_relfile_node.
	 *
	 * The problem is the session we are using is associated with one
	 * particular database of the cluster, but we need to iterate through all
	 * the databases.  So, unfortunately, the solution has been to use the
	 * "Direct Open" stuff.
	 *
	 * We do this because MyDatabaseId, the default tablespace of the session
	 * should not be changed.  The various caches and many other implicit
	 * things assume the object is for MyDatabaseId and the default
	 * tablespace.  For example, we cannot use CatalogUpdateIndexes called in
	 * InsertGpRelfileNodeTuple because it will not do the right thing.
	 *
	 * Also, if the index was re-indexed it will have a different relfilenode,
	 * so we must have found it (above) and open it dynamically.
	 */
	if (!indexFound)
	{
		/*
		 * Checked at run time rather than with Assert: without the index's
		 * location we would go on to truncate an all-zero RelFileNode.
		 */
		elog(ERROR,
			 "pg_class entry for gp_relfile_node index not found in database %u",
			 info->database);
	}

	PersistentBuild_NonTransactionTruncate(
		&indexRelFileNode);

	gp_relfile_node_index =
		DirectOpen_GpRelfileNodeIndexOpenDynamic(
			GpRelfileNodeOidIndexId,
			indexRelFileNode.spcNode,
			indexRelFileNode.dbNode,
			indexRelFileNode.relNode);

	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = Natts_gp_relfile_node_index;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_KeyAttrNumbers[2] = 6;
	indexInfo->ii_Unique = true;

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "PersistentBuild_PopulateGpRelationNode: building gp_relfile_node_index %u/%u/%u for gp_relfile_node %u/%u/%u",
			 gp_relfile_node_index->rd_node.spcNode,
			 gp_relfile_node_index->rd_node.dbNode,
			 gp_relfile_node_index->rd_node.relNode,
			 gp_relfile_node->rd_node.spcNode,
			 gp_relfile_node->rd_node.dbNode,
			 gp_relfile_node->rd_node.relNode);

	index_build(
		gp_relfile_node,
		gp_relfile_node_index,
		indexInfo,
		false);

	DirectOpen_GpRelfileNodeIndexClose(gp_relfile_node_index);
	DirectOpen_GpRelfileNodeClose(gp_relfile_node);

	if (Debug_persistent_print)
		elog(Persistent_DebugPrintLevel(),
			 "PersistentBuild_PopulateGpRelationNode: Exit for dbOid %u",
			 info->database);
}
/*
 * PersistentBuild_BuildDb
 *
 * Rebuild the persistent metadata of a single database: seed
 * gp_global_sequence if it is empty, register the database directory in each
 * of its tablespaces, repopulate gp_relfile_node, and finish with a
 * checkpoint.
 *
 * dbOid    - database to rebuild
 * mirrored - accepted for interface compatibility; not used in this function
 *
 * Returns the count accumulated by PersistentBuild_PopulateGpRelationNode.
 * Raises an ERROR if the database has no pg_database syscache entry.
 */
static int64
PersistentBuild_BuildDb(
	Oid			dbOid,
	bool		mirrored)
{
	int64		count = 0;
	Relation	gp_global_sequence;
	Relation	pg_database;
	HeapTuple	tuple;
	HeapScanDesc scandesc;
	Form_pg_database form_pg_database;
	DatabaseInfo *info;
	Oid			defaultTablespace;
	int			t;
	bool		collectCatalogInfo;

	/*
	 * Turn this on so we don't try to fetch persistence information from
	 * gp_relation_node for gp_relation_node and its index until we've done
	 * the assignment with PersistentRelation_AddCreated.
	 */
	gp_before_persistence_work = true;

	/*
	 * If the gp_global_sequence table hasn't been populated yet then we need
	 * to populate it before we can proceed with building the rest of the
	 * persistent tables.
	 */
	gp_global_sequence = heap_open(GpGlobalSequenceRelationId, RowExclusiveLock);
	scandesc = heap_beginscan(gp_global_sequence, SnapshotAny, 0, NULL);
	tuple = heap_getnext(scandesc, ForwardScanDirection);
	if (!HeapTupleIsValid(tuple))
	{
		Datum		values[Natts_gp_global_sequence];
		bool		nulls[Natts_gp_global_sequence];
		TupleDesc	tupDesc = RelationGetDescr(gp_global_sequence);

		/* Insert N frozen tuples of value 0 */
		MemSet(nulls, false, sizeof(nulls));
		values[Anum_gp_global_sequence_sequence_num - 1] = Int64GetDatum(0);

		tuple = heap_form_tuple(tupDesc, values, nulls);
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "failed to build global sequence tuple");

		for (t = 0; t < GpGlobalSequence_MaxSequenceTid; t++)
		{
			frozen_heap_insert(gp_global_sequence, tuple);
		}
	}
	heap_endscan(scandesc);
	heap_close(gp_global_sequence, RowExclusiveLock);

	/* Lookup the information for the current database */
	pg_database = heap_open(DatabaseRelationId, AccessShareLock);

	/* Fetch a copy of the tuple to scribble on */
	tuple = SearchSysCacheCopy(DATABASEOID,
							   ObjectIdGetDatum(dbOid),
							   0, 0, 0);
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for database %u", dbOid);

	form_pg_database = (Form_pg_database) GETSTRUCT(tuple);
	defaultTablespace = form_pg_database->dattablespace;

	if (Debug_persistent_print)
	{
		elog(Persistent_DebugPrintLevel(),
			 "PersistentBuild_BuildDb: dbOid %u, '%s'",
			 dbOid,
			 form_pg_database->datname.data);
	}

	/*
	 * Special call here to scan the persistent meta-data structures so we
	 * are open for business and then we can add information.
	 */
	PersistentFileSysObj_BuildInitScan();

	/*
	 * In upgrade mode on a dispatch or utility role, neither the
	 * gp_relation_node info nor the append-only catalog segment info is
	 * collected; otherwise both are.
	 */
	collectCatalogInfo =
		!(gp_upgrade_mode &&
		  (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_UTILITY));

	info = DatabaseInfo_Collect(dbOid,
								defaultTablespace,
								collectCatalogInfo,	/* collectGpRelationNodeInfo */
								collectCatalogInfo,	/* collectAppendOnlyCatalogSegmentInfo */
								true);				/* scanFileSystem */

	/* Register the database directory in every non-global tablespace. */
	for (t = 0; t < info->tablespacesCount; t++)
	{
		Oid			tablespace = info->tablespaces[t];
		DbDirNode	dbDirNode;
		ItemPointerData persistentTid;

		if (tablespace != GLOBALTABLESPACE_OID)
		{
			dbDirNode.tablespace = tablespace;
			dbDirNode.database = dbOid;

			PersistentDatabase_AddCreated(&dbDirNode,
										  &persistentTid,
										  false);	/* flushToXLog */
		}
	}

	PersistentBuild_PopulateGpRelationNode(info, defaultTablespace, &count);

	heap_close(pg_database, AccessShareLock);

	gp_before_persistence_work = false;

	/*
	 * Since we have written XLOG records with <persistentTid,
	 * persistentSerialNum> of zeroes because of the
	 * gp_before_persistence_work GUC, let's do a checkpoint to force out all
	 * buffer pool pages so we never try to redo those XLOG records in Crash
	 * Recovery.
	 */
	CreateCheckPoint(false, true);

	return count;
}
/*
 * gp_persistent_build_db
 *
 * SQL-callable wrapper: rebuild the persistent metadata of the database this
 * session is connected to (MyDatabaseId).  Argument 0 is the "mirrored"
 * boolean, passed through unchanged.  Always returns 1.
 */
Datum
gp_persistent_build_db(PG_FUNCTION_ARGS)
{
	bool		mirrored;

	mirrored = PG_GETARG_BOOL(0);

	/* The per-database count is not reported to the caller. */
	PersistentBuild_BuildDb(MyDatabaseId, mirrored);

	PG_RETURN_INT32(1);
}
Datum
gp_persistent_build_all(PG_FUNCTION_ARGS)
{
bool mirrored = PG_GETARG_BOOL(0);
Relation pg_filespace;
Relation pg_tablespace;
Relation pg_database;
HeapScanDesc scan;
HeapTuple tuple;
Datum *d;
bool *null;
// UNDONE: Verify we are in some sort of single-user mode.
// clear hash
PersistentFilespace_Reset();
//clear dispatched file space info
DispatchedFilespace_SeqSearch_Term();
/*
* Re-build filespaces.
*/
d = (Datum *) palloc(sizeof(Datum) * Natts_pg_filespace);
null = (bool *) palloc(sizeof(bool) * Natts_pg_filespace);
pg_filespace = heap_open(
FileSpaceRelationId,
AccessShareLock);
scan = heap_beginscan(pg_filespace, SnapshotNow, 0, NULL);
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
Oid filespaceOid;
if (!HeapTupleIsValid(tuple))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("filespace tuple is invalid")));
filespaceOid = HeapTupleGetOid(tuple);
heap_deform_tuple(tuple, pg_filespace->rd_att, d, null);
if (filespaceOid == SYSTEMFILESPACE_OID)
{
if (Debug_persistent_print)
elog(Persistent_DebugPrintLevel(),
"gp_persistent_build_all: skip pg_system filespaceOid %u",
filespaceOid);
continue;
}
if (Debug_persistent_print)
elog(Persistent_DebugPrintLevel(),
"gp_persistent_build_all: filespaceOid %u", filespaceOid);
/*
* Special call here to scan the persistent meta-data structures so we are open for
* business and then we can add information.
*/
PersistentFileSysObj_BuildInitScan();
PersistentFilespace_AddCreated( filespaceOid,
/* flushToXLog */ false);
}
heap_endscan(scan);
heap_close(pg_filespace, AccessShareLock);
pfree(d);
pfree(null);
/*
* Re-build tablespaces.
*/
d = (Datum *) palloc(sizeof(Datum) * Natts_pg_tablespace);
null = (bool *) palloc(sizeof(bool) * Natts_pg_tablespace);
pg_tablespace = heap_open(
TableSpaceRelationId,
AccessShareLock);
scan = heap_beginscan(pg_tablespace, SnapshotNow, 0, NULL);
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
Oid tablespaceOid;
if (!HeapTupleIsValid(tuple))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("tablespace tuple is invalid")));
tablespaceOid = HeapTupleGetOid(tuple);
heap_deform_tuple(tuple, pg_tablespace->rd_att, d, null);
if (tablespaceOid == DEFAULTTABLESPACE_OID ||
tablespaceOid == GLOBALTABLESPACE_OID)
{
if (Debug_persistent_print)
elog(Persistent_DebugPrintLevel(),
"gp_persistent_build_all: skip pg_default and pg_global tablespaceOid %u",
tablespaceOid);
continue;
}
if (Debug_persistent_print)
elog(Persistent_DebugPrintLevel(),
"gp_persistent_build_all: tablespaceOid %u filespaceOid %u",
tablespaceOid, DatumGetInt32(d[Anum_pg_tablespace_spcfsoid - 1]));
/*
* Special call here to scan the persistent meta-data structures so we are open for
* business and then we can add information.
*/
PersistentFileSysObj_BuildInitScan();
PersistentTablespace_AddCreated(
DatumGetInt32(d[Anum_pg_tablespace_spcfsoid - 1]),
tablespaceOid,
/* flushToXLog */ false);
}
heap_endscan(scan);
heap_close(pg_tablespace, AccessShareLock);
pfree(d);
pfree(null);
/*
* Re-build databases.
* Do template1 first since it will also populate the shared-object persistent objects.
*/
PersistentBuild_BuildDb(
TemplateDbOid,
mirrored);
if (Debug_persistent_print)
elog(Persistent_DebugPrintLevel(),
"gp_persistent_build_all: template1 complete");
/*
* Now, the remaining databases.
*/
pg_database = heap_open(
DatabaseRelationId,
AccessShareLock);
scan = heap_beginscan(pg_database, SnapshotNow, 0, NULL);
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
Oid dbOid;
dbOid = HeapTupleGetOid(tuple);
if (dbOid == HcatalogDbOid)
continue;
if (dbOid == TemplateDbOid)
{
if (Debug_persistent_print)
elog(Persistent_DebugPrintLevel(),
"gp_persistent_build_all: skip template1");
continue;
}
if (Debug_persistent_print)
elog(Persistent_DebugPrintLevel(),
"gp_persistent_build_all: dbOid %u",
dbOid);
PersistentBuild_BuildDb(
dbOid,
mirrored);
}
heap_endscan(scan);
heap_close(pg_database, AccessShareLock);
PersistentStore_FlushXLog();
PG_RETURN_INT32(1);
}
/*
 * gp_relfile_insert_for_register
 *
 * This function is an internal function, for hawq register to insert metadata
 * into gp_relfile_node and gp_persistent_relfile_node.
 *
 * Arguments (by position):
 *   0 tablespace OID        1 database OID         2 relation (pg_class) OID
 *   3 relfilenode           4 segment file number  5 relation name (cstring)
 *   6 relkind (bpchar)      7 relstorage (bpchar)  8 relam OID
 *
 * For buffer-pool relations a single segment-file-0 entry is added; for
 * append-only / parquet relations the entry is added for the given segment
 * file.  Always returns 1.  Raises an ERROR if the database has no
 * pg_database syscache entry.
 */
Datum
gp_relfile_insert_for_register(PG_FUNCTION_ARGS)
{
	Oid			tablespace = PG_GETARG_OID(0);
	Oid			database = PG_GETARG_OID(1);
	Oid			relation = PG_GETARG_OID(2);
	Oid			relfilenode = PG_GETARG_OID(3);
	Oid			segfile = PG_GETARG_OID(4);
	char	   *relname = PG_GETARG_CSTRING(5);
	BpChar	   *vRelkind = PG_GETARG_BPCHAR_P(6);
	char		relkind = vRelkind->vl_dat[0];
	BpChar	   *vRelStorage = PG_GETARG_BPCHAR_P(7);
	char		relstorage = vRelStorage->vl_dat[0];
	Oid			relam = PG_GETARG_OID(8);
	Relation	gp_relfile_node;
	RelFileNode relFileNode;
	PersistentFileSysRelStorageMgr relStorageMgr;
	ItemPointerData persistentTid;
	int64		persistentSerialNum;

	/* Fetch a copy of the tuple to scribble on */
	HeapTuple	tuple = SearchSysCacheCopy(DATABASEOID,
										   ObjectIdGetDatum(database),
										   0, 0, 0);

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for database %u", database);

	Form_pg_database form_pg_database = (Form_pg_database) GETSTRUCT(tuple);
	Oid			defaultTablespace = form_pg_database->dattablespace;

	gp_relfile_node =
		DirectOpen_GpRelfileNodeOpen(
			defaultTablespace,
			database);

	relFileNode.spcNode = tablespace;
	relFileNode.dbNode = database;
	relFileNode.relNode = relfilenode;

	relStorageMgr = (
		(relstorage == RELSTORAGE_AOROWS ||
		 relstorage == RELSTORAGE_PARQUET) ?
		PersistentFileSysRelStorageMgr_AppendOnly :
		PersistentFileSysRelStorageMgr_BufferPool);

	gp_before_persistence_work = true;

	if (relStorageMgr == PersistentFileSysRelStorageMgr_BufferPool)
	{
		PersistentFileSysRelStorageMgr localRelStorageMgr;
		PersistentFileSysRelBufpoolKind relBufpoolKind;

		GpPersistentRelfileNode_GetRelfileInfo(
			relkind,
			relstorage,
			relam,
			&localRelStorageMgr,
			&relBufpoolKind);
		Assert(localRelStorageMgr == PersistentFileSysRelStorageMgr_BufferPool);

		/*
		 * Heap tables only ever add a single segment_file_num=0
		 * entry to gp_persistent_relation regardless of how many
		 * segment files there really are.
		 */
		PersistentRelfile_AddCreated(
			&relFileNode,
			/* segmentFileNum */ 0,
			relStorageMgr,
			relBufpoolKind,
			relname,
			&persistentTid,
			&persistentSerialNum,
			/* flushToXLog */ false);

		InsertGpRelfileNodeTuple(
			gp_relfile_node,
			relation,				/* pg_class OID */
			relname,
			relFileNode.relNode,	/* pg_class relfilenode */
			/* segmentFileNum */ 0,
			/* updateIndex */ true,
			&persistentTid,
			persistentSerialNum);
	}
	else
	{
		PersistentRelfile_AddCreated(
			&relFileNode,
			segfile,
			relStorageMgr,
			PersistentFileSysRelBufpoolKind_None,
			relname,
			&persistentTid,
			&persistentSerialNum,
			/* flushToXLog */ false);

		InsertGpRelfileNodeTuple(
			gp_relfile_node,
			relation,				/* pg_class OID */
			relname,
			relFileNode.relNode,	/* pg_class relfilenode */
			segfile,
			/* updateIndex */ true,
			&persistentTid,
			persistentSerialNum);
	}

	/*
	 * Release the directly-opened gp_relfile_node relation; every
	 * DirectOpen_GpRelfileNodeOpen needs a matching close.
	 */
	DirectOpen_GpRelfileNodeClose(gp_relfile_node);

	gp_before_persistence_work = false;

	PG_RETURN_INT32(1);
}
/*
 * PersistentBuild_FindGpRelationNodeIndex
 *
 * Scan pg_class of the given database (opened directly) for the entry whose
 * OID is GpRelfileNodeOidIndexId and fill *relFileNode with its current
 * location.  A reltablespace of 0 is replaced by defaultTablespace.
 *
 * Raises an ERROR if no such pg_class entry exists.
 */
static void
PersistentBuild_FindGpRelationNodeIndex(
	Oid			database,
	Oid			defaultTablespace,
	RelFileNode *relFileNode)
{
	Relation	pg_class_rel;
	HeapScanDesc scan;
	HeapTuple	tuple;
	bool		found = false;

	/*
	 * Iterate through all the relations of the database and find
	 * gp_relation_node_index.
	 */
	pg_class_rel = DirectOpen_PgClassOpen(defaultTablespace, database);
	scan = heap_beginscan(pg_class_rel, SnapshotNow, 0, NULL);

	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		if (HeapTupleGetOid(tuple) == GpRelfileNodeOidIndexId)
		{
			Form_pg_class form_pg_class = (Form_pg_class) GETSTRUCT(tuple);
			Oid			reltablespace = form_pg_class->reltablespace;

			/* Zero means "use the database's default tablespace". */
			relFileNode->spcNode =
				(reltablespace == 0) ? defaultTablespace : reltablespace;
			relFileNode->dbNode = database;
			relFileNode->relNode = form_pg_class->relfilenode;

			found = true;
			break;
		}
	}

	heap_endscan(scan);
	DirectOpen_PgClassClose(pg_class_rel);

	if (!found)
		elog(ERROR, "pg_class entry for gp_relation_node_index not found");
}
/*
 * PersistentBuild_TruncateAllGpRelationNode
 *
 * For every database in pg_database except the one with OID HcatalogDbOid,
 * truncate gp_relation_node and its index, then write a fresh, empty BTree
 * meta page into the index.
 *
 * Returns the number of relation files truncated (two per database).
 */
static int64
PersistentBuild_TruncateAllGpRelationNode(void)
{
	Relation	pg_database;
	HeapScanDesc scan;
	HeapTuple	tuple;
	int64		count;

	pg_database = heap_open(
		DatabaseRelationId,
		AccessShareLock);

	/*
	 * Truncate gp_relation_node and its index in each database.
	 */
	scan = heap_beginscan(pg_database, SnapshotNow, 0, NULL);
	count = 0;
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_database form_pg_database =
			(Form_pg_database) GETSTRUCT(tuple);
		Oid			dbOid;
		Oid			dattablespace;
		RelFileNode relFileNode;
		SMgrRelation smgrRelation;
		Page		btree_metapage;

		dbOid = HeapTupleGetOid(tuple);
		dattablespace = form_pg_database->dattablespace;

		if (dbOid == HcatalogDbOid)
			continue;

		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "PersistentBuild_TruncateAllGpRelationNode: dbOid %u, '%s'",
				 dbOid,
				 form_pg_database->datname.data);

		/*
		 * Fill in the relation file node BEFORE the debug message below
		 * reads it; printing it first would read uninitialized memory.
		 */
		relFileNode.spcNode = dattablespace;
		relFileNode.dbNode = dbOid;
		relFileNode.relNode = GpRelfileNodeRelationId;

		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Truncating gp_relation_node %u/%u/%u in database oid %u ('%s')",
				 relFileNode.spcNode,
				 relFileNode.dbNode,
				 relFileNode.relNode,
				 dbOid,
				 form_pg_database->datname.data);

		/*
		 * Truncate WITHOUT generating an XLOG record (i.e. pretend it is a
		 * temp relation).
		 */
		PersistentBuild_NonTransactionTruncate(&relFileNode);
		count++;

		/*
		 * And, the index.  Unfortunately, the relfilenode OID can change due
		 * to a REINDEX {TABLE|INDEX} command.
		 */
		PersistentBuild_FindGpRelationNodeIndex(
			dbOid,
			dattablespace,
			&relFileNode);

		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Truncating gp_relation_node_index %u/%u/%u in database oid %u ('%s'). relfilenode different %s, tablespace different %s",
				 relFileNode.spcNode,
				 relFileNode.dbNode,
				 relFileNode.relNode,
				 dbOid,
				 form_pg_database->datname.data,
				 ((relFileNode.relNode != GpRelfileNodeOidIndexId) ? "true" : "false"),
				 ((relFileNode.spcNode != dattablespace) ? "true" : "false"));

		PersistentBuild_NonTransactionTruncate(&relFileNode);

		/* The BTree needs an empty meta-data block. */
		smgrRelation = smgropen(relFileNode);

		btree_metapage = (Page) palloc(BLCKSZ);
		_bt_initmetapage(btree_metapage, P_NONE, 0);
		smgrwrite(
			smgrRelation,
			/* blockNum */ 0,
			(char *) btree_metapage,
			/* isTemp */ false);
		smgrimmedsync(smgrRelation);
		pfree(btree_metapage);

		smgrclose(smgrRelation);

		count++;
	}
	heap_endscan(scan);
	heap_close(pg_database, AccessShareLock);

	return count;
}
/*
 * gp_persistent_reset_all
 *
 * SQL-callable: truncate gp_relation_node (and its index) in every database,
 * truncate the five shared persistent tables in the global tablespace, and
 * reset the persistent shared-memory state.  Always returns 1.
 */
Datum
gp_persistent_reset_all(PG_FUNCTION_ARGS)
{
	RelFileNode sharedTable;

	// UNDONE: Verify we are in some sort of single-user mode.

	/* Truncate all database's gp_relation_node and their indices. */
	PersistentBuild_TruncateAllGpRelationNode();

	/*
	 * Truncate the 5 persistent shared tables.  They all live in the global
	 * tablespace with database 0, so only relNode changes between calls.
	 */
	sharedTable.spcNode = GLOBALTABLESPACE_OID;
	sharedTable.dbNode = 0;

	sharedTable.relNode = GpPersistentRelfileNodeRelationId;
	PersistentBuild_NonTransactionTruncate(&sharedTable);

	sharedTable.relNode = GpPersistentRelationNodeRelationId;
	PersistentBuild_NonTransactionTruncate(&sharedTable);

	sharedTable.relNode = GpPersistentDatabaseNodeRelationId;
	PersistentBuild_NonTransactionTruncate(&sharedTable);

	sharedTable.relNode = GpPersistentTablespaceNodeRelationId;
	PersistentBuild_NonTransactionTruncate(&sharedTable);

	sharedTable.relNode = GpPersistentFilespaceNodeRelationId;
	PersistentBuild_NonTransactionTruncate(&sharedTable);

	/*
	 * Reset the persistent shared-memory free list heads and all
	 * shared-memory hash-tables.
	 */
	PersistentFileSysObj_Reset();

	PG_RETURN_INT32(1);
}
/*
 * PersistentBuild_SetRelationBufpoolKind
 *
 * For each relation collected for the given database, derive its buffer-pool
 * kind from relkind / relstorage / relam and push that kind into the
 * persistent entry of every gp_relation_node row of the relation.
 *
 * Relations in the global tablespace are only processed for template1.
 */
static void
PersistentBuild_SetRelationBufpoolKind(
	Oid			defaultTablespace,
	Oid			database)
{
	DatabaseInfo *info;
	int			relIdx;

	info = DatabaseInfo_Collect(database,
								defaultTablespace,
								true,	/* collectGpRelationNodeInfo */
								false,	/* collectAppendOnlyCatalogSegmentInfo */
								false);	/* scanFileSystem */

	for (relIdx = 0; relIdx < info->dbInfoRelArrayCount; relIdx++)
	{
		DbInfoRel  *rel = &info->dbInfoRelArray[relIdx];
		bool		isGlobal = (rel->reltablespace == GLOBALTABLESPACE_OID);
		RelFileNode relFileNode;
		PersistentFileSysRelStorageMgr localRelStorageMgr;
		PersistentFileSysRelBufpoolKind relBufpoolKind;
		int			nodeIdx;

		if (isGlobal && info->database != TemplateDbOid)
			continue;

		relFileNode.spcNode = rel->reltablespace;
		if (isGlobal)
			relFileNode.dbNode = 0;
		else
			relFileNode.dbNode = info->database;
		relFileNode.relNode = rel->relfilenodeOid;

		/* Only the buffer-pool kind is used; the storage manager is not. */
		GpPersistentRelfileNode_GetRelfileInfo(rel->relkind,
											   rel->relstorage,
											   rel->relam,
											   &localRelStorageMgr,
											   &relBufpoolKind);

		/*
		 * Update the gp_persistent_relation_node entry for each
		 * gp_relation_node entry.
		 */
		for (nodeIdx = 0; nodeIdx < rel->gpRelationNodesCount; nodeIdx++)
		{
			DbInfoGpRelationNode *node = &rel->gpRelationNodes[nodeIdx];

			PersistentFileSysObj_UpdateRelationBufpoolKind(&relFileNode,
														   node->segmentFileNum,
														   &node->persistentTid,
														   node->persistentSerialNum,
														   relBufpoolKind);
		}
	}
}
Datum
gp_persistent_set_relation_bufpool_kind_all(PG_FUNCTION_ARGS)
{
Relation pg_database;
HeapScanDesc scan;
HeapTuple tuple;
if (!gp_upgrade_mode)
ereport(ERROR,
(errcode(ERRCODE_GP_FEATURE_NOT_SUPPORTED),
errmsg("function is not supported"),
errOmitLocation(true)));
// UNDONE: Verify we are in some sort of single-user mode.
/*
* Special call here to scan the persistent meta-data structures so we are open for
* business and then we can add information.
*/
PersistentFileSysObj_BuildInitScan();
pg_database = heap_open(
DatabaseRelationId,
AccessShareLock);
scan = heap_beginscan(pg_database, SnapshotNow, 0, NULL);
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
Form_pg_database form_pg_database =
(Form_pg_database)GETSTRUCT(tuple);
Oid dbOid;
Oid dattablespace;
dbOid = HeapTupleGetOid(tuple);
dattablespace = form_pg_database->dattablespace;
if (Debug_persistent_print)
elog(Persistent_DebugPrintLevel(),
"gp_persistent_set_relation_bufpool_kind_all: dbOid %u",
dbOid);
PersistentBuild_SetRelationBufpoolKind(
dattablespace,
dbOid);
}
heap_endscan(scan);
heap_close(pg_database, AccessShareLock);
PG_RETURN_INT32(1);
}
Datum
gp_persistent_repair_delete(PG_FUNCTION_ARGS)
{
int fsObjType;
ItemPointerData persistentTid;
fsObjType = PG_GETARG_INT32(0);
persistentTid = PG_GETARG_TID(1);
if (fsObjType < PersistentFsObjType_First ||
fsObjType > PersistentFsObjType_Last)
elog(ERROR,
"Persistent object type must be in the range 1..4 "
"(Relation, Database Dir, Tablespace Dir, Filespace Dir)");
PersistentFileSysObj_RepairDelete(
fsObjType,
&persistentTid);
PG_RETURN_INT32(0);
}