blob: 9f433416b42db4005e01c8ce232779929b4f3e68 [file] [log] [blame]
/*-------------------------------------------------------------------------
*
* xlog_mm.c
*
* Special xlog handling for master mirroring.
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*-------------------------------------------------------------------------
*/
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "postgres.h"
#include "access/twophase.h"
#include "access/xlog.h"
#include "access/xlogmm.h"
#include "catalog/gp_segment_config.h"
#include "catalog/pg_filespace.h"
#include "catalog/pg_tablespace.h"
#include "cdb/cdbpersistentfilespace.h"
#include "cdb/cdbpersistenttablespace.h"
#include "cdb/cdbpersistentdatabase.h"
#include "cdb/cdbutil.h"
#include "cdb/cdbvars.h"
#include "commands/dbcommands.h"
#include "commands/filespace.h"
#include "commands/tablespace.h"
#include "postmaster/postmaster.h"
#include "storage/freespace.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "cdb/cdbpersistentrecovery.h"
#include "cdb/cdbpersistentfilesysobj.h"
/* Forward declarations of file-local (static) helpers used in this file. */
static void tblspc_get_filespace_paths(Oid tblspc, char **path);
static void add_filespace_map_entry(fspc_map *m, XLogRecPtr *beginLoc, char *caller);
static void add_tablespace_map_entry(tspc_map *m, XLogRecPtr *beginLoc, char *caller);
/*
* Remember filespace and tablespace mappings, used so that we know
* where to write data during master mirror synchronisation. Note that
* segment level replay is different: since we're not streaming there, we can
* be sure to get the persistent tables in shape and just use those.
*
* Both tables are created lazily on first use (see the "lazily initialised"
* checks in the functions that touch them), so either pointer may still be
* NULL when no entry has ever been added or looked up.
*/
static HTAB *filespace_map_ht = NULL;
static HTAB *tablespace_map_ht = NULL;
/*
 * unlink_obj
 *
 * Remove the filesystem object named by 'path'.  'info' selects the kind of
 * removal and must be MMXLOG_REMOVE_FILE or MMXLOG_REMOVE_DIR; anything else
 * trips Insist().
 *
 * Files:       RemovePath(path, false) is attempted.  A failure with ENOENT
 *              is accepted silently (already gone); any other failure is
 *              reported as a WARNING.
 * Directories: RemovePath(path, true) is called and its result is not
 *              inspected.
 */
static void
unlink_obj(char *path, uint8 info)
{
	if (info == MMXLOG_REMOVE_FILE)
	{
		if (Debug_print_qd_mirroring)
			elog(DEBUG1, "unlinking file, as requested %s", path);

		/* A file that has already disappeared is not worth a warning. */
		if (RemovePath(path, false) < 0 && errno != ENOENT)
			elog(WARNING, "could not unlink %s: %m", path);

		return;
	}

	if (info == MMXLOG_REMOVE_DIR)
	{
		/* same behaviour as dropdb(), RemoveFileSpace(), RemoveTableSpace() */
		elog(DEBUG1, "removing directory, as requested %s", path);
		RemovePath(path, true);
		return;
	}

	/* No other operation code is meaningful here. */
	Insist(false);
}
/*
 * mmxlog_redo
 *
 * Replay one master-mirroring WAL record.  The operation code is taken from
 * the record's xl_info bits (with XLR_INFO_MASK cleared) and is one of:
 *
 *   MMXLOG_CREATE_DIR   create the directory; for database objects also
 *                       write the short version file via set_short_version()
 *   MMXLOG_REMOVE_DIR   remove the directory tree via unlink_obj()
 *   MMXLOG_CREATE_FILE  create the (empty) file; an existing file is accepted
 *   MMXLOG_REMOVE_FILE  drop the relation file
 *
 * Any other code raises PANIC.
 *
 * On a standby (GPStandby()), records flagged as 'shared' are skipped
 * entirely: the standby only touches local storage.
 *
 * beginLoc and lsn are not used by this function.
 */
void
mmxlog_redo(XLogRecPtr beginLoc, XLogRecPtr lsn, XLogRecord *record)
{
	uint8		info = record->xl_info & ~XLR_INFO_MASK;
	xl_mm_fs_obj *xlrec = (xl_mm_fs_obj *) XLogRecGetData(record);
	char	   *path = xlrec->path;

	if (path == NULL)
	{
		/*
		 * A NULL path indicates a problem in looking up the path specified
		 * in the xlog record.  Historically the record carried both the
		 * master and master mirror dbids with their object paths, and a NULL
		 * result most likely meant that the dbid in the record did not match
		 * the dbid of the current master (i.e. this process).
		 *
		 * This should never occur.  The known ways it could:
		 *
		 * - The dbids of master and standby changed over time (standby
		 *   added/dropped around an expansion), so older records carry
		 *   pre-expansion dbids that a new standby does not recognise.  A
		 *   standby takeover should never need to read xlog from before the
		 *   most recent standby initialization, because that initialization
		 *   generates a checkpoint with no active transactions before it.
		 *
		 * - A user copied xlog from another system and placed it here.
		 *
		 * - The record itself contained a NULL path for the object.
		 *
		 * NOTE(review): xlrec->path looks like an array embedded in the
		 * record (emit_mmxlog_fs_record uses sizeof(xlrec.path)), in which
		 * case this test can never be true -- confirm against the struct
		 * definition.
		 */
		elog(ERROR, "The object's path can not be constructed based on the xlog record. ");
	}

	/* Standby should only touch local storage. */
	if (GPStandby() && xlrec->shared)
	{
		elog(DEBUG1, "Standby redo skip none-local path: %s", path);
		return;
	}

	if (info == MMXLOG_CREATE_DIR)
	{
		bool		dir_created = true;

		elog(DEBUG1, "got create directory request: %s", path);
		if (Debug_persistent_recovery_print)
		{
			elog(PersistentRecovery_DebugPrintLevel(),
				 "mmxlog_redo: create directory request %d: path \"%s\", filespace %u, path \"%s\"",
				 info,
				 path,
				 xlrec->filespace,
				 xlrec->path);
		}

		/* There is no new filespace and tablespace for a Standby. */
		if (MakeDirectory(path, 0700) == 0)
		{
			if (Debug_persistent_recovery_print)
			{
				elog(PersistentRecovery_DebugPrintLevel(),
					 "mmxlog_redo: Re-created directory \"%s\"",
					 path);
			}
		}
		else
		{
			/*
			 * Allowed to already exist.  errno is examined immediately after
			 * the failed MakeDirectory() call, before anything can clobber it.
			 */
			if (errno != EEXIST)
			{
				if (GPStandby())
				{
					elog(ERROR, "could not create directory \"%s\": %m",
						 path);
				}
				else
				{
					elog(LOG, "Note: unable a create directory \"%s\" from Master Mirroring redo: %m",
						 path);
				}
			}
			else
			{
				if (Debug_persistent_recovery_print)
				{
					elog(PersistentRecovery_DebugPrintLevel(),
						 "mmxlog_redo: Directory \"%s\" already exists",
						 path);
				}
			}

			/*
			 * UNDONE: This isn't idempotent!  What if the directory create
			 * succeeds but the PG_VERSION create fails in a system crash?
			 */
			dir_created = false;
		}

		/* need to add PG_VERSION for newly created databases */
		if (xlrec->objtype == MM_OBJ_DATABASE && dir_created == true)
			set_short_version(path, NULL, false);
	}
	else if (info == MMXLOG_REMOVE_DIR)
	{
		if (Debug_persistent_recovery_print)
		{
			elog(PersistentRecovery_DebugPrintLevel(),
				 "mmxlog_redo: remove directory request %s: path \"%s\", filespace %u, path \"%s\"",
				 (xlrec->objtype == MM_OBJ_FILESPACE ? "filespace" :
				  (xlrec->objtype == MM_OBJ_TABLESPACE ? "tablespace" :
				   (xlrec->objtype == MM_OBJ_DATABASE ? "database" :
					"unknown"))),
				 path,
				 xlrec->filespace,
				 xlrec->path);
		}

		/* tablespace and database should be fine */
		unlink_obj(path, info);
	}
	else if (info == MMXLOG_CREATE_FILE)
	{
		if (Debug_persistent_recovery_print)
		{
			elog(PersistentRecovery_DebugPrintLevel(),
				 "mmxlog_redo: create file request %d: path \"%s\", filespace %u, %u/%u/%u, path \"%s\"",
				 info,
				 path,
				 xlrec->filespace,
				 xlrec->tablespace,
				 xlrec->database,
				 xlrec->relfilenode,
				 xlrec->path);
		}

		/* Local file and hdfs file need different flags for open(). */
		int			fd = -1;

		if (!xlrec->shared)
			fd = PathNameOpenFile(path, O_CREAT | O_EXCL | PG_BINARY, 0600);
		else
		{
			/*
			 * For shared storage an existence probe replaces O_EXCL; set
			 * errno by hand so the common failure handling below treats an
			 * existing file the same way in both cases.
			 */
			if (HdfsPathExist(path))
				errno = EEXIST;
			else
				fd = PathNameOpenFile(path, O_CREAT | O_WRONLY, 0600);
		}

		if (fd < 0)
		{
			/* tolerate existence */
			if (errno != EEXIST)
				elog(WARNING, "could not open file %s: %m", path);
		}

		if (fd >= 0)
			FileClose(fd);
	}
	else if (info == MMXLOG_REMOVE_FILE)
	{
		RelFileNode rnode;

		Insist(xlrec->objtype == MM_OBJ_RELFILENODE);

		if (Debug_persistent_recovery_print)
		{
			elog(PersistentRecovery_DebugPrintLevel(),
				 "mmxlog_redo: remove file request %d: path \"%s\", filespace %u, %u/%u/%u, path \"%s\"",
				 info,
				 path,
				 xlrec->filespace,
				 xlrec->tablespace,
				 xlrec->database,
				 xlrec->relfilenode,
				 xlrec->path);
		}

		rnode.spcNode = xlrec->tablespace;
		rnode.dbNode = xlrec->database;
		rnode.relNode = xlrec->relfilenode;

		XLogDropRelation(rnode);

		if (AmActiveMaster())
		{
			RC4PersistentTablespaceGetFilespaces tablespaceGetFilespaces;
			char	   *primaryFilespaceLocation;
			Oid			filespaceOid;

			/*
			 * If we are re-doing Master Mirroring work on the Master and the
			 * tablespace doesn't exist in the shared-memory persistent
			 * hash-table, skip the unlink...
			 */
			tablespaceGetFilespaces =
				PersistentTablespace_TryGetFilespacePath(
														 rnode.spcNode,
														 &primaryFilespaceLocation,
														 &filespaceOid);
			switch (tablespaceGetFilespaces)
			{
				case RC4PersistentTablespaceGetFilespaces_Ok:
					break;

				case RC4PersistentTablespaceGetFilespaces_TablespaceNotFound:
					elog(LOG, "Note: unable find tablespace %u from Master Mirroring redo",
						 rnode.spcNode);
					return;

				case RC4PersistentTablespaceGetFilespaces_FilespaceNotFound:
					elog(LOG, "Note: unable find filespace %u for tablespace %u for Master Mirroring redo",
						 filespaceOid, rnode.spcNode);
					return;

				default:
					elog(ERROR, "Unexpected tablespace filespace fetch result: %d",
						 tablespaceGetFilespaces);
			}
		}

		/*
		 * QD needs remove the other segments files.
		 */
		if (xlrec->shared)
		{
			RemovePath(path, false);
		}
		else
			smgrdounlink(
						 &rnode,
						  /* isLocalBuf */ false,
						  /* relationName */ NULL,
						  /* isRedo */ true,	/* Don't generate Master Mirroring records... */
						  /* ignoreNonExistence */ true);
	}
	else
		elog(PANIC, "unknown mmxlog op code %u", info);
}
/*
 * mmxlog_desc
 *
 * Append a human-readable description of a master-mirroring WAL record to
 * 'buf' for log output.  The text names the operation (create/remove of a
 * directory/file) followed by the object's path and filespace OID; an
 * unrecognised operation code is rendered as "UNKNOWN".
 *
 * beginLoc is not used.
 */
void
mmxlog_desc(StringInfo buf, XLogRecPtr beginLoc, XLogRecord *record)
{
	xl_mm_fs_obj *fsobj = (xl_mm_fs_obj *) XLogRecGetData(record);
	uint8		opcode = record->xl_info & ~XLR_INFO_MASK;
	char	   *objpath = fsobj->path;

	/*
	 * A NULL path means the path could not be derived from the record.  That
	 * should never happen; the scenarios in which it historically could are
	 * a dbid mismatch between the record and this process (e.g. standby
	 * re-created around an expansion), xlog copied in from another system,
	 * or a record that was written with a NULL path.
	 */
	if (objpath == NULL)
		elog(ERROR, "The object's path can not be constructed based on the xlog record. ");

	if (opcode == MMXLOG_CREATE_DIR)
	{
		appendStringInfo(buf, "create directory: path \"%s\", filespace %u",
						 objpath, fsobj->filespace);
		return;
	}

	if (opcode == MMXLOG_REMOVE_DIR)
	{
		appendStringInfo(buf, "remove directory: path \"%s\", filespace %u",
						 objpath, fsobj->filespace);
		return;
	}

	if (opcode == MMXLOG_CREATE_FILE)
	{
		appendStringInfo(buf, "create file: path \"%s\", filespace %u",
						 objpath, fsobj->filespace);
		return;
	}

	if (opcode == MMXLOG_REMOVE_FILE)
	{
		appendStringInfo(buf, "remove file: path \"%s\", filespace %u",
						 objpath, fsobj->filespace);
		return;
	}

	appendStringInfo(buf, "UNKNOWN");
}
/*
 * oidtoa
 *
 * Convert an OID to its decimal string form.
 *
 * Returns a freshly palloc'd, NUL-terminated string; the caller owns it and
 * may pfree() it.
 *
 * The value is formatted as unsigned ("%u").  Formatting through a signed
 * 32-bit conversion would render OIDs above INT_MAX as negative numbers,
 * whose text ("-2147483648" plus NUL = 12 bytes) does not fit the buffer.
 */
static char *
oidtoa(Oid oid)
{
	/* Largest OID is 4294967295: 10 digits plus the terminating NUL. */
	const Size	buflen = 11;
	char	   *tmp = palloc(buflen);

	snprintf(tmp, buflen, "%u", oid);
	return tmp;
}
/*
 * append_file_parts
 *
 * Extend the base path in 'str' (in place) with the components that identify
 * the object: tablespace, then database, then relfilenode, then segment
 * number.  Components whose OID is invalid are skipped; a segment number of
 * zero is not appended.  For MM_OBJ_FILESPACE nothing is appended because the
 * base path already names the object.
 *
 * The default and global tablespaces map to the fixed directory names "base"
 * and "global"; every other tablespace is represented by its OID.
 *
 * 'str' must be a buffer of at least MAXPGPATH bytes; join_path_components()
 * already relies on that, and the segment suffix is bounded the same way so
 * an over-long path is truncated instead of overrunning the buffer.
 *
 * Returns 'str'.
 */
static char *
append_file_parts(mm_fs_obj_type type,
				  char *str, Oid tablespace, Oid database, Oid relfilenode,
				  uint32 segnum)
{
	char	   *part;

	if (type == MM_OBJ_FILESPACE)
		return str;				/* already done in base path */

	/* first two are the special tablespaces */
	if (tablespace == DEFAULTTABLESPACE_OID)
		join_path_components(str, str, "base");
	else if (tablespace == GLOBALTABLESPACE_OID)
		join_path_components(str, str, "global");
	else
	{
		part = oidtoa(tablespace);
		join_path_components(str, str, part);
		pfree(part);			/* oidtoa() result is ours to release */
	}

	if (OidIsValid(database))
	{
		part = oidtoa(database);
		join_path_components(str, str, part);
		pfree(part);
	}

	if (OidIsValid(relfilenode))
	{
		part = oidtoa(relfilenode);
		join_path_components(str, str, part);
		pfree(part);

		/* Is this a segment > 0 ? If so, add the segment file number */
		if (segnum)
		{
			size_t		used = strlen(str);

			if (used < MAXPGPATH)
				snprintf(str + used, MAXPGPATH - used, "/%u", segnum);
		}
	}

	return str;
}
/*
 * emit_mmxlog_fs_record
 *
 * Guts of logging for creation or destruction of filesystem objects on
 * the master.
 *
 * Builds an xl_mm_fs_obj record describing the object (type, OIDs, segment
 * number, persistent TID/serial number and full path) and inserts it into
 * the WAL under RM_MMXLOG_ID with 'flags' as the operation code.
 *
 * For MM_OBJ_FILESPACE the base path comes from the filespace itself and
 * 'filespace' must be valid.  For every other type 'tablespace' must be
 * valid; the base path and the filespace OID are derived from it and the
 * caller-supplied 'filespace' is overwritten.
 *
 * *beginLoc is always zeroed first, and set to the start location of the
 * inserted record on success.
 *
 * Returns true if a record was written; false (nothing written) when xlog
 * records cannot be emitted or this process is not the active master.
 */
static bool
emit_mmxlog_fs_record(mm_fs_obj_type type, Oid filespace,
					  Oid tablespace, Oid database, Oid relfilenode,
					  ItemPointer persistentTid, int64 persistentSerialNum,
					  int32 segnum, uint8 flags, XLogRecPtr *beginLoc)
{
	XLogRecData rdata;
	xl_mm_fs_obj xlrec;
	char	   *path;

	MemSet(beginLoc, 0, sizeof(XLogRecPtr));

	if (!CanEmitXLogRecords)
		return false;

	if (!AmActiveMaster())
		return false;

	Insist(Gp_role == GP_ROLE_DISPATCH ||
		   Gp_role == GP_ROLE_UTILITY);

	if (type == MM_OBJ_FILESPACE)
	{
		Insist(OidIsValid(filespace));
		PersistentFilespace_GetLocation(filespace, &path);
	}
	else
	{
		Insist(OidIsValid(tablespace));
		tblspc_get_filespace_paths(tablespace, &path);
		filespace = PersistentTablespace_GetFileSpaceOid(tablespace);
	}

	/*
	 * The whole struct is copied into the WAL below.  Clear it first so that
	 * alignment padding and the unused tail of the path buffer are written
	 * as zeros rather than as leftover stack contents.
	 */
	MemSet(&xlrec, 0, sizeof(xlrec));

	/*
	 * Make a non-transactional XLOG entry showing the file creation. It's
	 * non-transactional because we should replay it whether the transaction
	 * commits or not; if not, the file will be dropped at abort time.
	 */
	xlrec.objtype = type;
	xlrec.filespace = filespace;
	xlrec.tablespace = tablespace;
	xlrec.database = database;
	xlrec.relfilenode = relfilenode;
	xlrec.segnum = segnum;

	/*
	 * Cannot call is_filespace_shared(filespace) here, it will cause deadlock
	 * with PersistentObjLock; anything outside the system filespace is
	 * treated as shared.
	 */
	xlrec.shared = (SYSTEMFILESPACE_OID != filespace);
	xlrec.persistentTid = *persistentTid;
	xlrec.persistentSerialNum = persistentSerialNum;

	Insist(!path || strlen(path) <= MAXPGPATH);

	xlrec.path[0] = '\0';
	if (path)
		StrNCpy(xlrec.path, path, sizeof(xlrec.path));
	else
	{
		/*
		 * Allow relative paths if we didn't get anything when we looked up
		 * the filespace. We must allow this for the default filespace.
		 */
		xlrec.path[0] = '.';
		xlrec.path[1] = '\0';
	}

	append_file_parts(type, xlrec.path, tablespace, database,
					  relfilenode, segnum);

	if (Debug_print_qd_mirroring)
		elog(DEBUG1, "XLOG: type = %i, flags = %i, path = %s",
			 type, flags, xlrec.path);

	rdata.data = (char *) &xlrec;
	rdata.len = sizeof(xlrec);
	rdata.buffer = InvalidBuffer;
	rdata.next = NULL;

	XLogInsert(RM_MMXLOG_ID, flags | XLOG_NO_TRAN, &rdata);
	*beginLoc = XLogLastInsertBeginLoc();

	return true;
}
/*
 * mmxlog_log_remove_filespace
 *
 * Public entry point: log the removal of a filespace directory
 * (MMXLOG_REMOVE_DIR record for an MM_OBJ_FILESPACE object).
 */
void
mmxlog_log_remove_filespace(Oid filespace,ItemPointer persistentTid, int64 persistentSerialNum)
{
	XLogRecPtr	recStart;
	bool		wasEmitted;

	if (Debug_print_qd_mirroring)
		elog(DEBUG1, "emitting drop filespace record for %u",
			 filespace);

	wasEmitted = emit_mmxlog_fs_record(MM_OBJ_FILESPACE,
									   filespace,
									   InvalidOid,	/* tablespace: n/a */
									   InvalidOid,	/* database: n/a */
									   InvalidOid,	/* relfilenode: n/a */
									   persistentTid,
									   persistentSerialNum,
									   0,			/* segnum */
									   MMXLOG_REMOVE_DIR,
									   &recStart);

	if (!Debug_persistent_recovery_print)
		return;

	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_log_remove_filespace: delete filespace %u (emitted %s, beginLoc %s)",
			 filespace,
			 wasEmitted ? "true" : "false",
			 XLogLocationToString(&recStart));
		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * mmxlog_log_remove_tablespace
 *
 * Public entry point: log the removal of a tablespace directory
 * (MMXLOG_REMOVE_DIR record for an MM_OBJ_TABLESPACE object).
 */
void
mmxlog_log_remove_tablespace(Oid tablespace,ItemPointer persistentTid, int64 persistentSerialNum)
{
	XLogRecPtr	recStart;
	bool		wasEmitted;

	if (Debug_print_qd_mirroring)
		elog(DEBUG1, "emitting drop tablespace record for %u",
			 tablespace);

	wasEmitted = emit_mmxlog_fs_record(MM_OBJ_TABLESPACE,
									   InvalidOid,	/* filespace: derived from tablespace */
									   tablespace,
									   InvalidOid,	/* database: n/a */
									   InvalidOid,	/* relfilenode: n/a */
									   persistentTid,
									   persistentSerialNum,
									   0,			/* segnum */
									   MMXLOG_REMOVE_DIR,
									   &recStart);

	if (!Debug_persistent_recovery_print)
		return;

	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_log_remove_tablespace: delete tablespace %u (emitted %s, beginLoc %s)",
			 tablespace,
			 wasEmitted ? "true" : "false",
			 XLogLocationToString(&recStart));
		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * mmxlog_log_remove_database
 *
 * Public entry point: log the removal of a database directory
 * (MMXLOG_REMOVE_DIR record for an MM_OBJ_DATABASE object).
 */
void
mmxlog_log_remove_database(Oid tablespace, Oid database,
						   ItemPointer persistentTid, int64 persistentSerialNum)
{
	XLogRecPtr	recStart;
	bool		wasEmitted;

	if (Debug_print_qd_mirroring)
		elog(DEBUG1, "emitting drop database record for %u/%u",
			 tablespace, database);

	wasEmitted = emit_mmxlog_fs_record(MM_OBJ_DATABASE,
									   InvalidOid,	/* filespace: derived from tablespace */
									   tablespace,
									   database,
									   InvalidOid,	/* relfilenode: n/a */
									   persistentTid,
									   persistentSerialNum,
									   0,			/* segnum */
									   MMXLOG_REMOVE_DIR,
									   &recStart);

	if (!Debug_persistent_recovery_print)
		return;

	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_log_remove_database: delete database directory %u/%u (emitted %s, beginLoc %s)",
			 tablespace,
			 database,
			 wasEmitted ? "true" : "false",
			 XLogLocationToString(&recStart));
		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * mmxlog_log_remove_relation
 *
 * Public entry point: log the removal of a relation directory
 * (MMXLOG_REMOVE_DIR record for an MM_OBJ_RELATION object).
 */
void
mmxlog_log_remove_relation(Oid tablespace, Oid database, Oid relfilenode,
						   ItemPointer persistentTid, int64 persistentSerialNum)
{
	XLogRecPtr	recStart;
	bool		wasEmitted;

	if (Debug_print_qd_mirroring)
		elog(DEBUG1, "emitting drop relation record for %u/%u/%u",
			 tablespace, database, relfilenode);

	wasEmitted = emit_mmxlog_fs_record(MM_OBJ_RELATION,
									   InvalidOid,	/* filespace: derived from tablespace */
									   tablespace,
									   database,
									   relfilenode,
									   persistentTid,
									   persistentSerialNum,
									   0,			/* segnum */
									   MMXLOG_REMOVE_DIR,
									   &recStart);

	if (!Debug_persistent_recovery_print)
		return;

	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_log_remove_relation: delete relation directory %u/%u/%u (emitted %s, beginLoc %s)",
			 tablespace,
			 database,
			 relfilenode,
			 wasEmitted ? "true" : "false",
			 XLogLocationToString(&recStart));
		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * mmxlog_log_remove_relfilenode
 *
 * Public entry point: log the removal of one segment file of a relation
 * (MMXLOG_REMOVE_FILE record for an MM_OBJ_RELFILENODE object).
 */
void
mmxlog_log_remove_relfilenode(Oid tablespace, Oid database, Oid relfilenode,
							  int32 segnum,ItemPointer persistentTid, int64 persistentSerialNum)
{
	XLogRecPtr	recStart;
	bool		wasEmitted;

	if (Debug_print_qd_mirroring)
		elog(DEBUG1, "emitting drop relfilenode record for %u/%u/%u",
			 tablespace, database, relfilenode);

	wasEmitted = emit_mmxlog_fs_record(MM_OBJ_RELFILENODE,
									   InvalidOid,	/* filespace: derived from tablespace */
									   tablespace,
									   database,
									   relfilenode,
									   persistentTid,
									   persistentSerialNum,
									   segnum,
									   MMXLOG_REMOVE_FILE,
									   &recStart);

	if (!Debug_persistent_recovery_print)
		return;

	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_log_remove_relfilenode: delete relation %u/%u/%u, segment file #%d (emitted %s, beginLoc %s)",
			 tablespace,
			 database,
			 relfilenode,
			 segnum,
			 wasEmitted ? "true" : "false",
			 XLogLocationToString(&recStart));
		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * mmxlog_log_create_filespace
 *
 * Public entry point: log the creation of a filespace directory
 * (MMXLOG_CREATE_DIR record for an MM_OBJ_FILESPACE object).
 */
void
mmxlog_log_create_filespace(Oid filespace,ItemPointer persistentTid, int64 persistentSerialNum)
{
	XLogRecPtr	recStart;
	bool		wasEmitted;

	wasEmitted = emit_mmxlog_fs_record(MM_OBJ_FILESPACE,
									   filespace,
									   InvalidOid,	/* tablespace: n/a */
									   InvalidOid,	/* database: n/a */
									   InvalidOid,	/* relfilenode: n/a */
									   persistentTid,
									   persistentSerialNum,
									   0,			/* segnum */
									   MMXLOG_CREATE_DIR,
									   &recStart);

	if (!Debug_persistent_recovery_print)
		return;

	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_log_create_filespace: create filespace %u (emitted %s, beginLoc %s)",
			 filespace,
			 wasEmitted ? "true" : "false",
			 XLogLocationToString(&recStart));
		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * mmxlog_log_create_tablespace
 *
 * Public entry point: log the creation of a tablespace directory
 * (MMXLOG_CREATE_DIR record for an MM_OBJ_TABLESPACE object).
 */
void
mmxlog_log_create_tablespace(Oid filespace, Oid tablespace, ItemPointer persistentTid, int64 persistentSerialNum)
{
	XLogRecPtr	recStart;
	bool		wasEmitted;

	wasEmitted = emit_mmxlog_fs_record(MM_OBJ_TABLESPACE,
									   filespace,
									   tablespace,
									   InvalidOid,	/* database: n/a */
									   InvalidOid,	/* relfilenode: n/a */
									   persistentTid,
									   persistentSerialNum,
									   0,			/* segnum */
									   MMXLOG_CREATE_DIR,
									   &recStart);

	if (!Debug_persistent_recovery_print)
		return;

	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_log_create_tablespace: create tablespace %u (filespace %u, emitted %s, beginLoc %s)",
			 tablespace,
			 filespace,
			 wasEmitted ? "true" : "false",
			 XLogLocationToString(&recStart));
		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * mmxlog_log_create_database
 *
 * Public entry point: log the creation of a database directory
 * (MMXLOG_CREATE_DIR record for an MM_OBJ_DATABASE object).
 */
void
mmxlog_log_create_database(Oid tablespace, Oid database,
						   ItemPointer persistentTid, int64 persistentSerialNum)
{
	XLogRecPtr	recStart;
	bool		wasEmitted;

	wasEmitted = emit_mmxlog_fs_record(MM_OBJ_DATABASE,
									   InvalidOid,	/* filespace: derived from tablespace */
									   tablespace,
									   database,
									   InvalidOid,	/* relfilenode: n/a */
									   persistentTid,
									   persistentSerialNum,
									   0,			/* segnum */
									   MMXLOG_CREATE_DIR,
									   &recStart);

	if (!Debug_persistent_recovery_print)
		return;

	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_log_create_database: create database directory %u/%u (emitted %s, beginLoc %s)",
			 tablespace,
			 database,
			 wasEmitted ? "true" : "false",
			 XLogLocationToString(&recStart));
		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * mmxlog_log_create_relation
 *
 * Public entry point: log the creation of a relation directory
 * (MMXLOG_CREATE_DIR record for an MM_OBJ_RELATION object).
 */
void
mmxlog_log_create_relation(Oid tablespace, Oid database, Oid relfilenode,
						   ItemPointer persistentTid, int64 persistentSerialNum)
{
	XLogRecPtr	recStart;
	bool		wasEmitted;

	wasEmitted = emit_mmxlog_fs_record(MM_OBJ_RELATION,
									   InvalidOid,	/* filespace: derived from tablespace */
									   tablespace,
									   database,
									   relfilenode,
									   persistentTid,
									   persistentSerialNum,
									   0,			/* segnum */
									   MMXLOG_CREATE_DIR,
									   &recStart);

	if (!Debug_persistent_recovery_print)
		return;

	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_log_create_relation: create relation directory %u/%u/%u (emitted %s, beginLoc %s)",
			 tablespace,
			 database,
			 relfilenode,
			 wasEmitted ? "true" : "false",
			 XLogLocationToString(&recStart));
		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * mmxlog_log_create_relfilenode
 *
 * Public entry point: log the creation of one segment file of a relation
 * (MMXLOG_CREATE_FILE record for an MM_OBJ_RELFILENODE object).
 */
void
mmxlog_log_create_relfilenode(Oid tablespace, Oid database,
							  Oid relfilenode, int32 segnum,
							  ItemPointer persistentTid, int64 persistentSerialNum)
{
	XLogRecPtr	recStart;
	bool		wasEmitted;

	wasEmitted = emit_mmxlog_fs_record(MM_OBJ_RELFILENODE,
									   InvalidOid,	/* filespace: derived from tablespace */
									   tablespace,
									   database,
									   relfilenode,
									   persistentTid,
									   persistentSerialNum,
									   segnum,
									   MMXLOG_CREATE_FILE,
									   &recStart);

	if (!Debug_persistent_recovery_print)
		return;

	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_log_create_relfilenode: create relation %u/%u/%u, segment file #%d (emitted %s, beginLoc %s)",
			 tablespace,
			 database,
			 relfilenode,
			 segnum,
			 wasEmitted ? "true" : "false",
			 XLogLocationToString(&recStart));
		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * map_alloc
 *
 * Allocation callback handed to the hash tables in this file.  The memory
 * backing the maps must not go away while the maps are in use.
 */
static void *
map_alloc(Size sz)
{
	void	   *chunk;

	/* XXX: might have to allocate in a higher memory context */
	chunk = palloc(sz);

	return chunk;
}
/*
 * init_hash
 *
 * Generic initialisation of a hash table with fixed-size binary keys.
 *
 *   name         label for the table
 *   keysize      size in bytes of the key at the start of each entry
 *   entrysize    total size in bytes of one entry
 *   initialSize  initial number of elements to size the table for
 *
 * Returns the new table.  Memory for it is obtained through map_alloc().
 *
 * The keys stored by this file are OIDs, i.e. raw binary values that can
 * contain zero bytes.  A hash function is therefore named explicitly
 * (tag_hash, which hashes exactly 'keysize' bytes); relying on the
 * string-oriented default would stop hashing and comparing at the first
 * zero byte and make distinct OIDs collide.  The control structure is also
 * zeroed so that no field is handed to hash_create() uninitialised.
 */
static HTAB *
init_hash(const char *name, Size keysize, Size entrysize, int initialSize)
{
	HASHCTL		ctl;

	MemSet(&ctl, 0, sizeof(ctl));
	ctl.keysize = keysize;
	ctl.entrysize = entrysize;
	ctl.hash = tag_hash;
	ctl.alloc = map_alloc;

	return hash_create(name,
					   initialSize,
					   &ctl,
					   HASH_ELEM | HASH_FUNCTION | HASH_ALLOC);
}
/*
 * mmxlog_empty_filespace_hashtable
 *
 * Remove every entry from the filespace map.  'caller' is only used to tag
 * the debug output.
 *
 * The map is created lazily, so it may not exist yet; in that case there is
 * nothing to remove and the scan is skipped instead of dereferencing a NULL
 * table.
 *
 * Raises ERROR if an entry returned by the scan cannot be found again for
 * removal.
 */
static void
mmxlog_empty_filespace_hashtable(char *caller)
{
	HASH_SEQ_STATUS iterateStatus;
	fspc_map   *entry;
	int			i = 0;
	bool		found;

	if (filespace_map_ht != NULL)
	{
		hash_seq_init(&iterateStatus, filespace_map_ht);

		while ((entry = (fspc_map *) hash_seq_search(&iterateStatus)) != NULL)
		{
			if (Debug_persistent_recovery_print)
			{
				elog(PersistentRecovery_DebugPrintLevel(),
					 "mmxlog_empty_filespace_hashtable[%d]: filespace %u, dbid1 %d, dbid2 %d (caller '%s')",
					 i,
					 entry->filespaceoid,
					 entry->dbid1,
					 entry->dbid2,
					 caller);
			}

			/* Delete the entry the scan just handed us. */
			entry = hash_search(filespace_map_ht,
								&(entry->filespaceoid),
								HASH_REMOVE,
								&found);
			if (entry == NULL)
				elog(ERROR, "Corrupted filespace hashtable");

			i++;
		}
	}

	if (Debug_persistent_recovery_print)
	{
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_empty_filespace_hashtable: filespace remove count %d (caller '%s')",
			 i,
			 caller);
	}
}
/*
 * mmxlog_empty_tablespace_hashtable
 *
 * Remove every entry from the tablespace map.  'caller' is only used to tag
 * the debug output.
 *
 * The map is created lazily, so it may not exist yet; in that case there is
 * nothing to remove and the scan is skipped instead of dereferencing a NULL
 * table.
 *
 * Raises ERROR if an entry returned by the scan cannot be found again for
 * removal.
 */
static void
mmxlog_empty_tablespace_hashtable(char *caller)
{
	HASH_SEQ_STATUS iterateStatus;
	tspc_map   *entry;
	int			i = 0;
	bool		found;

	if (tablespace_map_ht != NULL)
	{
		hash_seq_init(&iterateStatus, tablespace_map_ht);

		while ((entry = (tspc_map *) hash_seq_search(&iterateStatus)) != NULL)
		{
			if (Debug_persistent_recovery_print)
			{
				elog(PersistentRecovery_DebugPrintLevel(),
					 "mmxlog_empty_tablespace_hashtable[%d]: tablespace %u, (filespace %u, caller '%s')",
					 i,
					 entry->tablespaceoid,
					 entry->filespaceoid,
					 caller);
			}

			/* Delete the entry the scan just handed us. */
			entry = hash_search(tablespace_map_ht,
								&(entry->tablespaceoid),
								HASH_REMOVE,
								&found);
			if (entry == NULL)
				elog(ERROR, "Corrupted tablespace hashtable");

			i++;
		}
	}

	if (Debug_persistent_recovery_print)
	{
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_empty_tablespace_hashtable: tablespace remove count %d (caller '%s')",
			 i,
			 caller);
	}
}
/*
 * mmxlog_empty_hashtables
 *
 * Clear both the filespace map and the tablespace map, in that order.
 */
void
mmxlog_empty_hashtables(void)
{
	char	   *self = "mmxlog_empty_hashtables";

	mmxlog_empty_filespace_hashtable(self);
	mmxlog_empty_tablespace_hashtable(self);
}
/*
 * mmxlog_print_filespaces
 *
 * Dump the filespace map to the log: one line per entry followed by a line
 * with the total count.
 *
 *   elevel  elog level to report at
 *   caller  free-form tag included in every line
 *
 * The map is created lazily, so it may not exist yet; that is reported as a
 * count of zero instead of scanning a NULL table.
 */
void
mmxlog_print_filespaces(int elevel, char *caller)
{
	HASH_SEQ_STATUS iterateStatus;
	fspc_map   *entry;
	int			i = 0;

	if (filespace_map_ht != NULL)
	{
		hash_seq_init(&iterateStatus, filespace_map_ht);

		while ((entry = (fspc_map *) hash_seq_search(&iterateStatus)) != NULL)
		{
			elog(elevel,
				 "mmxlog_print_filespaces[%d]: filespace %u (dbid1 %d, path1 \"%s\", dbid2 %d, path2 \"%s\", caller '%s')",
				 i,
				 entry->filespaceoid,
				 entry->dbid1,
				 entry->path1,
				 entry->dbid2,
				 entry->path2,
				 caller);
			i++;
		}
	}

	elog(elevel,
		 "mmxlog_print_filespaces: filespace count %d (caller '%s')",
		 i,
		 caller);
}
/*
 * mmxlog_print_tablespaces
 *
 * Dump the tablespace map to the log: one line per entry followed by a line
 * with the total count.
 *
 *   elevel  elog level to report at
 *   caller  free-form tag included in every line
 *
 * The map is created lazily, so it may not exist yet; that is reported as a
 * count of zero instead of scanning a NULL table.
 */
void
mmxlog_print_tablespaces(int elevel, char *caller)
{
	HASH_SEQ_STATUS iterateStatus;
	tspc_map   *entry;
	int			i = 0;

	if (tablespace_map_ht != NULL)
	{
		hash_seq_init(&iterateStatus, tablespace_map_ht);

		while ((entry = (tspc_map *) hash_seq_search(&iterateStatus)) != NULL)
		{
			elog(elevel,
				 "mmxlog_print_tablespaces[%d]: tablespace %u, (filespace %u, caller '%s')",
				 i,
				 entry->tablespaceoid,
				 entry->filespaceoid,
				 caller);
			i++;
		}
	}

	elog(elevel,
		 "mmxlog_print_tablespaces: tablespace count %d (caller '%s')",
		 i,
		 caller);
}
/*
 * add_filespace_map_entry
 *
 * Insert the mapping '*m' into the filespace map, or overwrite the existing
 * mapping for the same filespace OID.  'beginLoc' and 'caller' are used only
 * for debug output.
 *
 * There is deliberately no matching "remove one mapping" function: we do not
 * want to end up having removed a filespace while there is still data to
 * apply to some file in it.  WAL replay just does what it is told, and it
 * could be told to do exactly that if the logic on the other end is wrong.
 */
static void
add_filespace_map_entry(fspc_map *m, XLogRecPtr *beginLoc, char *caller)
{
	bool		existed;
	void	   *slot;

	/* Create the table on first use. */
	if (filespace_map_ht == NULL)
		filespace_map_ht = init_hash("mmxlog filespace map",
									 sizeof(Oid),	/* keysize */
									 sizeof(fspc_map),
									 gp_max_filespaces);

	slot = hash_search(filespace_map_ht,
					   &(m->filespaceoid),
					   HASH_ENTER,
					   &existed);

	/*
	 * Whether the slot is brand new or already held a mapping, the outcome
	 * is the same: it must now hold our data, so copy straight over it.
	 */
	memcpy(slot, m, sizeof(fspc_map));

	if (!Debug_persistent_recovery_print)
		return;

	elog(PersistentRecovery_DebugPrintLevel(),
		 "add_filespace_map_entry: add filespace %u, dbid1 %d, dbid2 %d (beginLoc %s, caller '%s')",
		 m->filespaceoid,
		 m->dbid1,
		 m->dbid2,
		 XLogLocationToString(beginLoc),
		 caller);
}
/*
 * add_tablespace_map_entry
 *
 * Tablespace counterpart of add_filespace_map_entry(): insert the mapping
 * '*m' into the tablespace map, or overwrite the existing mapping for the
 * same tablespace OID.  'beginLoc' and 'caller' are used only for debug
 * output.
 */
static void
add_tablespace_map_entry(tspc_map *m, XLogRecPtr *beginLoc, char *caller)
{
	bool		existed;
	void	   *slot;

	/* Create the table on first use. */
	if (tablespace_map_ht == NULL)
		tablespace_map_ht = init_hash("mmxlog tablespace map",
									  sizeof(Oid),	/* keysize */
									  sizeof(tspc_map),
									  gp_max_tablespaces);

	slot = hash_search(tablespace_map_ht,
					   &(m->tablespaceoid),
					   HASH_ENTER,
					   &existed);

	/* New or pre-existing, the slot must end up holding our data. */
	memcpy(slot, m, sizeof(tspc_map));

	if (!Debug_persistent_recovery_print)
		return;

	elog(PersistentRecovery_DebugPrintLevel(),
		 "add_tablespace_map_entry: add tablespace %u (filespace %u, beginLoc %s, caller '%s')",
		 m->tablespaceoid,
		 m->filespaceoid,
		 XLogLocationToString(beginLoc),
		 caller);
}
/*
 * mmxlog_filespace_get_path
 *
 * Look up the path recorded for filespace 'fspcoid' in the filespace map.
 * The system filespace must not be asked for (enforced with Insist).
 *
 * On success returns true and stores a pstrdup'd copy of the path in
 * *filespacePath.  If the filespace is not in the map, returns false and
 * leaves *filespacePath set to NULL; with persistent-recovery debugging on,
 * the current contents of the map are dumped to the log first.
 */
bool
mmxlog_filespace_get_path(
						  Oid fspcoid,
						  char **filespacePath)
{
	fspc_map   *mapping;
	bool		present;

	Insist(fspcoid != SYSTEMFILESPACE_OID);

	*filespacePath = NULL;

	/* Create the table on first use. */
	if (filespace_map_ht == NULL)
		filespace_map_ht = init_hash("mmxlog filespace map",
									 sizeof(Oid),	/* keysize */
									 sizeof(fspc_map),
									 gp_max_filespaces);

	mapping = hash_search(filespace_map_ht,
						  &fspcoid,
						  HASH_FIND,
						  &present);

	if (present)
	{
		*filespacePath = pstrdup(mapping->path1);
		return true;
	}

	if (Debug_persistent_recovery_print)
		mmxlog_print_filespaces(PersistentRecovery_DebugPrintLevel(),
								"mmxlog_filespace_get_path");

	return false;
}
/*
 * mmxlog_tablespace_get_filespace
 *
 * Look up, in the tablespace map, the filespace that tablespace 'tspcoid'
 * resides in.
 *
 * On success returns true and stores the filespace OID in *filespaceOid.
 * If the tablespace is not in the map, returns false and leaves
 * *filespaceOid set to InvalidOid; with persistent-recovery debugging on,
 * the current contents of the map are dumped to the log first.
 */
bool
mmxlog_tablespace_get_filespace(
								Oid tspcoid,
								Oid *filespaceOid)
{
	tspc_map   *mapping;
	bool		present;

	elog(DEBUG1, "MMXLOG: looking for tspcoid %u", tspcoid);

	*filespaceOid = InvalidOid;

	/* Create the table on first use. */
	if (tablespace_map_ht == NULL)
		tablespace_map_ht = init_hash("mmxlog tablespace map",
									  sizeof(Oid),	/* keysize */
									  sizeof(tspc_map),
									  gp_max_tablespaces);

	/* Find the mapping that says which filespace holds this tablespace. */
	mapping = hash_search(tablespace_map_ht, &tspcoid, HASH_FIND, &present);

	if (present)
	{
		*filespaceOid = mapping->filespaceoid;
		return true;
	}

	if (Debug_persistent_recovery_print)
		mmxlog_print_tablespaces(PersistentRecovery_DebugPrintLevel(),
								 "mmxlog_tablespace_get_filespace");

	return false;
}
/*
 * mmxlog_add_filespace_init
 *
 * Allocate an empty, zero-filled filespace aggregation state with room for
 * an initial batch of entries.  *fas must be NULL on entry; on return it
 * points at the new state and *maxCount holds its capacity.
 */
void
mmxlog_add_filespace_init(
						  fspc_agg_state **fas, int *maxCount)
{
	int			nbytes;

	Assert (*fas == NULL);

	/* Initial capacity; mmxlog_add_filespace() grows it on demand. */
	*maxCount = 10;

	nbytes = FSPC_CHECKPOINT_BYTES(*maxCount);
	*fas = (fspc_agg_state *) palloc0(nbytes);
}
/*
 * mmxlog_add_filespace
 *
 * Append one filespace mapping to the aggregation state *fas.
 *
 *   fas, maxCount  state and its capacity, as set up by
 *                  mmxlog_add_filespace_init(); when the state is full it is
 *                  replaced by a zero-filled copy of twice the capacity and
 *                  both are updated
 *   filespace      OID of the filespace
 *   path1          blank-padded location of the filespace; it is converted
 *                  with PersistentFilespace_ConvertBlankPaddedLocation()
 *   caller         tag for debug output
 *
 * If the conversion yields no location, the relative path "." is recorded
 * instead.
 */
void
mmxlog_add_filespace(
					 fspc_agg_state **fas, int *maxCount,
					 Oid filespace,
					 char *path1,
					 char *caller)
{
	int			used;
	fspc_map   *slot;
	char	   *location;

	Assert(*fas != NULL);
	Assert(*maxCount > 0);

	used = (*fas)->count;
	Assert(used <= *maxCount);

	if (used == *maxCount)
	{
		/* Full: move everything into a zeroed array of twice the size. */
		fspc_agg_state *grown;
		int			grownBytes;

		*maxCount *= 2;
		grownBytes = FSPC_CHECKPOINT_BYTES(*maxCount);

		grown = (fspc_agg_state *) palloc0(grownBytes);
		memcpy(grown, *fas, FSPC_CHECKPOINT_BYTES(used));
		pfree(*fas);
		*fas = grown;
	}

	slot = &(*fas)->maps[used];
	slot->filespaceoid = filespace;

	PersistentFilespace_ConvertBlankPaddedLocation(
												   &location,
												   path1,
												    /* isPrimary */ false);

	if (location == NULL)
	{
		/* UNDONE: Do we ever not have both a master and mirror path??? */

		/*
		 * Allow relative paths if we didn't get anything when we looked up
		 * the filespace. We must allow this for the default filespace.
		 */
		slot->path1[0] = '.';
		slot->path1[1] = '\0';
	}
	else
	{
		strncpy(slot->path1, location, MAXPGPATH);
		pfree(location);
	}

	if (Debug_persistent_recovery_print)
	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_add_filespace[%d]: add filespace %u (path1 \"%s\", caller '%s')",
			 used,
			 filespace,
			 slot->path1,
			 caller);
		SUPPRESS_ERRCONTEXT_POP();
	}

	(*fas)->count++;
}
/*
 * mmxlog_add_tablespace_init
 *
 * Allocate an empty, zero-filled tablespace aggregation state with room for
 * an initial batch of entries.  *tas must be NULL on entry; on return it
 * points at the new state and *maxCount holds its capacity.
 */
void
mmxlog_add_tablespace_init(
						   tspc_agg_state **tas, int *maxCount)
{
	int			nbytes;

	Assert (*tas == NULL);

	/* Initial capacity; mmxlog_add_tablespace() grows it on demand. */
	*maxCount = 10;

	nbytes = TSPC_CHECKPOINT_BYTES(*maxCount);
	*tas = (tspc_agg_state *) palloc0(nbytes);
}
/*
 * mmxlog_add_tablespace
 *
 * Append one (tablespace, filespace) pair to the aggregate, growing the
 * aggregate first if it is already full.
 *
 *	tas			in/out: aggregate created by mmxlog_add_tablespace_init().
 *				May be replaced by a larger allocation; the old one is
 *				pfree'd.
 *	maxCount	in/out: current capacity of *tas in entries; doubled when the
 *				aggregate is grown.
 *	filespace	filespace OID stored in the new entry.
 *	tablespace	tablespace OID stored in the new entry.
 *	caller		only used in the debug message.
 */
void
mmxlog_add_tablespace(
	tspc_agg_state **tas, int *maxCount,
	Oid filespace, Oid tablespace, char *caller)
{
	int			used;
	tspc_map   *slot;

	Assert(*tas != NULL);
	Assert(*maxCount > 0);

	used = (*tas)->count;
	Assert(used <= *maxCount);

	if (used == *maxCount)
	{
		tspc_agg_state *grown;
		int			grownBytes;

		/*
		 * Out of room: double the capacity.  The replacement is zero-filled,
		 * so only the entries already in use are copied across.
		 */
		*maxCount = *maxCount * 2;
		grownBytes = TSPC_CHECKPOINT_BYTES(*maxCount);
		grown = (tspc_agg_state *) palloc0(grownBytes);
		memcpy(grown, *tas, TSPC_CHECKPOINT_BYTES(used));
		pfree(*tas);
		*tas = grown;
	}

	slot = &(*tas)->maps[used];
	slot->filespaceoid = filespace;
	slot->tablespaceoid = tablespace;

	if (Debug_persistent_recovery_print)
	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();

		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_add_tablespace[%d]: add tablespace %u (filespace %u, caller '%s')",
			 used,
			 tablespace,
			 filespace,
			 caller);

		SUPPRESS_ERRCONTEXT_POP();
	}

	/* The new entry becomes visible only now that it is filled in. */
	(*tas)->count = used + 1;
}
/*
 * mmxlog_add_database_init
 *
 * Allocate an empty, zero-filled database-directory aggregate sized for an
 * initial batch of entries.
 *
 *	das			in/out: must point at a NULL pointer on entry (asserted);
 *				receives the freshly palloc0'd aggregate.
 *	maxCount	out: set to the number of entries the allocation was sized
 *				for.
 */
void
mmxlog_add_database_init(
	dbdir_agg_state **das, int *maxCount)
{
	int			initialBytes;

	Assert(*das == NULL);

	/* Initial capacity; mmxlog_add_database() doubles it when it fills up. */
	*maxCount = 10;

	initialBytes = DBDIR_CHECKPOINT_BYTES(*maxCount);
	*das = (dbdir_agg_state *) palloc0(initialBytes);
}
/*
 * mmxlog_add_database
 *
 * Append one (database, tablespace) pair to the aggregate, growing the
 * aggregate first if it is already full.
 *
 *	das			in/out: aggregate created by mmxlog_add_database_init().
 *				May be replaced by a larger allocation; the old one is
 *				pfree'd.
 *	maxCount	in/out: current capacity of *das in entries; doubled when the
 *				aggregate is grown.
 *	database	database OID stored in the new entry.
 *	tablespace	tablespace OID stored in the new entry.
 *	caller		only used in the debug message.
 */
void
mmxlog_add_database(
	dbdir_agg_state **das, int *maxCount,
	Oid database, Oid tablespace, char *caller)
{
	int			used;
	dbdir_map  *slot;

	Assert(*das != NULL);
	Assert(*maxCount > 0);

	used = (*das)->count;
	Assert(used <= *maxCount);

	if (used == *maxCount)
	{
		dbdir_agg_state *grown;
		int			grownBytes;

		/*
		 * Out of room: double the capacity.  The replacement is zero-filled,
		 * so only the entries already in use are copied across.
		 */
		*maxCount = *maxCount * 2;
		grownBytes = DBDIR_CHECKPOINT_BYTES(*maxCount);
		grown = (dbdir_agg_state *) palloc0(grownBytes);
		memcpy(grown, *das, DBDIR_CHECKPOINT_BYTES(used));
		pfree(*das);
		*das = grown;
	}

	slot = &(*das)->maps[used];
	slot->databaseoid = database;
	slot->tablespaceoid = tablespace;

	if (Debug_persistent_recovery_print)
	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();

		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_add_database[%d]: add database %u (tablespace %u, caller '%s')",
			 used,
			 database,
			 tablespace,
			 caller);

		SUPPRESS_ERRCONTEXT_POP();
	}

	/* The new entry becomes visible only now that it is filled in. */
	(*das)->count = used + 1;
}
/*
 * mmxlog_append_checkpoint_data
 *
 * Attach the filespace, tablespace and database-directory aggregates to the
 * checkpoint record's rdata chain as rdata[2], rdata[3] and rdata[4], linked
 * in after rdata[1].  The standby uses this payload to build its picture of
 * the filespace and tablespace configuration without reading the persistent
 * tables, which are not guaranteed to be consistent there.
 *
 * When gp_before_filespace_setup is set nothing is attached and the chain
 * ends at rdata[1].  On a standby master, empty (zero-count) aggregates are
 * attached instead of real data.
 *
 * NOTE: You must hold the PersistentObjLock before calling this routine!
 */
void
mmxlog_append_checkpoint_data(XLogRecData rdata[6])
{
	fspc_agg_state *fspcs;
	tspc_agg_state *tspcs;
	dbdir_agg_state *dbdirs;
	int			slot;

	/*
	 * Terminate every link we might touch up front, so that nobody walks the
	 * chain into uninitialised entries if we bail out early below.
	 */
	for (slot = 1; slot <= 4; slot++)
		rdata[slot].next = NULL;

	if (gp_before_filespace_setup)
	{
		if (Debug_persistent_recovery_print)
		{
			SUPPRESS_ERRCONTEXT_DECLARE;

			SUPPRESS_ERRCONTEXT_PUSH();

			elog(PersistentRecovery_DebugPrintLevel(),
				 "mmxlog_append_checkpoint_data: no tablespace and filespace information for checkpoint because gp_before_filespace_setup GUC is true");

			SUPPRESS_ERRCONTEXT_POP();
		}
		return;
	}

	if (!AmStandbyMaster())
	{
		/* Collect the real data; each getter receives a NULL pointer to fill. */
		fspcs = NULL;
		get_filespace_data(&fspcs, "mmxlog_append_checkpoint_data");

		tspcs = NULL;
		get_tablespace_data(&tspcs, "mmxlog_append_checkpoint_data");

		dbdirs = NULL;
		get_database_data(&dbdirs, "mmxlog_append_checkpoint_data");
	}
	else
	{
		/* Standby master: write empty aggregates. */
		fspcs = (fspc_agg_state *) palloc0(sizeof(*fspcs));
		tspcs = (tspc_agg_state *) palloc0(sizeof(*tspcs));
		dbdirs = (dbdir_agg_state *) palloc0(sizeof(*dbdirs));

		fspcs->count = 0;
		tspcs->count = 0;
		dbdirs->count = 0;
	}

	/* Filespaces. */
	rdata[2].data = (char *) fspcs;
	rdata[2].buffer = InvalidBuffer;
	rdata[2].len = FSPC_CHECKPOINT_BYTES(fspcs->count);

	/* Tablespaces. */
	rdata[3].data = (char *) tspcs;
	rdata[3].buffer = InvalidBuffer;
	rdata[3].len = TSPC_CHECKPOINT_BYTES(tspcs->count);

	/* Database directories. */
	rdata[4].data = (char *) dbdirs;
	rdata[4].buffer = InvalidBuffer;
	rdata[4].len = DBDIR_CHECKPOINT_BYTES(dbdirs->count);

	/* Splice the three entries in after rdata[1]; rdata[4].next stays NULL. */
	rdata[1].next = &rdata[2];
	rdata[2].next = &rdata[3];
	rdata[3].next = &rdata[4];

	if (Debug_persistent_recovery_print)
	{
		SUPPRESS_ERRCONTEXT_DECLARE;

		SUPPRESS_ERRCONTEXT_PUSH();

		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_append_checkpoint_data: %d filespaces, %d tablespaces, %d databases checkpoint information",
			 fspcs->count,
			 tspcs->count,
			 dbdirs->count);

		SUPPRESS_ERRCONTEXT_POP();
	}
}
/*
 * mmxlog_get_checkpoint_record_suffix
 *
 * Return a pointer to the first byte following the master/mirror checkpoint
 * information inside a checkpoint record.
 *
 * Starting after the CheckPoint struct, this steps over four variable-length
 * sections in turn: the DTX checkpoint, then the filespace, tablespace and
 * database-directory aggregates.  Each section's size is derived from the
 * count stored in that section.  No bounds checking is done here.
 */
char *
mmxlog_get_checkpoint_record_suffix(XLogRecord *checkpointRecord)
{
	char	   *cursor;
	int			sectionLen;

	/* Distributed-transaction checkpoint section. */
	cursor = XLogRecGetData(checkpointRecord) + sizeof(CheckPoint);
	sectionLen = TMGXACT_CHECKPOINT_BYTES(((TMGXACT_CHECKPOINT *) cursor)->committedCount);
	cursor += sectionLen;

	/* Filespace aggregate. */
	sectionLen = FSPC_CHECKPOINT_BYTES(((fspc_agg_state *) cursor)->count);
	cursor += sectionLen;

	/* Tablespace aggregate. */
	sectionLen = TSPC_CHECKPOINT_BYTES(((tspc_agg_state *) cursor)->count);
	cursor += sectionLen;

	/* Database-directory aggregate. */
	sectionLen = DBDIR_CHECKPOINT_BYTES(((dbdir_agg_state *) cursor)->count);
	cursor += sectionLen;

	return cursor;
}	/* end mmxlog_get_checkpoint_record_suffix */
/*
 * mmxlog_get_checkpoint_info
 *
 * Locate the filespace, tablespace and database-directory aggregates inside
 * the master-mirroring portion of a checkpoint record, checking at each step
 * that enough bytes remain.
 *
 *	cpdata				start of the filespace aggregate.
 *	masterMirroringLen	number of bytes available starting at cpdata.
 *	checkpointLen		total checkpoint record length; used only in
 *						messages.
 *	beginLoc			record location; used only in messages.
 *	errlevel			level passed to ereport() when a length check fails,
 *						or -1 to fail silently.
 *	f, t, d				out: set to point INTO cpdata (no copy is made).
 *
 * Returns true when all three sections fit, false otherwise.  On a false
 * return, pointers for sections located before the failing check may already
 * have been stored, so callers must not use *f / *t / *d in that case.
 *
 * If no bytes at all remain after the database section, a WARNING is logged
 * (regardless of errlevel) and the record is still accepted.
 */
bool
mmxlog_get_checkpoint_info(char *cpdata, int masterMirroringLen, int checkpointLen, XLogRecPtr *beginLoc, int errlevel,
						   fspc_agg_state **f,
						   tspc_agg_state **t,
						   dbdir_agg_state **d)
{
	int			remainderLen;
	int			filespaceInfoLen;
	int			tablespaceInfoLen;
	int			databaseInfoLen;

	SUPPRESS_ERRCONTEXT_DECLARE;

	SUPPRESS_ERRCONTEXT_PUSH();

	remainderLen = masterMirroringLen;

	/*
	 * Filespaces: make sure the fixed header is present before trusting its
	 * count field.
	 */
	if (remainderLen < FSPC_CHECKPOINT_BYTES(0))
	{
		if (errlevel != -1)
			ereport(errlevel,
					(errmsg("Bad checkpoint record length %u (Master mirroring filespace information header: expected at least length %u, actual length %u) at location %s",
							checkpointLen,
							(uint32)FSPC_CHECKPOINT_BYTES(0),
							remainderLen,
							XLogLocationToString(beginLoc))));

		SUPPRESS_ERRCONTEXT_POP();
		return false;
	}

	*f = (fspc_agg_state*)cpdata;
	filespaceInfoLen = FSPC_CHECKPOINT_BYTES((*f)->count);

	if (Debug_persistent_recovery_print)
	{
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_get_checkpoint_info: Checkpoint record length %u, %d filespaces, filespaceInfoLen %d, remainder length %d, location %s",
			 checkpointLen,
			 (*f)->count,
			 filespaceInfoLen,
			 remainderLen,
			 XLogLocationToString(beginLoc));
	}

	if (remainderLen < filespaceInfoLen)
	{
		if (errlevel != -1)
			ereport(errlevel,
					(errmsg("Bad checkpoint record length %u (Master mirroring filesapce information: expected at least length %u, actual length %u, count %d) at location %s",
							checkpointLen,
							filespaceInfoLen,
							remainderLen,
							(*f)->count,
							XLogLocationToString(beginLoc))));

		SUPPRESS_ERRCONTEXT_POP();
		return false;
	}

	remainderLen -= filespaceInfoLen;

	/* Tablespaces: same header-then-body check. */
	if (remainderLen < TSPC_CHECKPOINT_BYTES(0))
	{
		if (errlevel != -1)
			ereport(errlevel,
					(errmsg("Bad checkpoint record length %u (Master mirroring tablespace information header: expected at least length %u, actual length %u) at location %s",
							checkpointLen,
							(uint32)TSPC_CHECKPOINT_BYTES(0),
							remainderLen,
							XLogLocationToString(beginLoc))));

		SUPPRESS_ERRCONTEXT_POP();
		return false;
	}

	*t = (tspc_agg_state*) (cpdata + filespaceInfoLen);
	tablespaceInfoLen = TSPC_CHECKPOINT_BYTES((*t)->count);

	if (Debug_persistent_recovery_print)
	{
		/*
		 * Report the tablespace section length here; the message labels the
		 * value "tablespaceInfoLen".
		 */
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_get_checkpoint_info: Checkpoint record length %u, %d tablespaces, tablespaceInfoLen %d, remainder length %d, location %s",
			 checkpointLen,
			 (*t)->count,
			 tablespaceInfoLen,
			 remainderLen,
			 XLogLocationToString(beginLoc));
	}

	if (remainderLen < tablespaceInfoLen)
	{
		if (errlevel != -1)
			ereport(errlevel,
					(errmsg("Bad checkpoint record length %u (Master mirroring tablesapce information: expected at least length %u, actual length %u, count %d) at location %s",
							checkpointLen,
							tablespaceInfoLen,
							remainderLen,
							(*t)->count,
							XLogLocationToString(beginLoc))));

		SUPPRESS_ERRCONTEXT_POP();
		return false;
	}

	remainderLen -= tablespaceInfoLen;

	/* Database directories: header check first. */
	if (remainderLen < DBDIR_CHECKPOINT_BYTES(0))
	{
		if (errlevel != -1)
			ereport(errlevel,
					(errmsg("Bad checkpoint record length %u (Master mirroring database directory information header: expected at least length %u, actual length %u) at location %s",
							checkpointLen,
							(uint32)DBDIR_CHECKPOINT_BYTES(0),
							remainderLen,
							XLogLocationToString(beginLoc))));

		SUPPRESS_ERRCONTEXT_POP();
		return false;
	}

	*d = (dbdir_agg_state*) (cpdata + filespaceInfoLen + tablespaceInfoLen);
	databaseInfoLen = DBDIR_CHECKPOINT_BYTES((*d)->count);

	if (Debug_persistent_recovery_print)
	{
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_get_checkpoint_info: Checkpoint record length %u, %d databases, databaseInfoLen %d, remainder length %d, location %s",
			 checkpointLen,
			 (*d)->count,
			 databaseInfoLen,
			 remainderLen,
			 XLogLocationToString(beginLoc));
	}

	/*
	 * Unlike the earlier sections, the body check is done after subtracting:
	 * a negative remainder means the section overruns the record, and exactly
	 * zero is accepted with a warning.
	 */
	remainderLen -= databaseInfoLen;

	if (remainderLen == 0)
		elog(WARNING,"mmxlog_get_checkpoint_info: The checkpoint at %s appears to be a 4.0 checkpoint", XLogLocationToString(beginLoc));
	else if (remainderLen < 0)
	{
		if (errlevel != -1)
			ereport(errlevel,
					(errmsg("Bad checkpoint record length %u (Master mirroring database directory information: expected length %u, actual length %u, count %d) at location %s",
							checkpointLen,
							databaseInfoLen,
							remainderLen,
							(*d)->count,
							XLogLocationToString(beginLoc))));

		SUPPRESS_ERRCONTEXT_POP();
		return false;
	}

	SUPPRESS_ERRCONTEXT_POP();
	return true;
}	/* mmxlog_get_checkpoint_info */
/*
 * mmxlog_verify_checkpoint_info
 *
 * Run the length checks of mmxlog_get_checkpoint_info() on a checkpoint
 * record's master-mirroring data, discarding the section pointers it
 * produces.  Arguments and return value are exactly those of
 * mmxlog_get_checkpoint_info(), minus the three output pointers.
 */
bool
mmxlog_verify_checkpoint_info(char *cpdata, int masterMirroringLen, int checkpointLen, XLogRecPtr *beginLoc, int errlevel)
{
	fspc_agg_state *unusedFspcs;
	tspc_agg_state *unusedTspcs;
	dbdir_agg_state *unusedDbdirs;
	bool		valid;

	valid = mmxlog_get_checkpoint_info(cpdata, masterMirroringLen,
									   checkpointLen, beginLoc, errlevel,
									   &unusedFspcs, &unusedTspcs, &unusedDbdirs);

	return valid;
}
/*
 * mmxlog_get_checkpoint_counts
 *
 * Report how many filespace, tablespace and database-directory entries the
 * master-mirroring portion of a checkpoint record contains.
 *
 * The first five arguments are passed straight through to
 * mmxlog_get_checkpoint_info().  All three output counts are zeroed before
 * the record is examined, so they are 0 whenever false is returned.
 *
 * Returns true and fills in the counts if the record passes the length
 * checks, false otherwise.
 */
bool
mmxlog_get_checkpoint_counts(char *cpdata, int masterMirroringLen, int checkpointLen, XLogRecPtr *beginLoc, int errlevel, int *filespaceCount, int *tablespaceCount, int *databaseCount)
{
	fspc_agg_state *f = NULL;
	tspc_agg_state *t = NULL;
	dbdir_agg_state *d = NULL;

	*filespaceCount = 0;
	*tablespaceCount = 0;
	*databaseCount = 0;

	Assert(cpdata != NULL);

	if (!mmxlog_get_checkpoint_info(cpdata, masterMirroringLen, checkpointLen, beginLoc, errlevel, &f, &t, &d))
		return false;

	*filespaceCount = f->count;
	*tablespaceCount = t->count;
	/* The database count comes from the database-directory section. */
	*databaseCount = d->count;

	return true;
}
/*
 * mmxlog_read_checkpoint_data
 *
 * On a standby (GPStandby() true), replay the filespace, tablespace and
 * database-directory information carried by a checkpoint record: register
 * filespaces and tablespaces in the in-memory maps and make sure the
 * corresponding directories exist on disk.  Does nothing when not running
 * as a standby.
 *
 * The record is parsed with errlevel PANIC, so a malformed record does not
 * return to this function.  A directory that already exists is fine; any
 * other mkdir() failure raises ERROR.
 */
void
mmxlog_read_checkpoint_data(char *cpdata, int masterMirroringLen, int checkpointLen, XLogRecPtr *beginLoc)
{
	fspc_agg_state *fspcs = NULL;
	tspc_agg_state *tspcs = NULL;
	dbdir_agg_state *dbdirs = NULL;
	int			idx;

	Assert(cpdata != NULL);

	if (!GPStandby())
		return;

	mmxlog_get_checkpoint_info(cpdata, masterMirroringLen, checkpointLen,
							   beginLoc, PANIC, &fspcs, &tspcs, &dbdirs);

	/*
	 * Push the data down into the hash tables.  Filespaces go first because
	 * they are the root of the space hierarchy: tablespace paths are resolved
	 * through them below.
	 */
	for (idx = 0; idx < fspcs->count; idx++)
	{
		fspc_map   *entry = &fspcs->maps[idx];

		if (entry->filespaceoid == InvalidOid)
			elog(ERROR, "bad filespace checkpoint information for entry %d", idx);

		add_filespace_map_entry(entry, beginLoc, "mmxlog_read_checkpoint_data");

		/*
		 * NOTE(review): the directory is created from path2, whereas
		 * mmxlog_add_filespace() in this file only fills in path1 -- confirm
		 * that path2 is populated by whatever produced this record.
		 */
		if (mkdir(entry->path2, 0700) != 0)
		{
			/* Allowed to already exist. */
			if (errno != EEXIST)
			{
				elog(ERROR, "could not create filespace directory \"%s\": %m",
					 entry->path2);
			}
			else if (Debug_persistent_recovery_print)
			{
				elog(PersistentRecovery_DebugPrintLevel(),
					 "mmxlog_read_checkpoint_data: Filespace directory \"%s\" already exists",
					 entry->path2);
			}
		}
		else if (Debug_persistent_recovery_print)
		{
			elog(PersistentRecovery_DebugPrintLevel(),
				 "mmxlog_read_checkpoint_data: Re-created filespace directory \"%s\"",
				 entry->path2);
		}
	}

	/* Tablespaces live in "<filespace path>/<tablespace oid>". */
	for (idx = 0; idx < tspcs->count; idx++)
	{
		char		tspcDir[MAXPGPATH];
		char	   *fspcPath;
		tspc_map   *entry = &tspcs->maps[idx];

		if (entry->tablespaceoid == InvalidOid)
			elog(ERROR, "bad tablespace checkpoint information for entry %d", idx);

		add_tablespace_map_entry(entry, beginLoc, "mmxlog_read_checkpoint_data");

		if (!mmxlog_filespace_get_path(entry->filespaceoid, &fspcPath))
		{
			elog(ERROR, "cannot find filespace path for filespace OID %u (tablespace %u)",
				 entry->filespaceoid, entry->tablespaceoid);
		}

		snprintf(tspcDir, sizeof(tspcDir), "%s/%u", fspcPath, entry->tablespaceoid);

		if (mkdir(tspcDir, 0700) != 0)
		{
			/* Allowed to already exist. */
			if (errno != EEXIST)
			{
				elog(ERROR, "could not create tablespace directory \"%s\": %m",
					 tspcDir);
			}
			else if (Debug_persistent_recovery_print)
			{
				elog(PersistentRecovery_DebugPrintLevel(),
					 "mmxlog_read_checkpoint_data: Tablespace directory \"%s\" already exists",
					 tspcDir);
			}
		}
		else if (Debug_persistent_recovery_print)
		{
			elog(PersistentRecovery_DebugPrintLevel(),
				 "mmxlog_read_checkpoint_data: Re-created tablespace directory \"%s\"",
				 tspcDir);
		}

		pfree(fspcPath);
	}

	/*
	 * Database directories.  Unlike the two loops above, these entries are
	 * not added to any in-memory map here; only the directory is created.
	 */
	for (idx = 0; idx < dbdirs->count; idx++)
	{
		char		dbDir[MAXPGPATH];
		dbdir_map  *entry = &dbdirs->maps[idx];
		Oid			owningFilespace;

		if (entry->databaseoid == InvalidOid)
			elog(ERROR, "bad database directory checkpoint information for entry %d", idx);

		if (entry->tablespaceoid == GLOBALTABLESPACE_OID)
			elog(ERROR, "should not have the global tablespace in the database directory entries");

		if (entry->tablespaceoid != DEFAULTTABLESPACE_OID)
		{
			char	   *fspcPath;

			/* tablespace -> filespace -> path, then append both OIDs. */
			if (!mmxlog_tablespace_get_filespace(entry->tablespaceoid,
												 &owningFilespace))
			{
				elog(ERROR, "cannot find filespace OID for tablespace %u",
					 entry->tablespaceoid);
			}

			if (!mmxlog_filespace_get_path(owningFilespace, &fspcPath))
			{
				elog(ERROR, "cannot find filespace path for filespace OID %u (tablespace %u)",
					 owningFilespace, entry->tablespaceoid);
			}

			snprintf(dbDir, sizeof(dbDir), "%s/%u/%u",
					 fspcPath, entry->tablespaceoid, entry->databaseoid);
			pfree(fspcPath);
		}
		else
		{
			/* The default tablespace uses the relative "base/<oid>" path. */
			snprintf(dbDir, sizeof(dbDir), "base/%u", entry->databaseoid);
		}

		if (mkdir(dbDir, 0700) != 0)
		{
			/* Allowed to already exist. */
			if (errno != EEXIST)
			{
				elog(ERROR, "could not create database directory \"%s\": %m",
					 dbDir);
			}
			else if (Debug_persistent_recovery_print)
			{
				elog(PersistentRecovery_DebugPrintLevel(),
					 "mmxlog_read_checkpoint_data: Database directory \"%s\" already exists",
					 dbDir);
			}
		}
		else if (Debug_persistent_recovery_print)
		{
			elog(PersistentRecovery_DebugPrintLevel(),
				 "mmxlog_read_checkpoint_data: Re-created database directory \"%s\"",
				 dbDir);
		}
	}

	if (Debug_persistent_recovery_print)
	{
		elog(PersistentRecovery_DebugPrintLevel(),
			 "mmxlog_read_checkpoint_data: %d filespaces, %d tablespaces, %d databases (beginLoc %s)",
			 fspcs->count,
			 tspcs->count,
			 dbdirs->count,
			 XLogLocationToString(beginLoc));
	}
}
/*
 * tblspc_get_filespace_paths
 *
 * Given a tablespace OID, return its filespace path through *path.
 *
 * The built-in global and default tablespaces are not known to the
 * PersistentTablespace code, so for those *path is simply set to NULL.
 * Every other OID is looked up with PersistentTablespace_GetFilespacePath().
 */
static void
tblspc_get_filespace_paths(Oid tblspc, char **path)
{
	if (tblspc != GLOBALTABLESPACE_OID &&
		tblspc != DEFAULTTABLESPACE_OID)
	{
		PersistentTablespace_GetFilespacePath(tblspc, FALSE, path);
		return;
	}

	/* Built-in tablespace: nothing to look up. */
	*path = NULL;
}