blob: c52f0b94ab6495e1a0d5cd276f9697159a6d789c [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* Manages transitions between primary/mirror modes.
*
*/
#include "postgres.h"
#include "postmaster/postmaster.h"
#include "postmaster/primary_mirror_mode.h"
#include "storage/pmsignal.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "miscadmin.h"
#include "access/xlog.h"
#include "utils/guc.h"
#include "utils/netcheck.h"
#include "catalog/pg_filespace.h"
#include "cdb/cdbpersistentfilespace.h"
#include "cdb/cdbvars.h"
#include <string.h>
#include "access/slru.h"
#include "access/xlog_internal.h"
#include <unistd.h>
#include <sys/stat.h>
#define BUFFER_LEN (2 * (MAXPGPATH))
/**
* todo MIRRORING Review setup of signal handlers here --
* if we are inside a transition will we quit out properly on shutdown?
*/
/**
* Progress marker for a transition that the postmaster performs in several steps.
* The postmaster returns to its server loop between steps; the current step is kept
* in PMModuleState.transitionState so that later calls can continue from it.
*
* This implements a state machine. Each transition (like to-primary-segment, or
* to-mirror-segment) manages its own order of steps through this machine.
*
* Note: if the order or number of values is changed then update the matching labels
* (see the note on getTransitionStateLabel below).
*
* Note that some code relies on the numerical order of these values (the original
* comment names getFileRepRoleAndState; in this file isInValidTransition_UnderLock
* compares with ">= TSDoDatabaseStartup"). Be careful of that when changing this.
*/
typedef enum
{
/* no step has been started; the module resets transitionState to this value */
TSNone = 0,
/* for two-phase recovery (where filerep goes into changelogging before database startup and
* needs to be notified after the database is up) */
TSDoDatabaseStartup,
TSDoneDatabaseStartup,
TSDone,
/* note: these should be in sync with getTransitionStateLabel */
/* the number of elements in the enumeration; must stay last */
TS__EnumerationCount
} TransitionState;
/* State management */
static void assertModuleInitialized(void);
/**
* Shared globals for this module live in a single instance of PMModuleState.
* The instance is allocated and initialised by primaryMirrorModeShmemInit().
*/
typedef struct PMModuleState
{
/**
* All accesses to isTransitionRequested and requestedTransition must lock on this.
* NOTE(review): isPrimaryMirrorModeTransitionRequested() deliberately peeks without it.
*/
slock_t lock;
/* TRANSITION INFORMATION */
/**
* Has a transition been requested (and not yet completed by the postmaster)?
*/
volatile bool isTransitionRequested;
/**
* The requested transition.
*/
volatile PrimaryMirrorModeTransitionArguments requestedTransition;
/**
* The result of the transition; only valid when isTransitionRequested is false (postmaster done) and
* isTransitionListenerFinished is also false (backend has not yet received this transitionResult).
* NOTE(review): no isTransitionListenerFinished field exists in this struct -- confirm.
*/
volatile PrimaryMirrorModeTransitionResult transitionResult;
/**
* The extra information about the transition result; used to report more info
* back to the caller (such as to differentiate a mirroring failure from other failure cases)
*/
volatile char transitionResultExtraInfo[MAX_TRANSITION_RESULT_EXTRA_INFO];
/**
* For transitions that require the postmaster to do multiple steps of work, this will hold the current
* progress point. Only valid while isTransitionRequested is true
*/
volatile TransitionState transitionState;
/**
* Has the postmaster copied the currently requested transition to its local memory yet?
* Set by copyTransitionParametersToLocalMemory().
*/
volatile bool transitionCopiedToPMLocalMem;
/* FILE REPLICATION PARAMETERS */
volatile PrimaryMirrorMode mode;
volatile SegmentState_e segmentState;
volatile DataState_e dataState;
volatile FaultType_e faultType;
/* connection info for self (which can be a mirror or primary, depending on mode) */
volatile char hostAddress[PM_MAX_HOST_NAME_LENGTH+1];
/* file replication port on self */
volatile int hostPort;
/* connection info for the peer (which can be a mirror or primary, depending on mode) */
volatile char peerAddress[PM_MAX_HOST_NAME_LENGTH+1];
/* file replication port on the peer */
volatile int peerPort;
/* OTHER FIELDS */
volatile bool haveStartedDatabase;
/** used by resync processes, this field will be kept up-to-date by the postmaster */
volatile pid_t bgWriterPID;
/**
* A counter used to assign numbers to transitions; the numbers are used for logging.
*/
volatile int transitionNumberCounter;
/**
* if true, means we are in fault after postmaster reset and waiting for a transition to be sent
*/
volatile bool isInFaultFromPostmasterReset;
/* Filespace details for temp files */
/* protects the tempFilespacePathHasError flags and numOfValidFilespacePath */
slock_t tempFilespaceLock;
/* number of entries in tempFilespacePath / tempFilespacePathHasError */
volatile int numOfFilespacePath;
/* starts equal to numOfFilespacePath; decremented when a path is flagged as bad */
volatile int numOfValidFilespacePath;
/* index of the most recently handed-out temp path (only used by disabled code) */
volatile int lastAllocateIdx;
/* numOfFilespacePath strings, each allocated with MAXPGPATH bytes */
volatile char **tempFilespacePath;
/* per-path error flag, parallel to tempFilespacePath */
volatile bool *tempFilespacePathHasError;
/* stays true unless a non-empty temp path is read from the flat file */
volatile bool isUsingDefaultFilespaceForTempFiles;
/* Filespace details for transaction files */
volatile char txnFilespacePath[MAXPGPATH];
volatile char peerTxnFilespacePath[MAXPGPATH];
volatile Oid txnFilespaceOID;
volatile bool isUsingDefaultFilespaceForTxnFiles;
} PMModuleState;
/* the single shared instance; NULL until primaryMirrorModeShmemInit() has run */
static volatile PMModuleState *pmModuleState = NULL;
/**
* Local (per-process, not shared-memory) globals live in a single PMLocalState instance
*/
typedef struct PMLocalState
{
/**
* The most recent transition applied. This variable is only valid
* in the postmaster process; it is used to save around transitions
* for later postmaster reset.
*
* Note also that the dataState value in this transition MAY CHANGE because of an internal transition
* to resync!
*/
PrimaryMirrorModeTransitionArguments mostRecentTransition;
/**
* True once mostRecentTransition has been filled in; see mostRecentTransition
*/
bool haveSetMostRecentTransition;
/**
* Used if the reset of the filerep peer cannot be done to indicate that the postmaster reset should
* transition to fault rather than trying a peer postmaster reset
*/
bool doNextResetToFault;
/**
* Was the most recent (non-fault) segment state "change tracking disabled"?
*/
bool mostRecentSegmentStateWasChangeTrackingDisabled;
/**
* Only valid between calls to
* acquireModuleSpinLockAndMaybeStartCriticalSection and
* releaseModuleSpinLockAndEndCriticalSectionAsNeeded;
* holds the value that was passed to acquireModuleSpinLockAndMaybeStartCriticalSection
*/
bool criticalSectionStarted;
} PMLocalState;
/* the process-local instance */
PMLocalState pmLocalState;
/* set by primaryMirrorModeShmemInit() so pmLocalState is only initialised once */
bool gHaveInitializedLocalState = false;
static int isAbsolutePath(const char *path);
static bool TemporaryDirectoryIsOk(const char *path);
/**
* Extra result information that can be returned to the calling script so it
* can choose to take a different action besides just standard, generic handling
*/
#define RESULT_INFO_MIRRORING_FAILURE "MirroringFailure"
#define RESULT_INFO_POSTMASTER_DIED "PostmasterDied"
#define RESULT_INFO_IN_SHUTDOWN "ServerIsInShutdown"
#define RESULT_INFO_INVALID_STATE_TRANSITION "InvalidStateTransition"
/*
* getNumOfTempFilespacePaths
* This function will fill up the share memory structure and
* If computeNumberOnly is true, this function will check the file format
* correctness and return the number of temporary filespace paths only.
*/
static int
getNumOfTempFilespacePaths(bool computeNumberOnly)
{
char buffer[BUFFER_LEN];
FILE *fp = NULL;
int numOfTempFilespacePaths = 0;
/* Read the temp directories details from gp_temporary_files_directories flat file */
fp = fopen(TEMPFILES_DIRECTORIES_FLATFILE, "r");
if (fp)
{
while (true)
{
MemSet(buffer, 0, BUFFER_LEN);
if (fgets(buffer, BUFFER_LEN, fp) == NULL)
break;
buffer[strlen(buffer) - 1] = '\0';
ereport(LOG, (errmsg("get temporary directory: %s", buffer)));
numOfTempFilespacePaths++;
if (computeNumberOnly)
continue;
/* Populate the temp paths */
strcpy((char *) pmModuleState->tempFilespacePath[numOfTempFilespacePaths - 1], buffer);
/* If we have a valid path name, we mark that we are not using default filespace. */
if (pmModuleState->tempFilespacePath[numOfTempFilespacePaths - 1][0])
pmModuleState->isUsingDefaultFilespaceForTempFiles = 0;
}
fclose(fp);
}
else
{
/* Don't make palloc sad. */
numOfTempFilespacePaths = 1;
if (!computeNumberOnly)
{
ereport(LOG, (errmsg("temporary files using default location")));
}
}
return numOfTempFilespacePaths;
}
/**
* LABELS and parsing of values
*/
/**
 * Map a PrimaryMirrorMode to its display name. The returned pointer refers
 * to static storage and must not be freed. The mode is used directly as an
 * index into the table; it is not range-checked here.
 *
 * The names are externally visible (for example through a getStatus call to
 * the postmaster) and some python management code (as used by gpstate) may
 * use them, so search the management scripts before changing any of them.
 */
const char *
getMirrorModeLabel(PrimaryMirrorMode mode)
{
	static const char *modeNames[] =
	{
		"Uninitialized",
		"Master",
		"Standby",
		"MirrorlessSegment"
	};
	const char *name;

	COMPILE_ASSERT(ARRAY_SIZE(modeNames) == PMMode__EnumerationCount);

	name = modeNames[mode];
	return name;
}
/**
 * Map a DataState_e to its display name. The returned pointer refers to
 * static storage and must not be freed. The state is used directly as an
 * index into the table; it is not range-checked here.
 *
 * The names are externally visible (for example through a getStatus call to
 * the postmaster) and some python management code (as used by gpstate) may
 * use them, so search the management scripts before changing any of them.
 */
const char *
getDataStateLabel(DataState_e state)
{
	static const char *dataStateNames[] =
	{
		"NotInitialized",
		"InSync",
		"InChangeTracking"
	};
	const char *name;

	COMPILE_ASSERT(ARRAY_SIZE(dataStateNames) == DataState__EnumerationCount);

	name = dataStateNames[state];
	return name;
}
/**
 * Map a SegmentState_e to its display name. The returned pointer refers to
 * static storage and must not be freed. The state is used directly as an
 * index into the table; it is not range-checked here.
 *
 * The names are externally visible (for example through a getStatus call to
 * the postmaster) and some python management code (as used by gpstate) may
 * use them, so search the management scripts before changing any of them.
 */
const char *
getSegmentStateLabel(SegmentState_e state)
{
	static const char *segmentStateNames[] =
	{
		"NotInitialized",
		"Initialization",
		"InChangeTrackingTransition",
		"InResyncTransition",
		"InSyncTransition",
		"Ready",
		"ChangeTrackingDisabled",
		"Fault",
		"ShutdownBackends",
		"Shutdown",
		"ImmediateShutdown"
	};
	const char *name;

	COMPILE_ASSERT(ARRAY_SIZE(segmentStateNames) == SegmentState__EnumerationCount);

	name = segmentStateNames[state];
	return name;
}
/**
 * Map a FaultType_e to its display name. The returned pointer refers to
 * static storage and must not be freed. The fault type is used directly as
 * an index into the table; it is not range-checked here.
 *
 * The names are externally visible (for example through a getStatus call to
 * the postmaster) and some python management code (as used by gpstate) may
 * use them, so search the management scripts before changing any of them.
 */
const char *
getFaultTypeLabel(FaultType_e faultType)
{
	static const char *faultTypeNames[] =
	{
		"NotInitialized", "FaultIO", "FaultDB", "FaultNet",
	};
	const char *name;

	COMPILE_ASSERT(ARRAY_SIZE(faultTypeNames) == FaultType__EnumerationCount);

	name = faultTypeNames[faultType];
	return name;
}
/**
 * Map a transition result to its display name. The returned pointer refers
 * to static storage and must not be freed. The result is used directly as an
 * index into the table; it is not range-checked here.
 *
 * Scripts embed these values in strings that may be parsed, so keep them
 * purely alphabetic.
 */
const char *
getTransitionResultLabel(PrimaryMirrorModeTransitionResult res)
{
	static const char *resultNames[] =
	{
		"Success",
		"InvalidTargetMode",
		"OtherTransitionInProgress",
		"InvalidInputParameter",
		"Error",
		"OkayButStillInProgress"
	};
	const char *name;

	COMPILE_ASSERT(ARRAY_SIZE(resultNames) == PMTransition__EnumerationCount);

	name = resultNames[res];
	return name;
}
/**
 * Translate a PrimaryMirrorMode into the single-character 'role' value as
 * stored in gp_segment_configuration.
 *
 * Returns 'p' for PMModeMaster and PMModeMirrorlessSegment and '-' for
 * PMModeUninitialized. Any other mode (for example PMModeStandby) trips the
 * "Invalid role" assertion and, in builds without assertions, yields '\0'.
 */
char getRole(PrimaryMirrorMode mode)
{
	Assert(mode >= PMModeUninitialized && mode < PMMode__EnumerationCount);

	if (mode == PMModeMaster || mode == PMModeMirrorlessSegment)
		return 'p';

	if (mode == PMModeUninitialized)
		return '-';

	Assert(!"Invalid role");
	return '\0';
}
/**
 * Translate a DataState_e into the single-character 'mode' value as stored
 * in gp_segment_configuration.
 *
 * Returns 's' for DataStateInSync and '-' for DataStateNotInitialized. Any
 * other state (for example DataStateInChangeTracking) trips the
 * "Invalid mode" assertion and, in builds without assertions, yields '\0'.
 */
char getMode(DataState_e state)
{
	Assert(state >= DataStateNotInitialized && state < DataState__EnumerationCount);

	if (state == DataStateInSync)
		return 's';

	if (state == DataStateNotInitialized)
		return '-';

	Assert(!"Invalid mode");
	return '\0';
}
/**
 * Parse the textual name of a primary/mirror mode.
 *
 * Recognised (case-sensitive) names are "master", "segment", "mirrorless"
 * and "standby"; the middle two both map to PMModeMirrorlessSegment. Anything
 * else yields PMModeUninitialized.
 *
 * @param arg should be non-NULL
 */
PrimaryMirrorMode
decipherPrimaryMirrorModeArgument(const char *arg)
{
	if (strcmp("master", arg) == 0)
		return PMModeMaster;

	if (strcmp("segment", arg) == 0)
		return PMModeMirrorlessSegment;

	if (strcmp("mirrorless", arg) == 0)
		return PMModeMirrorlessSegment;

	if (strcmp("standby", arg) == 0)
		return PMModeStandby;

	return PMModeUninitialized;
}
/**
* Acquire the module spin lock that is used to protect the contents of pmModuleState,
* and perhaps start a critical section (START_CRIT_SECTION) first, according to the
* parameter startCriticalSection.
*
* Must be paired with releaseModuleSpinLockAndEndCriticalSectionAsNeeded(), which uses
* pmLocalState.criticalSectionStarted to decide whether to end the critical section.
*
* @param startCriticalSection if true, START_CRIT_SECTION() is invoked before the lock is taken
*/
static void
acquireModuleSpinLockAndMaybeStartCriticalSection(bool startCriticalSection)
{
/* an earlier acquire that started a critical section must already have been released */
Assert(!pmLocalState.criticalSectionStarted);
if (startCriticalSection)
{
/* remembered so that the matching release knows to call END_CRIT_SECTION */
pmLocalState.criticalSectionStarted = true;
START_CRIT_SECTION();
}
SpinLockAcquire(&pmModuleState->lock);
}
/**
* Release the module spin lock that is used to protect the contents of pmModuleState,
* then end the critical section if the matching acquire call started one.
*/
static void
releaseModuleSpinLockAndEndCriticalSectionAsNeeded(void)
{
SpinLockRelease(&pmModuleState->lock);
/* the flag was set by acquireModuleSpinLockAndMaybeStartCriticalSection(true) */
if (pmLocalState.criticalSectionStarted)
{
pmLocalState.criticalSectionStarted = false;
END_CRIT_SECTION();
}
}
/**
* Reset a set of transition arguments to the "nothing requested" values:
* uninitialized mode and data state, empty host/peer addresses, zero ports and
* both transition flags cleared. Fields not listed here (such as transitionNumber)
* are left untouched.
*
* @param args the arguments to reset; may point into shared memory
*/
static void
clearTransitionArgs(volatile PrimaryMirrorModeTransitionArguments *args)
{
args->mode = PMModeUninitialized;
args->dataState = DataStateNotInitialized;
/* empty strings: only the first byte is written */
args->hostAddress[0] = '\0';
args->hostPort = 0;
args->peerAddress[0] = '\0';
args->peerPort = 0;
args->transitionToFault = false;
args->transitionToChangeTrackingDisabledMode = false;
}
/**
 * Allocate (with palloc) a PrimaryMirrorModeTransitionArguments and reset it
 * with clearTransitionArgs(). The allocation is obtained with palloc, so its
 * lifetime follows whatever rules palloc applies in the caller's context.
 *
 * @return the newly allocated, cleared arguments
 */
PrimaryMirrorModeTransitionArguments *
createNewTransitionArguments(void)
{
	PrimaryMirrorModeTransitionArguments *args;

	args = palloc(sizeof(PrimaryMirrorModeTransitionArguments));
	clearTransitionArgs(args);

	return args;
}
/**
* Assert that primaryMirrorModeShmemInit() has set pmModuleState.
* This is only an Assert, so it offers no protection in builds where Assert is disabled.
*/
static void
assertModuleInitialized(void)
{
Assert(pmModuleState != NULL && "init must be called first");
}
/**
 * Read the current mode, segment state, data state and fault type from
 * shared memory while holding the module spin lock (no critical section).
 *
 * Every output pointer is optional: pass NULL for a value that is not
 * needed. Only the requested fields are read.
 */
void
getPrimaryMirrorStatusCodes(PrimaryMirrorMode *pm_mode,
							SegmentState_e *s_state,
							DataState_e *d_state,
							FaultType_e *f_type)
{
	acquireModuleSpinLockAndMaybeStartCriticalSection(false);

	if (pm_mode != NULL)
		*pm_mode = pmModuleState->mode;

	if (s_state != NULL)
		*s_state = pmModuleState->segmentState;

	if (d_state != NULL)
		*d_state = pmModuleState->dataState;

	if (f_type != NULL)
		*f_type = pmModuleState->faultType;

	releaseModuleSpinLockAndEndCriticalSectionAsNeeded();
}
/**
 * Format the current mode, segment state, data state and fault type as four
 * "name: label" lines (newline separated, no trailing newline) into bufOut.
 * Output is truncated by snprintf if it does not fit in bufSize bytes.
 *
 * @param bufOut  destination buffer
 * @param bufSize size of bufOut in bytes
 */
void
getPrimaryMirrorModeStatus(char *bufOut, int bufSize)
{
	PrimaryMirrorMode curMode;
	SegmentState_e curSegmentState;
	DataState_e curDataState;
	FaultType_e curFaultType;

	assertModuleInitialized();

	getPrimaryMirrorStatusCodes(&curMode, &curSegmentState, &curDataState, &curFaultType);

	snprintf(bufOut, bufSize,
			 "mode: %s\nsegmentState: %s\ndataState: %s\nfaultType: %s",
			 getMirrorModeLabel(curMode),
			 getSegmentStateLabel(curSegmentState),
			 getDataStateLabel(curDataState),
			 getFaultTypeLabel(curFaultType));
}
/**
 * Report whether a valid transition is in progress. The caller must hold the
 * module spin lock.
 *
 * A requested transition alone is not enough, because the request may be
 * illegal: the transition must also have progressed to at least
 * TSDoDatabaseStartup.
 */
static bool
isInValidTransition_UnderLock(void)
{
	if (!pmModuleState->isTransitionRequested)
		return false;

	return pmModuleState->transitionState >= TSDoDatabaseStartup;
}
/**
 * Fetch the current file replication role and states. Values are returned
 * through the pointer arguments, any of which may be NULL when that piece of
 * information is not wanted.
 *
 * In this implementation only the "in transition" answer is derived from
 * shared memory (under the module spin lock); the role, both data states and
 * the segment state are always reported as their not-configured /
 * not-initialized values.
 *
 * @param fileRepRoleOut may be NULL; receives FileRepNoRoleConfigured
 * @param segmentStateOut may be NULL; receives SegmentStateNotInitialized
 * @param dataStateOut may be NULL; receives DataStateNotInitialized
 * @param isInFilerepTransitionOut may be NULL; receives whether a valid
 *        transition is currently being processed by the postmaster
 * @param transitionTargetDataStateOut may be NULL; receives
 *        DataStateNotInitialized
 */
void
getFileRepRoleAndState(
	FileRepRole_e *fileRepRoleOut,
	SegmentState_e *segmentStateOut,
	DataState_e *dataStateOut,
	bool *isInFilerepTransitionOut,
	DataState_e *transitionTargetDataStateOut
	)
{
	bool		inTransition;

	assertModuleInitialized();

	/*
	 * Check whether the postmaster is still processing a transition. There is
	 * no need to check whether the listener is complete.
	 */
	acquireModuleSpinLockAndMaybeStartCriticalSection(false);
	inTransition = isInValidTransition_UnderLock();
	releaseModuleSpinLockAndEndCriticalSectionAsNeeded();

	/* hand the answers back to whoever asked for them */
	if (fileRepRoleOut != NULL)
		*fileRepRoleOut = FileRepNoRoleConfigured;

	if (segmentStateOut != NULL)
		*segmentStateOut = SegmentStateNotInitialized;

	if (dataStateOut != NULL)
		*dataStateOut = DataStateNotInitialized;

	if (isInFilerepTransitionOut != NULL)
		*isInFilerepTransitionOut = inTransition;

	if (transitionTargetDataStateOut != NULL)
		*transitionTargetDataStateOut = DataStateNotInitialized;
}
/**
 * Report whether the current mode is one of standby, master or mirrorless
 * segment. The checkTargetModeAsWell argument is accepted but not consulted.
 *
 * NOTE: in some cases the caller should also call
 * isInFaultFromPostmasterReset() to make sure the database will be / should
 * be started. When in fault from postmaster reset, database processes should
 * not be started.
 */
bool
isPrimaryMirrorModeAFullPostmaster(bool checkTargetModeAsWell)
{
	bool		isFull;

	assertModuleInitialized();

	/* the locking here may not be technically necessary */
	acquireModuleSpinLockAndMaybeStartCriticalSection(false);
	switch (pmModuleState->mode)
	{
		case PMModeStandby:
		case PMModeMaster:
		case PMModeMirrorlessSegment:
			isFull = true;
			break;
		default:
			isFull = false;
			break;
	}
	releaseModuleSpinLockAndEndCriticalSectionAsNeeded();

	return isFull;
}
/**
 * Return the shared isInFaultFromPostmasterReset flag, read under the module
 * spin lock.
 */
bool
isInFaultFromPostmasterReset()
{
	bool		inFault;

	assertModuleInitialized();

	/* the locking here may not be technically necessary */
	acquireModuleSpinLockAndMaybeStartCriticalSection(false);
	inFault = pmModuleState->isInFaultFromPostmasterReset;
	releaseModuleSpinLockAndEndCriticalSectionAsNeeded();

	return inFault;
}
/**
 * Compute the number of bytes of shared memory this module asks for: the
 * PMModuleState struct plus, for every temporary filespace path counted in
 * the flat file, one path pointer, one MAXPGPATH-sized string and one error
 * flag.
 */
Size
primaryMirrorModeShmemSize(void)
{
	int			pathCount = getNumOfTempFilespacePaths(true);
	Size		total;

	total = sizeof(PMModuleState)
		+ sizeof(char *) * pathCount				/* path pointer array */
		+ sizeof(char) * MAXPGPATH * pathCount		/* the path strings */
		+ sizeof(bool) * pathCount;					/* per-path error flags */

	return total;
}
/**
 * Allocate and initialise this module's shared-memory state (pmModuleState),
 * and on the first call also the process-local state (pmLocalState).
 *
 * The two spin locks are initialised immediately after allocation, before any
 * other work. primaryMirrorPopulateFilespaceInfo() (called below) runs
 * TemporaryDirectorySanityCheck(), which acquires tempFilespaceLock when a
 * configured directory is unusable, so the lock must be valid by then.
 */
void
primaryMirrorModeShmemInit(void)
{
	int			i;

	if (!gHaveInitializedLocalState)
	{
		/* only initialize local state once */
		pmLocalState.haveSetMostRecentTransition = false;
		pmLocalState.mostRecentSegmentStateWasChangeTrackingDisabled = false;
		pmLocalState.criticalSectionStarted = false;
		gHaveInitializedLocalState = true;
	}

	pmModuleState = (PMModuleState *) ShmemAlloc(sizeof(PMModuleState));

	/* locks first: code called further down may already take them */
	SpinLockInit(&pmModuleState->lock);
	SpinLockInit(&pmModuleState->tempFilespaceLock);

	pmModuleState->isTransitionRequested = false;
	pmModuleState->transitionCopiedToPMLocalMem = false;
	pmModuleState->mode = PMModeUninitialized;
	pmModuleState->segmentState = SegmentStateNotInitialized;
	pmModuleState->dataState = DataStateNotInitialized;
	pmModuleState->faultType = FaultTypeNotInitialized;
	pmModuleState->transitionState = TSNone;
	pmModuleState->transitionResult = PMTransitionSuccess;
	pmModuleState->hostAddress[0] = '\0';
	pmModuleState->hostPort = 0;
	pmModuleState->peerAddress[0] = '\0';
	pmModuleState->peerPort = 0;
	pmModuleState->transitionResultExtraInfo[0] = '\0';
	pmModuleState->haveStartedDatabase = false;
	pmModuleState->bgWriterPID = 0;
	pmModuleState->transitionNumberCounter = 1;
	pmModuleState->isInFaultFromPostmasterReset = false;

	/* temp filespace path */
	pmModuleState->numOfFilespacePath = getNumOfTempFilespacePaths(true);
	pmModuleState->numOfValidFilespacePath = pmModuleState->numOfFilespacePath;
	/* not set anywhere else in this function; give it a defined start value */
	pmModuleState->lastAllocateIdx = 0;
	Assert(pmModuleState->numOfFilespacePath > 0);

	pmModuleState->tempFilespacePath = (volatile char **) ShmemAlloc(sizeof(char *) * pmModuleState->numOfFilespacePath);
	pmModuleState->tempFilespacePathHasError = (volatile bool *) ShmemAlloc(sizeof(bool) * pmModuleState->numOfFilespacePath);
	for (i = 0; i < pmModuleState->numOfFilespacePath; i++)
	{
		pmModuleState->tempFilespacePath[i] = (char *) ShmemAlloc(sizeof(char) * MAXPGPATH);
		pmModuleState->tempFilespacePath[i][0] = '\0';
		pmModuleState->tempFilespacePathHasError[i] = false;
	}

	/* Will be changed in primaryMirrorPopulateFilespaceInfo(). */
	pmModuleState->isUsingDefaultFilespaceForTempFiles = 1;
	primaryMirrorPopulateFilespaceInfo();

	pmModuleState->txnFilespacePath[0] = '\0';
	pmModuleState->peerTxnFilespacePath[0] = '\0';
	pmModuleState->txnFilespaceOID = SYSTEMFILESPACE_OID;
	pmModuleState->isUsingDefaultFilespaceForTxnFiles = 1;

	clearTransitionArgs(&pmModuleState->requestedTransition);
}
/**
* Reset all necessary parameters EXCEPT for the requested transition itself. Caller must do that.
*
* Will be used for setting initial mode (on startup) and handling postmaster reset
*
* The only requestedTransition field reset is the transitionNumber
*
* No lock is taken here.
*/
static void
resetModuleStateForInitialTransition(void)
{
/* marks a transition as pending for the postmaster */
pmModuleState->isTransitionRequested = true;
pmModuleState->transitionCopiedToPMLocalMem = false;
pmModuleState->transitionState = TSNone;
/* hand out the next transition number and advance the counter */
pmModuleState->requestedTransition.transitionNumber = pmModuleState->transitionNumberCounter++;
}
/**
* Record the initial target mode: store it both as the requested transition's mode and as
* the current mode, then mark a transition as requested via
* resetModuleStateForInitialTransition().
*
* Expects the current mode to still be PMModeUninitialized. No lock is taken here.
*/
void
setInitialRequestedPrimaryMirrorMode(PrimaryMirrorMode targetMode)
{
Assert(pmModuleState->mode == PMModeUninitialized);
pmModuleState->requestedTransition.mode = targetMode;
pmModuleState->mode = targetMode;
resetModuleStateForInitialTransition();
}
/**
* Mark that after postmaster reset (as processed by primaryMirrorHandlePostmasterReset)
* segment should transition to mirroring fault state.
*
* Only the process-local flag doNextResetToFault is written.
* NOTE(review): primaryMirrorHandlePostmasterReset() in this file clears the flag without
* reading it, so the value stored here currently has no visible effect -- confirm intent.
*/
void
setFaultAfterReset(bool val)
{
pmLocalState.doNextResetToFault = val;
}
/**
* Update shared memory to transition segment to mirroring fault: sets
* requestedTransition.transitionToFault. No lock is taken here.
*/
void
setTransitionToFault()
{
/* mark mirroring fault if segment is not marked to transition to change-tracking */
Assert(pmModuleState->requestedTransition.dataState != DataStateInChangeTracking);
pmModuleState->requestedTransition.transitionToFault = true;
}
/**
 * Return the transitionToFault flag of the requested transition in shared
 * memory. The flag is read without taking the module lock.
 */
bool
getTransitionToFault()
{
	bool		toFault = pmModuleState->requestedTransition.transitionToFault;

	return toFault;
}
/**
 * Check whether exactly one of the NICs to which the primary and the mirror
 * are attached is inactive.
 *
 * The local and peer host names are copied out of shared memory under the
 * module spin lock (inside a critical section) and then probed with
 * NetCheckNIC().
 *
 * @return true when exactly one of the two NICs is reported as running;
 *         false when both are running, or when the two host names are equal
 *         (uninitialized or co-located, so there is no NIC redundancy).
 *
 * NOTE(review): the assertion requires at least one NIC to be running, so in
 * assert-enabled builds a double failure aborts instead of returning false.
 */
bool
primaryMirrorCheckNICFailure()
{
	char		selfHost[sizeof(pmModuleState->hostAddress)];
	char		otherHost[sizeof(pmModuleState->peerAddress)];
	int			running;

	/* get local and peer host names */
	acquireModuleSpinLockAndMaybeStartCriticalSection(true);
	strncpy(selfHost, (const char *) pmModuleState->hostAddress, sizeof(selfHost));
	strncpy(otherHost, (const char *) pmModuleState->peerAddress, sizeof(otherHost));
	releaseModuleSpinLockAndEndCriticalSectionAsNeeded();

	/*
	 * if primary and mirror are uninitialized or co-located, there is no NIC
	 * redundancy
	 */
	if (strcmp(selfHost, otherHost) == 0)
		return false;

	running = 0;
	running += NetCheckNIC(selfHost) ? 1 : 0;
	running += NetCheckNIC(otherHost) ? 1 : 0;

	Assert(running > 0);

	/*
	 * report only a single NIC failure; double failures indicate errors in
	 * NIC configuration retrieval
	 */
	return (running == 1);
}
/**
* Called on postmaster reset, after all children have been shut down but before starting the children back up.
* Returns true if the postmaster should try to reset the peer before doing a reset.
*
* This implementation always returns false; it only asserts that a most-recent
* transition has been recorded in local memory.
*/
bool
primaryMirrorPostmasterResetShouldRestartPeer(void)
{
Assert(pmLocalState.haveSetMostRecentTransition);
/* NOTE(review): the comment below describes logic that is not present here */
/* if we're a primary AND we were in resync/sync then try to restart in that mode */
return false;
}
/**
 * Handle a postmaster reset. This is called AFTER shared memory has been
 * reset.
 *
 * The most recently requested transition, as saved in postmaster-local
 * memory, is copied back into shared memory as the requested transition and
 * a new transition is marked as pending. The reset is always to the
 * non-fault state: the local doNextResetToFault flag is cleared without
 * being consulted.
 */
void
primaryMirrorHandlePostmasterReset(void)
{
	bool		resetToFault = false;

	Assert(pmModuleState->mode == PMModeUninitialized);
	Assert(pmLocalState.haveSetMostRecentTransition);

	elog(LOG,
		 "PrimaryMirrorMode: Processing postmaster reset with recent mode of %d",
		 pmLocalState.mostRecentTransition.mode);

	pmLocalState.doNextResetToFault = false;

	elog(LOG,
		 "PrimaryMirrorMode: Processing postmaster reset to %s state",
		 resetToFault ? "fault" : "non-fault");

	pmModuleState->requestedTransition = pmLocalState.mostRecentTransition;
	pmModuleState->requestedTransition.transitionToFault = resetToFault;

	/* must follow the assignment above so the new transition number sticks */
	resetModuleStateForInitialTransition();
}
/**
 * To be called by the postmaster: is a transition currently requested?
 *
 * The answer may become true immediately after this call, but it should not
 * become false, because the postmaster is the only one who resets it.
 *
 * Does not acquire the lock as we are only peeking.
 */
bool
isPrimaryMirrorModeTransitionRequested(void)
{
	bool		requested = pmModuleState->isTransitionRequested;

	return requested;
}
/*
* Copy the most recent transition request from shared memory to process-local memory
* (pmLocalState.mostRecentTransition), and record in both local and shared state that
* the copy has been made.
*/
void copyTransitionParametersToLocalMemory()
{
/* this is called before resetting shared memory so no locking is required */
pmLocalState.mostRecentTransition = pmModuleState->requestedTransition;
pmLocalState.haveSetMostRecentTransition = true;
pmModuleState->transitionCopiedToPMLocalMem = true;
}
void primaryMirrorRecordSegmentStateToPostmasterLocalMemory(void)
{
Assert(!IsUnderPostmaster);
SegmentState_e segmentState = pmModuleState->segmentState;
switch (segmentState)
{
case SegmentStateInitialization:
case SegmentStateInResyncTransition:
case SegmentStateInSyncTransition:
case SegmentStateReady:
pmLocalState.mostRecentSegmentStateWasChangeTrackingDisabled = false;
break;
case SegmentStateChangeTrackingDisabled:
pmLocalState.mostRecentSegmentStateWasChangeTrackingDisabled = true;
break;
case SegmentStateNotInitialized:
case SegmentStateInChangeTrackingTransition:
case SegmentStateFault:
case SegmentStateShutdownFilerepBackends:
case SegmentStateShutdown:
case SegmentStateImmediateShutdown:
/* does not change mostRecentSegmentStateWasChangeTrackingDisabled */
break;
case SegmentState__EnumerationCount:
Assert(0);
break;
}
}
/**
 * Return (by value) the transition arguments most recently copied to local
 * memory. This is ONLY valid in the postmaster and in children that were
 * forked after that copy was made.
 *
 * Raises an ERROR through elog when no transition has been copied yet.
 */
PrimaryMirrorModeTransitionArguments primaryMirrorGetArgumentsFromLocalMemory(void)
{
	if (pmLocalState.haveSetMostRecentTransition)
		return pmLocalState.mostRecentTransition;

	elog(ERROR, "Request for filerep arguments from local memory made when they do not exist.");

	/* only reached if elog(ERROR) returns */
	return pmLocalState.mostRecentTransition;
}
/**
 * Return the OID of the filespace recorded for transaction files in shared
 * memory. No lock is taken.
 */
Oid
primaryMirrorGetTxnFilespaceOID(void)
{
	Oid			fsOid;

	assertModuleInitialized();

	fsOid = pmModuleState->txnFilespaceOID;
	return fsOid;
}
/**
 * Return a pointer to the transaction filespace path held in shared memory.
 * The pointer refers directly to the shared buffer (the volatile qualifier
 * is cast away); it is not a copy and must not be freed.
 */
char*
primaryMirrorGetTxnFilespacePath(void)
{
	char	   *sharedPath;

	assertModuleInitialized();

	sharedPath = (char*)pmModuleState->txnFilespacePath;
	return sharedPath;
}
/*
* Return the directory to use for temporary files.
*
* This currently always returns DatabasePath. Everything after the first return is
* compiled out with "#if 0"; it is a disabled implementation that would cycle through
* the configured temporary directories, skipping those flagged as having errors.
*/
char *
primaryMirrorGetTempFilespacePath(void)
{
return DatabasePath;
/* ---- disabled implementation below; kept for reference ---- */
#if 0
int i;
int lastAllocateIdx;
int numOfDirectories;
assertModuleInitialized();
/* Don't increase the allocate counter. */
if (gp_force_use_default_temporary_directory)
return DatabasePath;
if (pmModuleState->numOfValidFilespacePath == 0)
return DatabasePath;
/*
* Writer will choose temporary and store it in shared snapshot. Use it if
* we have it.
*/
if (SharedLocalSnapshotSlot &&
SharedLocalSnapshotSlot->session_temporary_directory)
return SharedLocalSnapshotSlot->session_temporary_directory;
/* Prevent the others. */
SpinLockAcquire(&pmModuleState->tempFilespaceLock);
lastAllocateIdx = pmModuleState->lastAllocateIdx;
numOfDirectories = pmModuleState->numOfFilespacePath;
/* start at the slot after the last one handed out and wrap around */
for (i = 1; i <= numOfDirectories; i++)
{
int idx = (lastAllocateIdx + i) % numOfDirectories;
if (pmModuleState->tempFilespacePathHasError[idx])
continue;
/*
* Allocating a bad temporary directory would make segment startup fail, so
* check it here to narrow the race window.
*/
if (!TemporaryDirectoryIsOk((const char *) pmModuleState->tempFilespacePath[idx]))
{
pmModuleState->tempFilespacePathHasError[idx] = true;
pmModuleState->numOfValidFilespacePath--;
continue;
}
break;
}
/* i <= numOfDirectories means the loop stopped on a usable directory */
if (i <= numOfDirectories)
{
pmModuleState->lastAllocateIdx = (lastAllocateIdx + i) % numOfDirectories;
lastAllocateIdx = pmModuleState->lastAllocateIdx;
SpinLockRelease(&pmModuleState->tempFilespaceLock);
elog(DEBUG1, "Temp directory id %d path: \"%s\" was chosen",
lastAllocateIdx,
(char *) pmModuleState->tempFilespacePath[lastAllocateIdx]);
return (char *) pmModuleState->tempFilespacePath[lastAllocateIdx];
}
/* Use the default temporary directory. */
pmModuleState->numOfValidFilespacePath = 0;
SpinLockRelease(&pmModuleState->tempFilespaceLock);
return DatabasePath;
#endif
}
/**
 * Report whether temporary files use the default location (DatabasePath)
 * rather than a configured temporary directory. When they do, the choice is
 * also logged at DEBUG1.
 *
 * The module-initialised assertion now runs before shared memory is read;
 * previously pmModuleState was dereferenced first, which made the assertion
 * useless.
 */
bool
primaryMirrorIsUsingDefaultFilespaceForTempFiles(void)
{
	bool		ret;

	assertModuleInitialized();

	/* isUsingDefaultFilespaceForTempFiles is read here without taking a lock */
	ret = pmModuleState->isUsingDefaultFilespaceForTempFiles;

	if (ret)
		elog(DEBUG1, "Temp directory default: \"%s\" was chosen", DatabasePath);

	return ret;
}
/**
 * Report whether transaction files live in the default filespace, as
 * recorded in shared memory. No lock is taken.
 */
bool
primaryMirrorIsUsingDefaultFilespaceForTxnFiles(void)
{
	bool		usingDefault;

	assertModuleInitialized();

	usingDefault = pmModuleState->isUsingDefaultFilespaceForTxnFiles;
	return usingDefault;
}
/* The caller needs to free the memory of the return value */
char*
makeRelativeToTxnFilespace(char *path)
{
char *fullPath = NULL;
assertModuleInitialized();
fullPath = (char*)palloc(MAXPGPATH);
if (pmModuleState->isUsingDefaultFilespaceForTxnFiles)
{
if (snprintf(fullPath, MAXPGPATH, "%s", path) > MAXPGPATH)
{
ereport(ERROR, (errmsg("cannot form path %s\n", path)));
}
}
else
{
if (snprintf(fullPath, MAXPGPATH, "%s/%s", primaryMirrorGetTxnFilespacePath(), path) > MAXPGPATH)
{
ereport(ERROR, (errmsg("cannot form path %s/%s", primaryMirrorGetTxnFilespacePath(), path)));
}
}
return fullPath;
}
char*
primaryMirrorGetPeerTxnFilespacePath(void)
{
assertModuleInitialized();
return (char *)pmModuleState->peerTxnFilespacePath;
}
char*
makeRelativeToPeerTxnFilespace(char *path)
{
char *fullPath = NULL;
assertModuleInitialized();
fullPath = (char*)palloc(MAXPGPATH);
if (pmModuleState->isUsingDefaultFilespaceForTxnFiles)
sprintf(fullPath, "%s", path);
else
sprintf(fullPath, "%s/%s", primaryMirrorGetPeerTxnFilespacePath(), path);
return fullPath;
}
/**
 * Load the temporary-directory paths from the flat file into shared memory,
 * run a sanity check on every path slot, and log that transaction files use
 * the default pg_system filespace.
 */
void
primaryMirrorPopulateFilespaceInfo(void)
{
	int			slot = 0;

	/* Read the temp directories details from gp_temporary_files_directories flat file */
	getNumOfTempFilespacePaths(false);

	while (slot < pmModuleState->numOfFilespacePath)
	{
		TemporaryDirectorySanityCheck((char *) pmModuleState->tempFilespacePath[slot], 0, false);
		slot++;
	}

	ereport(LOG, (errmsg("transaction files using default pg_system filespace")));
}
/*
 * Return true if the directory is one of those that can be configured on
 * a different filespace.
 *
 * Only relative paths qualify. A relative path qualifies when it contains,
 * anywhere within it, one of the transaction directory names (xlog,
 * subtrans, clog, distributed log, distributed xid map, multixact members,
 * multixact offsets). The match is a plain substring search.
 */
bool
isTxnDir(char *path)
{
	/* Return true only if the directory is relative to default directory */
	if (isAbsolutePath(path))
		return false;

	return strstr(path, XLOGDIR) != NULL ||
		strstr(path, SUBTRANS_DIR) != NULL ||
		strstr(path, CLOG_DIR) != NULL ||
		strstr(path, DISTRIBUTEDLOG_DIR) != NULL ||
		strstr(path, DISTRIBUTEDXIDMAP_DIR) != NULL ||
		strstr(path, MULTIXACT_MEMBERS_DIR) != NULL ||
		strstr(path, MULTIXACT_OFFSETS_DIR) != NULL;
}
/*
 * Return 1 when path starts with '/', otherwise 0 (this includes the empty
 * string). path must not be NULL.
 */
int
isAbsolutePath(const char *path)
{
	return (path[0] == '/') ? 1 : 0;
}
/**
 * Report whether the given filespace is the non-default filespace that holds
 * the transaction files. Always false while the default filespace is in use
 * for transaction files.
 */
bool
isFilespaceUsedForTxnFiles(Oid fsoid)
{
	assertModuleInitialized();

	if (pmModuleState->isUsingDefaultFilespaceForTxnFiles)
		return false;

	return (fsoid == pmModuleState->txnFilespaceOID) ? true : false;
}
/*
 * Reset (unlock) all Primary Mirror state spin locks by re-initialising
 * them. Does nothing when the module's shared state has not been set up.
 */
void primaryMirrorModeResetSpinLocks(void)
{
	if (pmModuleState == NULL)
		return;

	SpinLockInit(&pmModuleState->lock);
	SpinLockInit(&pmModuleState->tempFilespaceLock);
}
/*
 * TemporaryDirectoryFaultInjection
 *
 * Flip the error flag of one configured temporary directory.
 *
 *	temp_dir_idx == 0	report the current temporary directory and run a
 *						sanity check on it that is fatal on failure
 *	temp_dir_idx  > 0	mark directory number temp_dir_idx as "no error"
 *	temp_dir_idx  < 0	mark directory number -temp_dir_idx as "error"
 *
 * Directory numbers are 1-based. Nothing happens when the module is not yet
 * initialised or when the default location is used for temporary files. An
 * out-of-range number only produces a WARNING.
 *
 * numOfValidFilespacePath is adjusted only when the flag actually changes,
 * so repeating the same injection no longer makes the counter drift. The
 * range is checked before negating the index, so INT_MIN is rejected instead
 * of being passed to abs().
 */
void
TemporaryDirectoryFaultInjection(int temp_dir_idx)
{
	int			numDirs;
	int			slot;
	bool		markError;
	const char *state;

	/* System will set gucs earlier than pmModule initialization. */
	if (!pmModuleState)
		return;

	if (pmModuleState->isUsingDefaultFilespaceForTempFiles)
		return;

	/* Display config and reset session. */
	if (temp_dir_idx == 0)
	{
		elog(NOTICE, "current default temporary directory: \"%s\"", getCurrentTempFilePath);
		TemporaryDirectorySanityCheck(getCurrentTempFilePath, 0, true);
		return;
	}

	numDirs = pmModuleState->numOfFilespacePath;
	if (temp_dir_idx < -numDirs || temp_dir_idx > numDirs)
	{
		elog(WARNING, "temporary directory index range is between [%d, %d]",
			 1,
			 numDirs);
		return;
	}

	/* safe to negate now: |temp_dir_idx| <= numDirs */
	markError = (temp_dir_idx < 0);
	slot = (markError ? -temp_dir_idx : temp_dir_idx) - 1;

	/* switch the temporary directory error state. */
	SpinLockAcquire(&pmModuleState->tempFilespaceLock);
	if (markError && !pmModuleState->tempFilespacePathHasError[slot])
	{
		pmModuleState->tempFilespacePathHasError[slot] = true;
		pmModuleState->numOfValidFilespacePath--;
	}
	else if (!markError && pmModuleState->tempFilespacePathHasError[slot])
	{
		pmModuleState->tempFilespacePathHasError[slot] = false;
		pmModuleState->numOfValidFilespacePath++;
	}
	SpinLockRelease(&pmModuleState->tempFilespaceLock);

	state = markError ? "error" : "no error";
	elog(NOTICE, "temp dir id: %d path: %s marked %s",
		 temp_dir_idx,
		 (char *) pmModuleState->tempFilespacePath[slot],
		 state);
}
/*
 * TemporaryDirectorySanityCheck
 *
 * Find the first configured temporary directory that is a prefix of path and
 * verify that it is usable. If it is not, flag it as having an error and
 * report the problem.
 *
 *	path		a file or directory name located in a temporary directory
 *	err			errno value to report; 0 means "use the errno left by the
 *				check performed here"
 *	process_die	report at FATAL when true, otherwise at WARNING
 *
 * Nothing is done when the default location is used for temporary files, and
 * the default path itself is never flagged.
 *
 * numOfValidFilespacePath is decremented only the first time a directory is
 * flagged, so repeated failures on the same directory no longer make the
 * counter drift. errno is captured immediately after the failed check,
 * before the spin lock calls get a chance to change it.
 */
void
TemporaryDirectorySanityCheck(const char *path, int err, bool process_die)
{
	int			i;
	int			error_level = process_die ? FATAL : WARNING;

	if (pmModuleState->isUsingDefaultFilespaceForTempFiles)
		return;

	for (i = 0; i < pmModuleState->numOfFilespacePath; i++)
	{
		const char *dir = (const char *) pmModuleState->tempFilespacePath[i];
		int			check_errno;

		/* match the prefix. */
		if (strncmp(path, dir, strlen(dir)) != 0)
			continue;

		if (TemporaryDirectoryIsOk(dir))
			break;
		check_errno = errno;

		SpinLockAcquire(&pmModuleState->tempFilespaceLock);
		if (!pmModuleState->tempFilespacePathHasError[i])
		{
			pmModuleState->tempFilespacePathHasError[i] = true;
			pmModuleState->numOfValidFilespacePath--;
		}
		SpinLockRelease(&pmModuleState->tempFilespaceLock);

		/*
		 * Some paths may ask us to check path sanity, in this case, the err
		 * is 0. Use errno if err == 0.
		 */
		elog(error_level, "temporary directory id: %d path \"%s\" has error(errno: %d)",
			 i, path, err == 0 ? check_errno : err);
		break;
	}

	/* default path will not be marked. */
}
/*
 * TemporaryDirectoryIsOk
 *
 * Return true when path exists, is a directory, and is both writable and
 * searchable by this process; otherwise log the reason at DEBUG1 and return
 * false.
 *
 * On a false return errno describes the failure: the value left by stat() or
 * access() is restored after logging (logging may change errno), and ENOTDIR
 * is set when path exists but is not a directory. Callers such as
 * TemporaryDirectorySanityCheck() report errno after this call.
 */
static bool
TemporaryDirectoryIsOk(const char *path)
{
	struct stat st;
	int			save_errno;

	/* check file exists */
	if (stat(path, &st) < 0)
	{
		save_errno = errno;
		elog(DEBUG1, "temporary direcotry does not exists for: \"%s\"", path);
		errno = save_errno;
		return false;
	}

	/* check is directory */
	if (!S_ISDIR(st.st_mode))
	{
		elog(DEBUG1, "temporary direcotry is not a valid directory for: \"%s\"", path);
		errno = ENOTDIR;
		return false;
	}

	/* check permission */
	if (access(path, W_OK|X_OK) < 0)
	{
		save_errno = errno;
		elog(DEBUG1, "temporary direcotry permission denied for: \"%s\"", path);
		errno = save_errno;
		return false;
	}

	return true;
}