#!/usr/bin/env bash
## ==================================================================
## Required: A fresh gpdemo cluster with mirrors, with its environment
## file sourced.
##
## This script tests and showcases a simple Point-In-Time Recovery
## scenario using WAL Archiving and restore points. It also
## demonstrates how concurrent commits are blocked while a
## distributed restore point is being created, which guarantees
## cluster consistency.
##
## Note: After successfully running this test, the PITR cluster will
## still be up and running from the temp_test directory. Run the
## `clean` Makefile target to go back to the gpdemo cluster.
## ==================================================================
# Store gpdemo master and primary segment data directories.
# This assumes default settings for the ports and data directories.
DATADIR="${COORDINATOR_DATA_DIRECTORY%*/*/*}"
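# For example, assuming the default gpdemo layout, this strips the last two
# path components: .../gpdemo/datadirs/qddir/demoDataDir-1 -> .../gpdemo/datadirs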
MASTER=${DATADIR}/qddir/demoDataDir-1
PRIMARY1=${DATADIR}/dbfast1/demoDataDir0
PRIMARY2=${DATADIR}/dbfast2/demoDataDir1
PRIMARY3=${DATADIR}/dbfast3/demoDataDir2
MASTER_PORT=7000
PRIMARY1_PORT=7002
PRIMARY2_PORT=7003
PRIMARY3_PORT=7004
# Set up temporary directories to store the basebackups and the WAL
# archives that will be used for Point-In-Time Recovery later.
TEMP_DIR=$PWD/temp_test
REPLICA_MASTER=$TEMP_DIR/replica_m
REPLICA_PRIMARY1=$TEMP_DIR/replica_p1
REPLICA_PRIMARY2=$TEMP_DIR/replica_p2
REPLICA_PRIMARY3=$TEMP_DIR/replica_p3
ARCHIVE_PREFIX=$TEMP_DIR/archive_seg
REPLICA_MASTER_DBID=10
REPLICA_PRIMARY1_DBID=11
REPLICA_PRIMARY2_DBID=12
REPLICA_PRIMARY3_DBID=13
# The options for pg_regress and pg_isolation2_regress.
REGRESS_OPTS="--dbname=gpdb_pitr_database --use-existing --init-file=../regress/init_file --init-file=./init_file_gpdb_pitr --load-extension=gp_inject_fault"
ISOLATION2_REGRESS_OPTS="${REGRESS_OPTS} --init-file=../isolation2/init_file_isolation2"
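# Note: --use-existing makes pg_regress run against the already-running
# cluster instead of initializing a temporary installation, and the init
# files filter expected differences out of the test output.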
# Run test via pg_regress with given test name.
run_test()
{
    ../regress/pg_regress $REGRESS_OPTS $1
    if [ $? != 0 ]; then
        exit 1
    fi
}
# Run test via pg_isolation2_regress with given test name. The
# isolation2 framework is mainly used to demonstrate the commit
# blocking scenario.
run_test_isolation2()
{
    ../isolation2/pg_isolation2_regress $ISOLATION2_REGRESS_OPTS $1
    if [ $? != 0 ]; then
        exit 1
    fi
}
# Remove temporary test directory if it already exists.
[ -d "$TEMP_DIR" ] && rm -rf "$TEMP_DIR"
# Create our test database.
createdb gpdb_pitr_database
# Test gp_create_restore_point()
run_test test_gp_create_restore_point
# Test output of gp_switch_wal()
run_test_isolation2 test_gp_switch_wal
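# (gp_create_restore_point() creates a named restore point on the master
# and every primary segment in one distributed operation; gp_switch_wal()
# similarly switches the WAL file cluster-wide, reporting the switch
# location for each segment.)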
# Set up WAL Archiving by updating the postgresql.conf files of the
# master and primary segments. Afterwards, restart the cluster to load
# the new settings.
echo "Setting up WAL Archiving configurations..."
for segment_role in MASTER PRIMARY1 PRIMARY2 PRIMARY3; do
    DATADIR_VAR=$segment_role
    echo "wal_level = replica
archive_mode = on
archive_command = 'cp %p ${ARCHIVE_PREFIX}%c/%f'" >> ${!DATADIR_VAR}/postgresql.conf
done
mkdir -p ${ARCHIVE_PREFIX}{-1,0,1,2}
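# The archive directories map one-to-one to content IDs (-1 for the master,
# 0-2 for the primaries): %p/%f above are the standard PostgreSQL path and
# filename placeholders, while %c is GPDB's content-ID placeholder, so e.g.
# the content 0 primary archives each WAL file as:
#   cp pg_wal/<walfile> $PWD/temp_test/archive_seg0/<walfile>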
gpstop -ar -q
# Create the basebackups which will be our replicas for Point-In-Time
# Recovery later.
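# (-X stream includes the WAL generated while each backup runs so the
# copies are self-consistent; --target-gp-dbid is GPDB's pg_basebackup
# option that stamps each copy with the new dbid it will run under.)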
echo "Creating basebackups..."
for segment_role in MASTER PRIMARY1 PRIMARY2 PRIMARY3; do
    PORT_VAR=${segment_role}_PORT
    REPLICA_VAR=REPLICA_$segment_role
    REPLICA_DBID_VAR=REPLICA_${segment_role}_DBID
    pg_basebackup -h localhost -p ${!PORT_VAR} -X stream -D ${!REPLICA_VAR} --target-gp-dbid ${!REPLICA_DBID_VAR}
done
# Run setup test. This will create the tables, create the restore
# points, and demonstrate the commit blocking.
run_test_isolation2 gpdb_pitr_setup
# Stop the gpdemo cluster. We'll be focusing on the PITR cluster from
# now onwards.
echo "Stopping gpdemo cluster to now focus on PITR cluster..."
gpstop -a -q
# Append recovery settings to postgresql.conf in all the replicas to set up
# Point-In-Time Recovery. Specifically, restore_command and
# recovery_target_name must be set properly. We also empty out the
# postgresql.auto.conf files to disable synchronous replication, since the
# PITR cluster won't have mirrors to replicate to. Finally, we set
# recovery_end_command to touch a recovery_finished file in each datadir to
# demonstrate that the GUC is functional.
echo "Appending recovery settings to postgresql.conf files in the replicas and starting them up..."
for segment_role in MASTER PRIMARY1 PRIMARY2 PRIMARY3; do
    REPLICA_VAR=REPLICA_$segment_role
    echo "restore_command = 'cp ${ARCHIVE_PREFIX}%c/%f %p'
recovery_target_name = 'test_restore_point'
recovery_target_action = 'promote'
recovery_end_command = 'touch ${!REPLICA_VAR}/recovery_finished'" >> ${!REPLICA_VAR}/postgresql.conf
    echo "" > ${!REPLICA_VAR}/postgresql.auto.conf
    touch ${!REPLICA_VAR}/recovery.signal
    pg_ctl start -D ${!REPLICA_VAR} -l /dev/null
done
# Wait up to 30 seconds for new master to accept connections.
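# (pg_isready with no arguments targets PGHOST/PGPORT from the environment,
# which is assumed to still point at the master's port from the sourced
# gpdemo environment.)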
RETRY=60
while true; do
    pg_isready > /dev/null
    if [ $? == 0 ]; then
        break
    fi
    sleep 0.5s
    RETRY=$((RETRY - 1))
    if [ $RETRY -le 0 ]; then
        echo "FAIL: Timed out waiting for new master to accept connections."
        exit 1
    fi
done
# Reconfigure the segment configuration on the replica master so that
# the other replicas are recognized as primary segments.
echo "Configuring replica master's gp_segment_configuration..."
PGOPTIONS="-c gp_role=utility" psql postgres -c "
SET allow_system_table_mods=true;
DELETE FROM gp_segment_configuration WHERE preferred_role='m';
UPDATE gp_segment_configuration SET dbid=${REPLICA_MASTER_DBID}, datadir='${REPLICA_MASTER}' WHERE content = -1;
UPDATE gp_segment_configuration SET dbid=${REPLICA_PRIMARY1_DBID}, datadir='${REPLICA_PRIMARY1}' WHERE content = 0;
UPDATE gp_segment_configuration SET dbid=${REPLICA_PRIMARY2_DBID}, datadir='${REPLICA_PRIMARY2}' WHERE content = 1;
UPDATE gp_segment_configuration SET dbid=${REPLICA_PRIMARY3_DBID}, datadir='${REPLICA_PRIMARY3}' WHERE content = 2;
"
# Restart the cluster so that it comes up under the new segment
# configuration and MPP query dispatch works again.
echo "Restarting cluster now that the new cluster is properly configured..."
export COORDINATOR_DATA_DIRECTORY=$REPLICA_MASTER
gpstop -ar
# Run validation test to confirm we have gone back in time.
run_test gpdb_pitr_validate
# Print unnecessary success output.
echo "============================================="
echo "SUCCESS! GPDB Point-In-Time Recovery worked."
echo "============================================="