| -- Ensure that the wal files up until the one which has the last CHECKPOINT wal |
| -- record BEFORE the oldest replication slot restart_lsn are not recycled even |
| -- if more than one CHECKPOINTs are performed after that restart_lsn. Otherwise |
| -- gprecoverseg (based on pg_rewind) would fail due to missing wal file |
| |
| CREATE OR REPLACE LANGUAGE plpython3u; |
| CREATE OR REPLACE FUNCTION connectSeg(n int, port int, hostname text) RETURNS bool AS $$ |
| import os |
| import subprocess |
| import time |
| for i in range(n): |
| try: |
| subprocess.run(["psql", "-h", str(hostname), "-p", str(port), "postgres", "-Xc", "select 1;"], |
| env={"PGOPTIONS": "-c gp_role=utility", "PATH": os.getenv("PATH")}, |
| check=True) |
| return True |
| except Exception as e: |
| time.sleep(1) |
| raise Exception("wait connection timeout") |
| $$ |
| LANGUAGE plpython3u; |
| |
| CREATE TABLE tst_missing_tbl (a int); |
| INSERT INTO tst_missing_tbl values(2),(1),(5); |
| |
| -- Make the test faster by not preserving any extra wal segment files |
| !\retcode gpconfig -c wal_keep_size -v 0; |
| !\retcode gpstop -ari; |
| |
| -- Test 1: Ensure that pg_rewind doesn't fail due to checkpoints inadvertently |
| -- recycling WAL when a former primary is marked down in configuration, while it |
| -- actually continues to run. The subsequent CHECKPOINT which is performed after the |
| -- failover on the segment being marked down should not recycle the wal file |
| -- which has the last common checkpoint of the target and source segment of |
| -- pg_rewind. |
| |
| -- Run a checkpoint so that the below sqls won't cause a checkpoint |
| -- until an explicit checkpoint command is issued by the test. |
| -- checkpoint_timeout is by default 300 but the below test should be able to |
| -- finish in 300 seconds. |
| 1: CHECKPOINT; |
| |
| 0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 1: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| -- Should be not needed mostly but let's 100% ensure since pg_switch_wal() |
| -- won't switch if it has been on the boundary (seldom though). |
| 0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 1: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| 0Uq: |
| |
| -- Make sure primary/mirror pair is in sync, otherwise FTS can't promote mirror |
| 1: SELECT wait_until_all_segments_synchronized(); |
| -- Mark down the primary with content 0 via fts fault injection. |
| 1: SELECT gp_inject_fault_infinite('fts_handle_message', 'error', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; |
| |
| -- Trigger failover and double check. |
| 1: SELECT gp_request_fts_probe_scan(); |
| 1: SELECT role, preferred_role from gp_segment_configuration where content = 0; |
| |
| -- Run two more checkpoints. Previously this causes the checkpoint.redo wal |
| -- file before the oldest replication slot LSN is recycled/removed. |
| 0M: CHECKPOINT; |
| 0M: BEGIN; |
| 0M: DROP TABLE tst_missing_tbl; |
| 0M: ABORT; |
| 0M: CHECKPOINT; |
| 0Mq: |
| |
| -- Write something (promote adds a 'End Of Recovery' xlog that causes the |
| -- divergence between primary and mirror, but I add a write here so that we |
| -- know that a wal divergence is explicitly triggered and 100% completed. Also |
| -- sanity check the tuple distribution (assumption of the test). |
| 2: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| 2: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id; |
| |
| -- Ensure that pg_rewind succeeds. Previously it could fail since the divergence |
| -- LSN wal file is missing. |
| !\retcode gprecoverseg -av; |
| -- In case it fails it should not affect subsequent testing. |
| !\retcode gprecoverseg -aF; |
| 2: SELECT wait_until_all_segments_synchronized(); |
| |
| -- Test 2: Ensure that pg_rewind doesn't fail due to checkpoints inadvertently |
| -- recycling WAL when a former primary was abnormally shutdown. In the case the |
| -- target segment was abnormally shutdown, pg_rewind starts and then stops |
| -- the target segment as single-user mode postgres to ensure clean shutdown which |
| -- causes two checkpoints. The two newer checkpoints introduced by pg_rewind |
| -- should not recycle the wal file that has the last common checkpoint of the |
| -- target and source segment of pg_rewind. |
| |
| -- See previous comment for why. |
| 3: CHECKPOINT; |
| |
| 1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 3: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| 1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 3: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| 1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 3: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| -- Should be not needed mostly but let's 100% ensure since pg_switch_wal() |
| -- won't switch if it is on the boundary already (seldom though). |
| 1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 3: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| |
| -- Hang at checkpointer before writing checkpoint xlog. |
| 3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'suspend', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| 1U&: CHECKPOINT; |
| 3: SELECT gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| |
| -- Stop the primary immediately and promote the mirror. |
| 3: SELECT pg_ctl(datadir, 'stop', 'immediate') FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| 3: SELECT gp_request_fts_probe_scan(); |
| -- Wait for the segment promotion finished and accept the connection |
| 3: select connectSeg(600,port,hostname) from gp_segment_configuration where content = 1 and role = 'p'; |
| -- Wait for the end of recovery CHECKPOINT completed after the mirror was promoted |
| 3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'skip', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| 3: SELECT gp_wait_until_triggered_fault('checkpoint_after_redo_calculated', 1, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 1; |
| 3: SELECT gp_inject_fault('checkpoint_after_redo_calculated', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 1; |
| 3: SELECT role, preferred_role from gp_segment_configuration where content = 1; |
| |
| 4: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| 4: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id; |
| |
| -- CHECKPOINT should fail now. |
| 1U<: |
| 1Uq: |
| |
| -- Ensure that pg_rewind succeeds. For unclean shutdown, there are two |
| -- checkpoints are introduced in pg_rewind when running single-user mode postgres |
| -- (one is the checkpoint after crash recovery and another is the shutdown |
| -- checkpoint) and previously the checkpoints clean up the wal files that |
| -- include the previous checkpoint (before divergence LSN) for pg_rewind and |
| -- thus makes gprecoverseg (pg_rewind) fail. |
| !\retcode gprecoverseg -av; |
| -- In case it fails it should not affect subsequent testing. |
| !\retcode gprecoverseg -aF; |
| 4: SELECT wait_until_all_segments_synchronized(); |
| |
| -- Test 3: Ensure that pg_rewind doesn't fail due to checkpoints inadvertently |
| -- recycling WAL when a former primary is marked down in configuration, while it |
| -- actually continued running for a while before it was cleanly shutdown. The wal |
| -- file on the target segment which has the last common checkpoint of the target |
| -- and source segment of pg_rewind should survive the subsequent CHECKPOINTs |
| -- performed on the target segment even if the segment was cleanly shutdown |
| -- and started again after the failover. |
| |
| -- Run a checkpoint so that the below sqls won't cause a checkpoint |
| -- until an explicit checkpoint command is issued by the test. |
| -- checkpoint_timeout is by default 300 but the below test should be able to |
| -- finish in 300 seconds. |
| 1: CHECKPOINT; |
| |
| 0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 1: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| -- Should be not needed mostly but let's 100% ensure since pg_switch_wal() |
| -- won't switch if it has been on the boundary (seldom though). |
| 0U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 1: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| 0Uq: |
| |
| -- Make sure primary/mirror pair is in sync, otherwise FTS can't promote mirror |
| 1: SELECT wait_until_all_segments_synchronized(); |
| -- Mark down the primary with content 0 via fts fault injection. |
| 1: SELECT gp_inject_fault_infinite('fts_handle_message', 'error', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; |
| |
| -- Trigger failover and double check. |
| 1: SELECT gp_request_fts_probe_scan(); |
| 1: SELECT role, preferred_role from gp_segment_configuration where content = 0; |
| |
| -- Run two more checkpoints. Previously this causes the checkpoint.redo wal |
| -- file before the oldest replication slot LSN is recycled/removed. |
| 0M: BEGIN; |
| 0M: DROP TABLE tst_missing_tbl; |
| 0M: ABORT; |
| 0M: CHECKPOINT; |
| 0M: BEGIN; |
| 0M: DROP TABLE tst_missing_tbl; |
| 0M: ABORT; |
| 0M: CHECKPOINT; |
| |
| -- Clean shutdown. Clean shutdown performs CHECKPOINT |
| 2: SELECT pg_ctl(datadir, 'stop', 'fast') from gp_segment_configuration where role = 'm' and content = 0; |
| 0Mq: |
| |
| -- Start again. Start from a clean shutdown does not perform CHECKPOINT |
| 2: SELECT pg_ctl_start(datadir, port) from gp_segment_configuration where role = 'm' and content = 0; |
| |
| -- Perform CHECKPOINT. Previously this causes the checkpoint.redo wal |
| -- file before the oldest replication slot LSN is recycled/removed. |
| 0M: CHECKPOINT; |
| |
| -- Write something (promote adds a 'End Of Recovery' xlog that causes the |
| -- divergence between primary and mirror, but I add a write here so that we |
| -- know that a wal divergence is explicitly triggered and 100% completed. Also |
| -- sanity check the tuple distribution (assumption of the test). |
| 2: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| 2: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id; |
| |
| -- Ensure that pg_rewind succeeds. Previously it could fail since the divergence |
| -- LSN wal file is missing. |
| !\retcode gprecoverseg -av; |
| -- In case it fails it should not affect subsequent testing. |
| !\retcode gprecoverseg -aF; |
| 2: SELECT wait_until_all_segments_synchronized(); |
| |
| -- Test 4: Ensure that pg_rewind doesn't fail due to checkpoints inadvertently |
| -- recycling WAL when a former primary was abnormally shutdown and it wrote |
| -- a CHECKPOINT record locally but the record did not make to the wal receiver. |
| -- In the case the target segment was abnormally shutdown, pg_rewind starts and |
| -- then stops the target segment as single-user mode postgres to ensure clean shutdown |
| -- which causes two checkpoints. The two newer checkpoints introduced by pg_rewind |
| -- plus the checkpoint during the unclean shutdown should not recycle the wal |
| -- file that has the last common checkpoint of the target and source segment of |
| -- pg_rewind. |
| |
| -- See previous comment for why. |
| 3: CHECKPOINT; |
| 1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 3: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| -- Should be not needed mostly but let's 100% ensure since pg_switch_wal() |
| -- won't switch if it is on the boundary already (seldom though). |
| 1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 3: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| |
| -- Have primary/mirror pair in sync before suspending the wal sender. |
| 3: SELECT wait_until_all_segments_synchronized(); |
| |
| -- Hang the wal sender before writing checkpoint wal record. |
| 3: SELECT gp_inject_fault('wal_sender_loop', 'suspend', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| 3: SELECT gp_wait_until_triggered_fault('wal_sender_loop', 1, dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| |
| -- Checkpoint and make sure the CHECKPOINT record is written on disk |
| 3: SELECT gp_inject_fault('checkpoint_control_file_updated', 'suspend', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| 1U&: CHECKPOINT; |
| 3: SELECT gp_wait_until_triggered_fault('checkpoint_control_file_updated', 1, dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| |
| -- Stop the primary immediately and promote the mirror. |
| 3: SELECT pg_ctl(datadir, 'stop', 'immediate') FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| 3: SELECT gp_request_fts_probe_scan(); |
| |
| -- Wait for the segment promotion finished and accept the connection |
| 3: select connectSeg(600,port,hostname) from gp_segment_configuration where content = 1 and role = 'p'; |
| -- Reset faults and confirm FTS configuration |
| 3: SELECT gp_inject_fault('wal_sender_loop', 'reset', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| 3: SELECT gp_inject_fault('checkpoint_control_file_updated', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 1; |
| 3: SELECT role, preferred_role from gp_segment_configuration where content = 1; |
| |
| -- Write something on the current primary |
| 4: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| 4: SELECT gp_segment_id, count(*) from tst_missing_tbl group by gp_segment_id; |
| |
| -- CHECKPOINT should fail now. |
| 1U<: |
| 1Uq: |
| |
| -- Ensure that pg_rewind succeeds. For unclean shutdown, there are two |
| -- checkpoints are introduced in pg_rewind when running single-user mode postgres |
| -- (one is the checkpoint after crash recovery and another is the shutdown |
| -- checkpoint) and previously the checkpoints clean up the wal files that |
| -- include the previous checkpoint (before divergence LSN) for pg_rewind and |
| -- thus makes gprecoverseg (pg_rewind) fail. |
| !\retcode gprecoverseg -av; |
| -- In case it fails it should not affect subsequent testing. |
| !\retcode gprecoverseg -aF; |
| 4: SELECT wait_until_all_segments_synchronized(); |
| |
| -- Test 5: Ensure that replication slot's restart_lsn advances and WAL files |
| -- that are older than the minimum restart_lsn get removed/recycled. |
| |
| -- Turn off wal_recycle to make this test easier to write. |
| !\retcode gpconfig -c wal_recycle -v false; |
| !\retcode gpstop -ari; |
| |
| -- Create an unlogged table on the primary that remembers replication slot's last restart_lsn and number of WAL files. |
| 1U: CREATE UNLOGGED TABLE unlogged_wal_retention_test(restart_lsn_before pg_lsn, wal_count_before int); |
| 1U: INSERT INTO unlogged_wal_retention_test SELECT (select restart_lsn FROM pg_replication_slots WHERE slot_name = 'internal_wal_replication_slot') as restart_lsn_before, (select count(*) from pg_ls_waldir()) as wal_count_before; |
| 5: CHECKPOINT; |
| -- Replication slot's restart_lsn should advance to the checkpoint's redo location. |
| 1U: SELECT pg_wal_lsn_diff(restart_lsn, restart_lsn_before) > 0 from pg_replication_slots, unlogged_wal_retention_test WHERE slot_name = 'internal_wal_replication_slot'; |
| 1U: SELECT pg_wal_lsn_diff(restart_lsn, redo_lsn) = 0 from pg_replication_slots, pg_control_checkpoint() WHERE slot_name = 'internal_wal_replication_slot'; |
| -- Some old WALs should be removed |
| 1U: select ((select count(*)::int from pg_ls_waldir()) - wal_count_before) < 0 FROM unlogged_wal_retention_test; |
| -- Record the restart_lsn and the WAL file count. |
| 1U: UPDATE unlogged_wal_retention_test SET restart_lsn_before = (SELECT restart_lsn from pg_replication_slots WHERE slot_name = 'internal_wal_replication_slot'), wal_count_before = (select count(*) from pg_ls_waldir()); |
| -- Write some records to a newer WAL file. |
| 1U: SELECT pg_switch_wal is not null FROM pg_switch_wal(); |
| 5: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| -- Replication slot's restart_lsn should NOT change regardless mirror has received more wals. |
| 1U: select pg_wal_lsn_diff(restart_lsn, restart_lsn_before) = 0 FROM pg_replication_slots, unlogged_wal_retention_test WHERE slot_name = 'internal_wal_replication_slot'; |
| -- Record the WAL file count. |
| 1U: UPDATE unlogged_wal_retention_test SET wal_count_before = (select count(*) from pg_ls_waldir()); |
| |
| -- Hang the wal sender before writing more wals. |
| 5: SELECT gp_inject_fault('wal_sender_loop', 'suspend', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| -- Perform checkpoint. This is to mimic walreciver's confirmed received lsn is behind the redo lsn. |
| 1U: CHECKPOINT; |
| |
| -- Replication slot's restart_lsn should NOT change regardless a new checkpoints was performed. |
| 1U: select pg_wal_lsn_diff(restart_lsn, restart_lsn_before) = 0 FROM pg_replication_slots, unlogged_wal_retention_test WHERE slot_name = 'internal_wal_replication_slot'; |
| -- Replication slot's restart_lsn should be smaller than the checkpoint's redo location. |
| 1U: SELECT pg_wal_lsn_diff(restart_lsn, redo_lsn) < 0 from pg_replication_slots, pg_control_checkpoint() WHERE slot_name = 'internal_wal_replication_slot'; |
| |
| -- Resume walsender. |
| 5: SELECT gp_inject_fault('wal_sender_loop', 'reset', dbid) FROM gp_segment_configuration WHERE role='p' AND content = 1; |
| -- Perform transaction to make sure wals are in sync. |
| 5: INSERT INTO tst_missing_tbl values(2),(1),(5); |
| -- Replication slot's restart_lsn should now advance to the checkpoint's redo location. |
| 1U: SELECT pg_wal_lsn_diff(restart_lsn, restart_lsn_before) > 0 from pg_replication_slots, unlogged_wal_retention_test WHERE slot_name = 'internal_wal_replication_slot'; |
| 1U: SELECT pg_wal_lsn_diff(restart_lsn, redo_lsn) = 0 from pg_replication_slots, pg_control_checkpoint() WHERE slot_name = 'internal_wal_replication_slot'; |
| -- Trigger old WAL removal. |
| 1U: CHECKPOINT; |
| -- And the WAL file(s) older than the redo lsn that we previously kept should now be removed. |
| 1U: select ((select count(*)::int from pg_ls_waldir()) - wal_count_before) < 0 FROM unlogged_wal_retention_test; |
| |
| -- Cleanup |
| 1U: DROP TABLE unlogged_wal_retention_test; |
| 1Uq: |
| |
| 5: DROP TABLE tst_missing_tbl; |
| 5: DROP FUNCTION connectSeg; |
| !\retcode gpconfig -r wal_keep_size; |
| !\retcode gpconfig -r wal_recycle; |
| !\retcode gpstop -ari; |