blob: 2edee9a8b15d199c315ca7f709fab1b8de8b8be2 [file] [log] [blame]
--
-- Master/standby commit blocking scenarios. These are different from
-- primary/mirror commit blocking because there is no FTS or a
-- third-party as external arbiter in case of master/standby.
--
-- Scenario1: Commits should block until standby confirms WAL flush
-- up to commit LSN.
-- Check that are starting with a clean slate, standby must be in sync
-- with master.
select application_name, state from pg_stat_replication;
-- Inject fault on standby to skip WAL flush.
select gp_inject_fault_infinite('walrecv_skip_flush', 'skip', dbid)
from gp_segment_configuration where content=-1 and role='m';
begin;
create or replace function wait_for_pg_stat_activity(timeout_secs int)
returns void as $$
declare
c int; /* in func */
i int; /* in func */
begin
c := 0; /* in func */
i := 0; /* in func */
while c < 1 and i < timeout_secs*2 loop
select count(*) into c from pg_stat_activity
where wait_event = 'SyncRep'; /* in func */
perform pg_sleep(0.5); /* in func */
perform pg_stat_clear_snapshot(); /* in func */
i := i + 1; /* in func */
end loop; /* in func */
if c < 1 then
raise exception 'timeout waiting for command to get blocked'; /* in func */
end if; /* in func */
end; /* in func */
$$ language plpgsql;
-- Flush WAL to trigger the fault on standby.
checkpoint;
select gp_wait_until_triggered_fault('walrecv_skip_flush', 1, dbid)
from gp_segment_configuration where content=-1 and role='m';
-- Should block in commit (SyncrepWaitForLSN()), waiting for commit
-- LSN to be flushed on standby.
1&: create table commit_blocking_on_standby_t1 (a int);
-- The create table command should be seen as blocked. Wait until
-- that happens.
select wait_for_pg_stat_activity(60);
select datname, wait_event, query from pg_stat_activity
where wait_event = 'SyncRep';
select gp_inject_fault('walrecv_skip_flush', 'reset', dbid)
from gp_segment_configuration where content=-1 and role='m';
-- Ensure that commits are no longer blocked.
commit;
1<:
-- The blocked commit must have finished and the table should be ready
-- for insert.
insert into commit_blocking_on_standby_t1 values (1);
-- Scenario2: In CATCHUP phase, commits should not block until standby
-- has caught up within range. And thereafter, commits should start
-- blocking.
-- In order to get master and standby in CATCHUP state, existing
-- connection, which is in STREAMING state must be closed. A new
-- connection will then be initiated by standby, beginning in STARTUP
-- then CATCHUP to STREAMING. Faults are used to suspend WAL sender
-- before entering STREAMING state.
-- Suspend WAL sender in main loop. "infinite_loop" fault type does
-- not block signals.
select gp_inject_fault_infinite('wal_sender_loop', 'infinite_loop', dbid)
from gp_segment_configuration where content=-1 and role='p';
-- Inject fault on standby to skip WAL flush.
select gp_inject_fault_infinite('walrecv_skip_flush', 'skip', dbid)
from gp_segment_configuration where content=-1 and role='m';
-- Kill existing walsender. WAL sender and WAL receiver processes
-- will be restarted and new connection will be established. Note
-- that the faults injected are still in effect and will affect the
-- newly forked WAL sender and receiver processes.
select pg_terminate_backend(pid) from pg_stat_replication;
-- Should be set to 1 WAL segment by default. Standby is considered
-- caught up if its flush_lsn is less than 1 WAL segment (64MB)
-- away from sent_lsn.
show repl_catchup_within_range;
-- Start a transaction, execute a DDL and commit. The commit should
-- not block.
begin;
select wait_until_standby_in_state('catchup');
select gp_wait_until_triggered_fault('wal_sender_loop', 1, dbid)
from gp_segment_configuration where content=-1 and role='p';
-- WAL sender should be stuck in CATCHUP state.
select application_name, state from pg_stat_replication;
-- This commit should NOT block because WAL sender has not yet
-- processed caughtup within range as it is stuck at the beginning of
-- main loop.
commit;
select gp_inject_fault('wal_sender_after_caughtup_within_range', 'suspend', dbid)
from gp_segment_configuration where content=-1 and role='p';
select gp_inject_fault('wal_sender_loop', 'reset', dbid)
from gp_segment_configuration where content=-1 and role='p';
-- Once this fault is triggered, WAL sender should have set
-- caughtup_within_range to true because difference between
-- sent_lsn and flush_lsn is within 1 WAL segment (64) MB.
select gp_wait_until_triggered_fault(
'wal_sender_after_caughtup_within_range', 1, dbid)
from gp_segment_configuration where content=-1 and role='p';
-- Should block because standby is considered to have caughtup within
-- range.
1&: create table commit_blocking_on_standby_t2 (a int);
-- The create table command should be seen as blocked.
select wait_for_pg_stat_activity(60);
select datname, wait_event, query from pg_stat_activity
where wait_event = 'SyncRep';
-- Reset faults on primary as well as mirror.
select gp_inject_fault('all', 'reset', dbid)
from gp_segment_configuration where content=-1;
1<:
-- Create table transaction must have committed and the table should
-- be ready for insert.
insert into commit_blocking_on_standby_t2 values (1);
select wait_until_standby_in_state('streaming');