blob: c461b03786c4bce1a8c2af9ae02d660a6f090c8b [file] [log] [blame]
-- Tests mirror promotion triggered by FTS in 2 different scenarios.
--
-- 1st: Shut-down of primary and hence unavailability of primary
-- leading to mirror promotion. In this case the connection between
-- primary and mirror is disconnected prior to promotion and
-- walreceiver doesn't exist.
--
-- 2nd: Primary is alive but using fault injector simulated to not
-- respond to fts. This helps to validate fts time-out logic for
-- probes. Plus also mirror promotion triggered while connection
-- between primary and mirror is still alive and hence walreceiver
-- also exist during promotion.
SELECT role, preferred_role, content, status FROM gp_segment_configuration;
-- stop a primary in order to trigger a mirror promotion
select pg_ctl((select datadir from gp_segment_configuration c
where c.role='p' and c.content=0), 'stop');
-- trigger failover
select gp_request_fts_probe_scan();
-- wait some seconds until the promotion is done.
!\retcode gpfts -A -D;
-- expect: to see the content 0, preferred primary is mirror and it's down
-- the preferred mirror is primary and it's up and not-in-sync
select content, preferred_role, role, status, mode
from gp_segment_configuration
where content = 0;
-- wait some seconds until the promotion is done.
select pg_sleep(2);
-- wait for content 0 (earlier mirror, now primary) to finish the promotion
0U: select 1;
-- Quit this utility mode session, as need to start fresh one below
0Uq:
-- fully recover the failed primary as new mirror
!\retcode gprecoverseg -aF --no-progress;
-- loop while segments come in sync
select wait_until_all_segments_synchronized();
-- expect: to see roles flipped and in sync
select content, preferred_role, role, status, mode
from gp_segment_configuration
where content = 0;
-- start_ignore
-- set GUCs to speed-up the test
alter system set gp_fts_probe_retries to 2;
alter system set gp_fts_probe_timeout to 5;
select pg_reload_conf();
select dbid from gp_segment_configuration where content = 0 and role = 'p';
-- end_ignore
select gp_inject_fault_infinite('fts_handle_message', 'infinite_loop', dbid)
from gp_segment_configuration
where content = 0 and role = 'p';
-- trigger failover
select gp_request_fts_probe_scan();
-- trigger one more probe right away which mostly results in sending
-- promotion request again to mirror, while its going through
-- promotion, which is nice condition to test as well.
select gp_request_fts_probe_scan();
!\retcode gpfts -A -D;
-- expect segments restored back to its preferred role, but mirror is down
select content, preferred_role, role, status, mode
from gp_segment_configuration
where content = 0;
-- wait some seconds until the promotion is done.
select pg_sleep(2);
-- start_ignore
-- reset GUCs
alter system set gp_fts_probe_retries to default;
alter system set gp_fts_probe_timeout to default;
select pg_reload_conf();
-- end_ignore
-- wait some seconds until the promotion is done.
select pg_sleep(2);
-- -- wait for content 0 (earlier mirror, now primary) to finish the promotion
0U: select 1;
-- create tablespace to test if it works with gprecoverseg -F (pg_basebackup)
!\retcode mkdir -p /tmp/mirror_promotion_tablespace_loc;
create tablespace mirror_promotion_tablespace location '/tmp/mirror_promotion_tablespace_loc';
create table mirror_promotion_tblspc_heap_table (a int) tablespace mirror_promotion_tablespace;
-- -- now, let's fully recover the mirror
!\retcode gprecoverseg -aF --no-progress;
drop table mirror_promotion_tblspc_heap_table;
drop tablespace mirror_promotion_tablespace;
-- loop while segments come in sync
select wait_until_all_segments_synchronized();
-- now, the content 0 primary and mirror should be at their preferred role
-- and up and in-sync
select content, preferred_role, role, status, mode
from gp_segment_configuration
where content = 0;