| -- This test performs segment reconfiguration when "alter resource group" is executed in the two phase commit. |
| -- The steps are, when run "alter resource group", before QD broadcasts commit prepared command to QEs(the |
| -- second phase of 2PC), we trigger an error and cause one primary segment down. |
| -- The expectation is "alter resource group" can run successfully since the mirror segment is UP. |
| -- After recover the segment, there is no error or blocking. |
| |
| -- set these values purely to cut down test time, as default fts trigger is |
| -- every min and 5 retries |
| alter system set gp_fts_probe_interval to 10; |
| alter system set gp_fts_probe_retries to 0; |
| select pg_reload_conf(); |
| |
| 1:create resource group rgroup_seg_down with (CPU_RATE_LIMIT=35, MEMORY_LIMIT=35, CONCURRENCY=10); |
| |
| -- inject an error in function dtm_broadcast_commit_prepared, that is before QD broadcasts commit prepared command to QEs |
| 2:select gp_inject_fault_infinite('dtm_broadcast_commit_prepared', 'suspend', dbid) from gp_segment_configuration where role='p' and content=-1; |
| -- this session will pend here since the above injected fault |
| 1&:alter resource group rgroup_seg_down set CONCURRENCY 20; |
| -- this injected fault can make dispatcher think the primary is down |
| 2:select gp_inject_fault('fts_conn_startup_packet', 'error', dbid) from gp_segment_configuration where role='p' and content=0; |
| 2:select gp_request_fts_probe_scan(); |
| -- make sure one primary segment is down. |
| 2:select status = 'd' from gp_segment_configuration where content = 0 and role = 'm'; |
| -- reset the injected fault on QD and the "alter resource group" in session1 can continue |
| 2:select gp_inject_fault('dtm_broadcast_commit_prepared', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1; |
| -- reset the injected fault on primary segment |
| 2:select gp_inject_fault('fts_conn_startup_packet', 'reset', dbid) from gp_segment_configuration where content=0; |
| 1<: |
| -- make sure "alter resource group" has taken effect. |
| 1:select concurrency from gp_toolkit.gp_resgroup_config where groupname = 'rgroup_seg_down'; |
| 2q: |
| |
| !\retcode gprecoverseg -aF --no-progress; |
| |
| -- loop while segments come in sync |
| 1:select wait_until_all_segments_synchronized(); |
| |
| !\retcode gprecoverseg -ar; |
| |
| -- loop while segments come in sync |
| 1:select wait_until_all_segments_synchronized(); |
| |
| -- verify no segment is down after recovery |
| 1:select count(*) from gp_segment_configuration where status = 'd'; |
| |
| -- verify resource group |
| 1:select concurrency from gp_toolkit.gp_resgroup_config where groupname = 'rgroup_seg_down'; |
| 1:drop resource group rgroup_seg_down; |
| |
| 1:alter system reset gp_fts_probe_interval; |
| 1:alter system reset gp_fts_probe_retries; |
| 1:select pg_reload_conf(); |
| 1q: |
| |