| -- Verify that a session hitting a FATAL error due to OOM has no effect on other sessions.
| -- Reported in https://github.com/greenplum-db/gpdb/issues/12399
| |
| create extension if not exists gp_inject_fault; |
| CREATE |
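| -- gp_inject_fault provides the SQL-callable fault-injection functions used throughout this test.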
| |
| 1: select gp_inject_fault('make_dispatch_result_error', 'skip', dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
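| -- The 'make_dispatch_result_error' fault makes the coordinator's dispatcher fail to
| -- allocate dispatch results, simulating an OOM during dispatch (see the FATAL from
| -- cdbdisp_async.c below).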
| 2: begin; |
| BEGIN |
| |
| -- session 1 will hit a FATAL error.
| 1: select count(*) > 0 from gp_dist_random('pg_class'); |
| FATAL: could not allocate resources for segworker communication (cdbdisp_async.c:319) |
| server closed the connection unexpectedly |
| This probably means the server terminated abnormally |
| before or while processing the request. |
| |
| -- session 2 should be unaffected.
| 2: select count(*) > 0 from gp_dist_random('pg_class'); |
| ?column? |
| ---------- |
| t |
| (1 row) |
| 2: commit; |
| COMMIT |
| 1q: ... <quitting> |
| 2q: ... <quitting> |
| |
| select gp_inject_fault('make_dispatch_result_error', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| |
| -- |
| -- Test case for the ShareInputScan wait event
| -- |
| |
| create table test_waitevent(i int); |
| CREATE |
| insert into test_waitevent select generate_series(1,1000); |
| INSERT 1000 |
| |
| 1: set optimizer = off; |
| SET |
| 1: set gp_cte_sharing to on; |
| SET |
| 1: set max_parallel_workers_per_gather = 0; |
| SET |
| 1: select gp_inject_fault_infinite('shareinput_writer_notifyready', 'suspend', 2); |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
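| -- With optimizer off, gp_cte_sharing on and parallel workers disabled, the shared CTE
| -- below is executed with ShareInputScan nodes. The suspended fault (injected on dbid 2,
| -- assumed here to be the content-0 primary) keeps the writer from signalling readiness,
| -- so the query blocks in a ShareInputScan wait.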
| 1&: WITH a1 as (select * from test_waitevent), a2 as (select * from test_waitevent) SELECT sum(a1.i) FROM a1 INNER JOIN a2 ON a2.i = a1.i UNION ALL SELECT count(a1.i) FROM a1 INNER JOIN a2 ON a2.i = a1.i; <waiting ...> |
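| -- While the query is blocked, snapshot backend activity on seg0 to a file; it is
| -- grepped below for the ShareInputScan wait event.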
| -- start_ignore |
| 2: copy (select pg_stat_get_activity(NULL) from gp_dist_random('gp_id') where gp_segment_id=0) to '/tmp/_gpdb_test_output.txt'; |
| COPY 9 |
| -- end_ignore |
| 2: select gp_wait_until_triggered_fault('shareinput_writer_notifyready', 1, 2); |
| gp_wait_until_triggered_fault |
| ------------------------------- |
| Success: |
| (1 row) |
| 2: select gp_inject_fault_infinite('shareinput_writer_notifyready', 'resume', 2); |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| 2: select gp_inject_fault_infinite('shareinput_writer_notifyready', 'reset', 2); |
| gp_inject_fault_infinite |
| -------------------------- |
| Success: |
| (1 row) |
| 2q: ... <quitting> |
| 1<: <... completed> |
| sum |
| -------- |
| 500500 |
| 1000 |
| (2 rows) |
| 1q: ... <quitting> |
| |
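| -- verify that the captured activity reports ShareInputScan as the wait event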
| !\retcode grep ShareInputScan /tmp/_gpdb_test_output.txt; |
| -- start_ignore |
| (100897,9460,10,"",active,"WITH a1 as (select * from test_waitevent), a2 as (select * from test_waitevent) SELECT sum(a1.i) FROM a1 INNER JOIN a2 ON a2.i = a1.i UNION ALL SELECT count(a1.i) FROM a1 INNER JOIN a2 ON a2.i = a1.i;",IPC,ShareInputScan,"Sat Mar 12 23:51:16.151757 2022 PST","Sat Mar 12 23:51:16.151757 2022 PST","Sat Mar 12 23:51:16.14545 2022 PST","Sat Mar 12 23:51:16.151797 2022 PST",127.0.0.1,,63602,,7398,"client backend",f,,,,,,,,f,,f,247,0,unknown) |
| |
| -- end_ignore |
| (exited with code 0) |
| |
| -- |
| -- Test for issue https://github.com/greenplum-db/gpdb/issues/12703 |
| -- |
| |
| -- Case for cdbgang_createGang_async |
| 1: create table t_12703(a int); |
| CREATE |
| |
| 1: begin;
| BEGIN |
| -- make a cursor so that we have a named portal |
| 1: declare cur12703 cursor for select * from t_12703; |
| DECLARE |
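| -- the named portal is what could previously lead to a PANIC once the gang is lost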
| |
| 2: select pg_ctl((select datadir from gp_segment_configuration c where c.role='p' and c.content=1), 'stop'); |
| pg_ctl |
| -------- |
| OK |
| (1 row) |
| -- the next SQL statement will trigger FTS to mark seg1 as down
| 2: select gp_request_fts_probe_scan(); |
| gp_request_fts_probe_scan |
| --------------------------- |
| t |
| (1 row) |
| !\retcode gpfts -A -D; |
| -- start_ignore |
| |
| -- end_ignore |
| (exited with code 0) |
| -- sleep a couple of seconds until the promotion of seg1's mirror is done
| 2: select pg_sleep(2); |
| pg_sleep |
| ---------- |
| |
| (1 row) |
| |
| -- this will take cdbgang_createGang_async's code path
| -- because some segments are DOWN. It should not PANIC
| -- even with a named portal in existence.
| 1: select * from t_12703; |
| ERROR: gang was lost due to cluster reconfiguration (cdbgang_async.c:98) |
| 1: abort; |
| ABORT |
| |
| 1q: ... <quitting> |
| 2q: ... <quitting> |
| |
| -- Case for cdbCopyEndInternal |
| -- Provide some data to copy in |
| 4: insert into t_12703 select * from generate_series(1, 10)i; |
| INSERT 10 |
| 4: copy t_12703 to '/tmp/t_12703'; |
| COPY 10 |
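| -- /tmp/t_12703 now holds the 10 rows that are copied back in below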
| -- make the COPY FROM statement hang at the entry point of cdbCopyEndInternal
| 4: select gp_inject_fault('cdb_copy_end_internal_start', 'suspend', dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
| 4q: ... <quitting> |
| 1&: copy t_12703 from '/tmp/t_12703'; <waiting ...> |
| select gp_wait_until_triggered_fault('cdb_copy_end_internal_start', 1, dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_wait_until_triggered_fault |
| ------------------------------- |
| Success: |
| (1 row) |
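| -- the COPY FROM is now suspended at cdb_copy_end_internal_start on the coordinator;
| -- stop the content-2 primary underneath it to break the gang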
| -- make the Gang's connection BAD
| select pg_ctl((select datadir from gp_segment_configuration c where c.role='p' and c.content=2), 'stop'); |
| pg_ctl |
| -------- |
| OK |
| (1 row) |
| !\retcode gpfts -A -D; |
| -- start_ignore |
| |
| -- end_ignore |
| (exited with code 0) |
| 2: select gp_request_fts_probe_scan(); |
| gp_request_fts_probe_scan |
| --------------------------- |
| t |
| (1 row) |
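| -- the probe marks the stopped content-2 primary as down so its mirror takes over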
| 2: begin; |
| BEGIN |
| select gp_inject_fault('cdb_copy_end_internal_start', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = -1; |
| gp_inject_fault |
| ----------------- |
| Success: |
| (1 row) |
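| -- resetting the fault lets the suspended COPY proceed into cdbCopyEndInternal,
| -- where it now encounters the broken gang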
| -- continue the COPY; it should not PANIC
| 1<: <... completed> |
| ERROR: MPP detected 1 segment failures, system is reconnected |
| 1q: ... <quitting> |
| -- session 2 is still alive (meaning no PANIC happened)
| 2: select 1; |
| ?column? |
| ---------- |
| 1 |
| (1 row) |
| 2: end; |
| END |
| 2q: ... <quitting> |
| |
| !\retcode gprecoverseg -aF --no-progress; |
| -- start_ignore |
| -- end_ignore |
| (exited with code 0) |
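| -- the full recovery (-F) above rebuilds the downed segments from the acting primaries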
| |
| -- wait until all segments come back in sync
| !\retcode gpfts -A -D; |
| -- start_ignore |
| |
| -- end_ignore |
| (exited with code 0) |
| select wait_until_all_segments_synchronized(); |
| wait_until_all_segments_synchronized |
| -------------------------------------- |
| OK |
| (1 row) |
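| -- rebalance the segments back to their preferred roles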
| |
| !\retcode gprecoverseg -ar; |
| -- start_ignore |
| -- end_ignore |
| (exited with code 0) |
| |
| -- wait until all segments come back in sync
| !\retcode gpfts -A -D; |
| -- start_ignore |
| |
| -- end_ignore |
| (exited with code 0) |
| select wait_until_all_segments_synchronized(); |
| wait_until_all_segments_synchronized |
| -------------------------------------- |
| OK |
| (1 row) |
| |
| -- verify no segment is down after recovery |
| select count(*) from gp_segment_configuration where status = 'd'; |
| count |
| ------- |
| 0 |
| (1 row) |