blob: c2a37c8af6e647b2a44c1328a7d7f8a3028515d8 [file] [log] [blame]
-- Misc tests related to dispatching queries to segments.
CREATE DATABASE dispatch_test_db;
\c dispatch_test_db;
CREATE EXTENSION IF NOT EXISTS gp_inject_fault;
-- Mask out the whoami message
-- start_matchsubs
-- m/^ \(seg\d .*:\d+\)/
-- s/^ \(seg\d .*:\d+\)//
-- m/^DETAIL: Internal error: No motion listener port \(seg\d.*:.*\)/
-- s/^DETAIL: Internal error: No motion listener port \(seg\d.*:.*\)/DETAIL: Internal error: No motion listener port/
-- m/WARNING:.*Any temporary tables for this session have been dropped because the gang was disconnected/
-- s/session id \=\s*\d+/session id \= DUMMY/gm
-- m/^DETAIL: connection to server at .*, port \d+ failed: FATAL: fault triggered, fault name:'send_qe_details_init_backend' fault type:'error'/
-- s/^DETAIL: connection to server at .*, port \d+ failed: FATAL: fault triggered, fault name:'send_qe_details_init_backend' fault type:'error'/^DETAIL: connection to server at XX, port XX failed: FATAL: fault triggered, fault name:'send_qe_details_init_backend' fault type:'error'/
-- m/ERROR: Query plan size limit exceeded, current size: 1[1-2]KB, max allowed size: 10KB/
-- s/ERROR: Query plan size limit exceeded, current size: 1[1-2]KB, max allowed size: 10KB/ERROR: Query plan size limit exceeded, current size: /
-- end_matchsubs
-- skip FTS probes always
select gp_inject_fault_infinite('fts_probe', 'skip', 1);
gp_inject_fault_infinite
--------------------------
Success:
(1 row)
select gp_request_fts_probe_scan();
gp_request_fts_probe_scan
---------------------------
t
(1 row)
-- Test cdbdisp_getDispatchResults() fail in CdbDispatchCopyStart()
create table dispatch_tbl (a int) distributed by (a);
insert into dispatch_tbl values (2),(1),(5);
-- fault on seg1 to block insert command into the dispatch_tbl table
select gp_inject_fault('DoCopyToFail', 'error', '', '', '',
1, 1, 0, dbid) from gp_segment_configuration
where content = 1 and role = 'p';
gp_inject_fault
-----------------
Success:
(1 row)
copy dispatch_tbl to stdout;
ERROR: fault triggered, fault name:'DoCopyToFail' fault type:'error' (seg1 192.168.235.128:7003 pid=68980)
--select gp_wait_until_triggered_fault('DoCopyToFail', 1, dbid)
-- from gp_segment_configuration where content = 1 and role = 'p';
select gp_inject_fault('DoCopyToFail', 'reset', dbid)
from gp_segment_configuration
where content = 1 and role = 'p';
gp_inject_fault
-----------------
Success:
(1 row)
select * from dispatch_tbl;
a
---
1
5
2
(3 rows)
drop table dispatch_tbl;
-- Test quoting of GUC values and database names when they're sent to segments
set client_min_messages='warning';
DROP DATABASE IF EXISTS "funny""db'with\\quotes";
reset client_min_messages;
CREATE DATABASE "funny""db'with\\quotes";
ALTER DATABASE "funny""db'with\\quotes" SET search_path="funny""schema'with\\quotes";
-- test if we can connect using dbconn
\! python3 test_dbconn.py 0
-- Leave the database in place, to also test gpcheckcat and pg_upgrade after
-- all the regression tests have completed.
-- set_config() used to have quoting problems as well when dispatching to
-- segments.
CREATE TABLE IF NOT EXISTS should_be_visible();
CREATE OR REPLACE FUNCTION visible_to_segments(tbl oid)
RETURNS SETOF boolean
EXECUTE ON ALL SEGMENTS
LANGUAGE plpgsql AS
$$
BEGIN
RETURN NEXT pg_catalog.pg_table_is_visible(tbl);
END
$$;
-- Spin up any gangs necessary to handle the SELECT DISTINCT so that they
-- receive the new search_path setting from the set_config() dispatch.
SELECT DISTINCT visible_to_segments('public.should_be_visible'::regclass);
visible_to_segments
---------------------
t
(1 row)
-- Now change search_path. This setting should not affect the visibility of the
-- public schema if it's dispatched correctly.
SELECT set_config('search_path', 'pg_catalog,public', false);
set_config
-------------------
pg_catalog,public
(1 row)
-- Check that our test table is still visible.
SELECT DISTINCT visible_to_segments('public.should_be_visible'::regclass);
visible_to_segments
---------------------
t
(1 row)
RESET search_path;
-- There used to be a bug in the quoting when the search_path setting was sent
-- to the segment. It was not easily visible when search_path was set with a
-- SET command, only when the setting was sent as part of the startup packet.
-- Set search_path as a per-user setting so that we can test that.
CREATE DATABASE "dispatch test db";
ALTER DATABASE "dispatch test db" SET search_path="my schema",public;
\c "dispatch test db"
CREATE SCHEMA "my schema";
-- Reconnect so that the search_path includes "my schema".
\c
-- Create a table with the same name in both schemas, "my schema" and public.
CREATE TABLE "my table" (t text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO "my table" VALUES ('myschema.mytable');
CREATE TABLE public."my table" (t text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO public."my table" VALUES ('public.mytable');
SELECT t as unquoted FROM "my table";
unquoted
------------------
myschema.mytable
(1 row)
SELECT t as myschema FROM "my schema"."my table";
myschema
------------------
myschema.mytable
(1 row)
SELECT t as public FROM public."my table";
public
----------------
public.mytable
(1 row)
DROP TABLE "my table";
DROP TABLE public."my table";
-- Create another table with the same name. To make sure the DROP worked
-- and dropped the correct table.
CREATE TABLE "my table" (id integer);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
DROP TABLE "my table";
-- test ALTER DATABASE SET FROM CURRENT
SHOW search_path;
search_path
---------------------
"my schema", public
(1 row)
ALTER DATABASE "dispatch test db" RESET ALL;
\c
SHOW search_path;
search_path
-----------------
"$user", public
(1 row)
SET search_path="my schema", public;
SHOW search_path;
search_path
---------------------
"my schema", public
(1 row)
ALTER DATABASE "dispatch test db" SET search_path FROM CURRENT;
\c
SHOW search_path;
search_path
---------------------
"my schema", public
(1 row)
SELECT 0 as gp_segment_id, setconfig FROM gp_dist_random('pg_db_role_setting')
WHERE setdatabase = (SELECT oid FROM pg_database WHERE datname='dispatch test db') and gp_segment_id = 0
UNION
SELECT -1 as gp_segment_id, setconfig FROM pg_db_role_setting
WHERE setdatabase = (SELECT oid FROM pg_database WHERE datname='dispatch test db')
ORDER BY gp_segment_id;
gp_segment_id | setconfig
---------------+-----------------------------------------------
-1 | {"search_path=\"my schema\", public"}
0 | {"search_path=\"\"\"my schema\"\", public\""}
(2 rows)
-- Clean up
\c dispatch_test_db
DROP DATABASE "dispatch test db";
-- Test gp_max_plan_size limit
set gp_max_plan_size='10 kB';
create table dispatchtesttab(t text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select * from dispatchtesttab where t = repeat('x', 1024*11);
ERROR: Query plan size limit exceeded, current size: 11KB, max allowed size: 10KB
HINT: Size controlled by gp_max_plan_size
--
-- test QD will report failure if QE fails to send its motion_listener back
-- during backend initialization
--
select gp_inject_fault('send_qe_details_init_backend', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
-- inject a 'skip' fault before QE sends its motion_listener
select gp_inject_fault('send_qe_details_init_backend', 'skip', 2);
gp_inject_fault
-----------------
Success:
(1 row)
-- terminate exiting QEs first
\c dispatch_test_db
-- verify failure will be reported
SELECT 1 FROM gp_dist_random('gp_id');
ERROR: failed to acquire resources on one or more segments
DETAIL: Internal error: No motion listener port (seg0 127.0.0.1:40000)
-- reset fault injector
select gp_inject_fault('send_qe_details_init_backend', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
--
-- Test suit : test gang creation and commands dispatching
--
--start_ignore
drop table if exists dispatch_test;
NOTICE: table "dispatch_test" does not exist, skipping
drop table if exists dispatch_test_t1;
NOTICE: table "dispatch_test_t1" does not exist, skipping
drop table if exists dispatch_test_t2;
NOTICE: table "dispatch_test_t2" does not exist, skipping
drop table if exists dispatch_test_t3;
NOTICE: table "dispatch_test_t3" does not exist, skipping
--end_ignore
create table dispatch_test as select i as c1 from generate_series(1, 10) i;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table dispatch_test_t1 (c1 int, c2 int, c3 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table dispatch_test_t2 (c1 int, c2 int, c3 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table dispatch_test_t3 (c1 int, c2 int, c3 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into dispatch_test_t1 values (1,1,2);
insert into dispatch_test_t2 values (2,1,2);
insert into dispatch_test_t3 values (3,1,2);
CREATE OR REPLACE FUNCTION cleanupAllGangs() RETURNS BOOL
AS '@abs_builddir@/regress@DLSUFFIX@', 'cleanupAllGangs' LANGUAGE C;
-- check if segments has backend running
CREATE OR REPLACE FUNCTION hasBackendsExist(int) RETURNS SETOF BOOL
AS '@abs_builddir@/regress@DLSUFFIX@', 'hasBackendsExist' LANGUAGE C EXECUTE ON ALL SEGMENTS;
-- check if QD has reusable gangs
CREATE OR REPLACE FUNCTION hasGangsExist() RETURNS BOOL
AS '@abs_builddir@/regress@DLSUFFIX@', 'hasGangsExist' LANGUAGE C;
-- test log debug related code within dispatch
set gp_log_gang to debug;
set log_min_messages to DEBUG;
-- Case 1.1
-- A segment in recovery mode, writer gang retry gp_gang_creation_retry_count times and finally success
-- set maximum retry time to 120 seconds, this should be long enough for segment
-- recovery back. otherwise it should be bug somewhere
set gp_gang_creation_retry_count to 120;
set gp_gang_creation_retry_timer to 1000;
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
set max_parallel_workers_per_gather=0;
-- trigger fault and report segment 0 in recovery for 5 times
select gp_inject_fault('process_startup_packet', 'skip', '', 'dispatch_test_db', '', 1, 5, 0, 2::smallint);
gp_inject_fault
-----------------
Success:
(1 row)
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
-- should success after retry
select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3
where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3;
c1 | c2 | c3 | c1 | c2 | c3 | c1 | c2 | c3
----+----+----+----+----+----+----+----+----
1 | 1 | 2 | 2 | 1 | 2 | 3 | 1 | 2
(1 row)
select gp_inject_fault('process_startup_packet', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
-- Case 1.2
-- A segment in recovery mode for long time, writer gang retry gp_gang_creation_retry_count times and finally failed
-- set maximun retry time to 0.4s, so we can test if gp_gang_creation_retry_count
-- is actually work
set gp_gang_creation_retry_count to 2;
set gp_gang_creation_retry_timer to 200;
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
-- trigger fault and put segment 0 into recovery mode
select gp_inject_fault('process_startup_packet', 'skip', '', 'dispatch_test_db', '', 1, 5, 0, 2::smallint);
gp_inject_fault
-----------------
Success:
(1 row)
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
-- should fail after 2 retries
select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3
where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3;
ERROR: failed to acquire resources on one or more segments
DETAIL: Segments are in reset/recovery mode.
set gp_gang_creation_retry_count to 10;
-- should success and process_startup_packet will be invalid after this query
select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3
where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3;
c1 | c2 | c3 | c1 | c2 | c3 | c1 | c2 | c3
----+----+----+----+----+----+----+----+----
1 | 1 | 2 | 2 | 1 | 2 | 3 | 1 | 2
(1 row)
select gp_inject_fault('process_startup_packet', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
--start_ignore
-- enlarge the retry count
set gp_gang_creation_retry_count to 128 ;
-- this will block until segment 0 recovery back, or report an error
-- after 24 seconds.
select 'wait recovery finish' from gp_dist_random('gp_id');
?column?
----------------------
wait recovery finish
wait recovery finish
wait recovery finish
(3 rows)
--end_ignore
-- cleanup all reusable gangs
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
-- expect no zombie backends left on segments
select sum(case when h = 't' then 1 else 0 end) > 0 as hasbackends from hasbackendsexist(30) as h;
hasbackends
-------------
f
(1 row)
-- should success
select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3
where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3;
c1 | c2 | c3 | c1 | c2 | c3 | c1 | c2 | c3
----+----+----+----+----+----+----+----+----
1 | 1 | 2 | 2 | 1 | 2 | 3 | 1 | 2
(1 row)
-- Case 1.3
-- segment has non in-recovery-mode errors
-- when creating writer gang, an error reported and all gangs should be cleaned.
-- when creating reader gang, an error reported and only current gang is cleaned.
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
-- segment 0 report an error when get a request
-- the fault may be triggered by other backend process, such as dtx recovery process, specify a session id.
select gp_inject_fault('send_qe_details_init_backend', 'error', 2, current_setting('gp_session_id')::int);
gp_inject_fault
-----------------
Success:
(1 row)
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
-- expect failure
select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3
where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3;
ERROR: failed to acquire resources on one or more segments: fault injector
DETAIL: connection to server at "127.0.1.1", port 9002 failed: FATAL: fault triggered, fault name:'send_qe_details_init_backend' fault type:'error'
(seg0 127.0.0.1:40000)
-- expect no resuable gang exist
select * from hasGangsExist();
hasgangsexist
---------------
f
(1 row)
-- expect no zombie backends left on segments.
select sum(case when h = 't' then 1 else 0 end) > 0 as hasbackends from hasbackendsexist(30) as h;
hasbackends
-------------
f
(1 row)
-- cleanupAllGangs();
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
select gp_inject_fault('send_qe_details_init_backend', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
-- segment 0 report an error when get the second request (reader gang creation request)
-- the fault may be triggered by other backend process, such as dtx recovery process, specify a session id.
select gp_inject_fault('send_qe_details_init_backend', 'error', '', '', '', 3, 3, 0, 2::smallint, current_setting('gp_session_id')::int);
gp_inject_fault
-----------------
Success:
(1 row)
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
-- expect failure
select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3
where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3;
ERROR: failed to acquire resources on one or more segments: fault injector
DETAIL: connection to server at "127.0.1.1", port 9002 failed: FATAL: fault triggered, fault name:'send_qe_details_init_backend' fault type:'error'
(seg0 127.0.0.1:40000)
-- expect resuable gang exist
select * from hasGangsExist();
hasgangsexist
---------------
t
(1 row)
-- expect QEs exist.
select sum(case when h = 't' then 1 else 0 end) > 0 as hasbackends from hasbackendsexist(0) as h;
hasbackends
-------------
t
(1 row)
select gp_inject_fault('send_qe_details_init_backend', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
-- Case 1.4
-- Test createGang timeout.
-- gp_segment_connect_timeout = 0 : wait forever
-- gp_segment_connect_timeout = 1 : wait 1 second
set gp_segment_connect_timeout to 1;
select gp_inject_fault('process_startup_packet', 'suspend', '', 'dispatch_test_db', '', 1, 1, 0, 2::smallint);
gp_inject_fault
-----------------
Success:
(1 row)
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
-- expect timeout failure
select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3
where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3;
ERROR: failed to acquire resources on one or more segments
DETAIL: timeout expired
(seg0 127.0.0.1:40000)
set gp_segment_connect_timeout to 0;
select gp_inject_fault('process_startup_packet', 'resume', 2);
gp_inject_fault
-----------------
Success:
(1 row)
select gp_inject_fault('process_startup_packet', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
select gp_inject_fault('process_startup_packet', 'sleep', '', '', '', 1, 1, 2, 2::smallint);
gp_inject_fault
-----------------
Success:
(1 row)
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
-- expect success
select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3
where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3;
c1 | c2 | c3 | c1 | c2 | c3 | c1 | c2 | c3
----+----+----+----+----+----+----+----+----
1 | 1 | 2 | 2 | 1 | 2 | 3 | 1 | 2
(1 row)
select gp_inject_fault('process_startup_packet', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
-- Case 1.5
-- query was cancelled when dispatching commands to one gang.
-- query should be cancelled as expected.
-- must set log_min_messages to default when using interrupt, there is a bug in fault injection.
set log_min_messages to default;
select gp_inject_fault('after_one_slice_dispatched', 'interrupt', 1);
gp_inject_fault
-----------------
Success:
(1 row)
-- should fail and report error
select * from dispatch_test_t1, dispatch_test_t2, dispatch_test_t3
where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3;
ERROR: canceling statement due to user request
select gp_inject_fault('after_one_slice_dispatched', 'reset', 1);
gp_inject_fault
-----------------
Success:
(1 row)
-- Case 1.6
-- For a query need two phase commit, if error happens before commands are actually
-- dispatched, the query can be cancelled correctly.
--
select gp_inject_fault('before_one_slice_dispatched', 'error', 1);
gp_inject_fault
-----------------
Success:
(1 row)
update dispatch_test_t1 set c2 = 3 from dispatch_test_t2, dispatch_test_t3
where dispatch_test_t1.c2 = dispatch_test_t2.c2 and dispatch_test_t2.c3 = dispatch_test_t3.c3;
ERROR: fault triggered, fault name:'before_one_slice_dispatched' fault type:'error'
select gp_inject_fault('before_one_slice_dispatched', 'reset', 1);
gp_inject_fault
-----------------
Success:
(1 row)
reset max_parallel_workers_per_gather;
-- test logging of gang management
SET gp_log_gang = 'debug';
-- test INFO raised from segments with various kinds of fields
CREATE OR REPLACE FUNCTION udf_raise_info() RETURNS BOOL
AS '@abs_builddir@/regress@DLSUFFIX@', 'gangRaiseInfo' LANGUAGE C;
SELECT udf_raise_info() FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0;
INFO: testing hook function MPPnoticeReceiver (seg0 slice1 127.0.0.1:40000 pid=11337)
DETAIL: this test aims at covering code paths not hit before
HINT: no special hint
CONTEXT: PL/C function defined in regress.c
udf_raise_info
----------------
t
(1 row)
--
-- Error out when dispatching DTX command to busy primary writer gang
--
CREATE TABLE dtx_dispatch_t AS SELECT i AS c1 FROM generate_series(1,10) i;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE or REPLACE FUNCTION dtx_dispatch_f(integer)
RETURNS INTEGER
AS
$$
BEGIN
return $1 + 1;
EXCEPTION
WHEN division_by_zero THEN
RAISE NOTICE 'caught division_by_zero';
return 0;
END
$$
LANGUAGE plpgSQL READS SQL DATA;
SELECT dtx_dispatch_f(foo.c1) FROM (SELECT c1 FROM dtx_dispatch_t WHERE c1='1' limit 1) foo;
ERROR: query plan with multiple segworker groups is not supported
HINT: dispatching DTX commands to a busy gang
CONTEXT: PL/pgSQL function "dtx_dispatch_f" line 1 during statement block entry
DROP FUNCTION dtx_dispatch_f(integer);
DROP TABLE dtx_dispatch_t;
-- Test interconnect is shut down under portal failure
CREATE OR REPLACE FUNCTION numActiveMotionConns() RETURNS INT
AS '@abs_builddir@/regress@DLSUFFIX@', 'numActiveMotionConns' LANGUAGE C;
CREATE TABLE foo_test AS SELECT i AS c1 FROM generate_series(1, 10) i;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
SELECT c1/0 FROM foo_test WHERE c1 = 1;
ERROR: division by zero (seg0 slice1 127.0.0.1:40000 pid=11337)
SELECT numActiveMotionConns();
numactivemotionconns
----------------------
0
(1 row)
BEGIN;
DECLARE C1 CURSOR FOR SELECT * FROM foo_test;
FETCH BACKWARD 1 FROM C1;
ERROR: backward scan is not supported in this version of Apache Cloudberry
END;
SELECT numActiveMotionConns();
numactivemotionconns
----------------------
0
(1 row)
DROP FUNCTION numActiveMotionConns();
DROP TABLE foo_test;
--
-- Test dangling Gang would be destroyed if interrupted during the creation
--
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
select gp_inject_fault('gang_created', 'reset', 1);
gp_inject_fault
-----------------
Success:
(1 row)
select gp_inject_fault('gang_created', 'error', 1);
gp_inject_fault
-----------------
Success:
(1 row)
select 1 from gp_dist_random('gp_id') limit 1;
ERROR: fault triggered, fault name:'gang_created' fault type:'error'
-- if previous gang is not destroyed, snapshot collision would happen
select 1 from gp_dist_random('gp_id') limit 1;
?column?
----------
1
(1 row)
select gp_inject_fault('gang_created', 'reset', 1);
gp_inject_fault
-----------------
Success:
(1 row)
--
-- Test that an error happens after a big command is dispatched.
--
select gp_inject_fault('after_one_slice_dispatched', 'error', 1);
gp_inject_fault
-----------------
Success:
(1 row)
select * from gp_dist_random('gp_id')
where gpname > (select * from repeat('sssss', 10000000));
ERROR: fault triggered, fault name:'after_one_slice_dispatched' fault type:'error'
select gp_inject_fault('after_one_slice_dispatched', 'reset', 1);
gp_inject_fault
-----------------
Success:
(1 row)
select * from gp_dist_random('gp_id')
where gpname > (select * from repeat('sssss', 10000000));
gpname | numsegments | dbid | content
--------+-------------+------+---------
(0 rows)
-- Cover all transaction isolation levels to ensure that a gang can be
-- created. Connect again so that existing gangs are destroyed.
\connect
begin isolation level serializable;
end;
\connect
begin isolation level repeatable read;
end;
\connect
begin isolation level read committed;
end;
\connect
begin isolation level read uncommitted;
end;
-- Test session will be reset if the writer gang of current global
-- transaction is lost
begin;
create temp table disp_temp_test (c1 int, c2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into disp_temp_test values (1, 2);
savepoint s1;
create temp table disp_temp_test1 (c1 int, c2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into disp_temp_test1 values (1, 2);
-- Let cleanupSegdb() return false and writer gang be destroyed
select gp_inject_fault('cleanup_qe', 'skip', 1);
gp_inject_fault
-----------------
Success:
(1 row)
select * from disp_temp_test1 where c1 / 0 = 9;
ERROR: division by zero (seg2 slice1 127.0.0.1:25434 pid=27218)
rollback to savepoint s1;
WARNING: writer gang of current global transaction is lost
WARNING: Any temporary tables for this session have been dropped because the gang was disconnected (session id = 104)
ERROR: Could not rollback to savepoint (ROLLBACK TO SAVEPOINT s1)
rollback;
-- session is reset, temp table is dropped.
select * from disp_temp_test;
ERROR: relation "disp_temp_test" does not exist
LINE 1: select * from disp_temp_test;
^
select * from disp_temp_test1;
ERROR: relation "disp_temp_test1" does not exist
LINE 1: select * from disp_temp_test1;
^
-- reset fault
select gp_inject_fault('cleanup_qe', 'reset', 1);
gp_inject_fault
-----------------
Success:
(1 row)
-- resume FTS probes
select gp_inject_fault('fts_probe', 'reset', 1);
gp_inject_fault
-----------------
Success:
(1 row)
--
-- Test portal cleanup
-- prepare a query contains init plan, main plan create a unnamed portal
-- and will need multiple slices , init plan also need to allocated multiple
-- slices in a named portal (eg: a cursor). The executing order is:
-- 1. gangs of main plan is pre-assigned 2. init plan is executed 3. main plan
-- is executed. This test it meant to test that cleanup of the named portal in
-- the end of step2 will not affect the pre-allocated gangs of the main plan.
--
create table dispatch_foo (c1 int, c2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table dispatch_bar (c1 int, c2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create or replace function dispatch_foo_func()
returns int
as
$BODY$
declare
t_record record;
BEGIN
drop table if exists tmp1;
create temp table tmp1 as select dispatch_foo.c1, dispatch_bar.c2 from dispatch_foo, dispatch_bar;
for t_record in (select count(*) from dispatch_foo, dispatch_bar)
loop
NULL;
end loop;
return 1;
END;
$BODY$
language plpgsql volatile;
set gp_cached_segworkers_threshold to 1;
select count(*) from dispatch_foo, dispatch_bar where exists (select dispatch_foo_func());
count
-------
0
(1 row)
reset gp_cached_segworkers_threshold;
--
-- Test executor double fault
-- check PG_TRY/PG_CATCH - we should always return an original error
-- (for a query and its explain analyze)
--
drop table if exists double_fault;
create table double_fault(a int) distributed by (a);
insert into double_fault select generate_series(1, 10);
create or replace function fault_exec_plan(explain_analyze bool default true) returns void as $_$
declare
query text;
begin
-- close parallel when fault inject
set enable_parallel = off;
perform gp_inject_fault('executor_pre_tuple_processed', 'reset', dbid) from gp_segment_configuration;
perform gp_inject_fault('send_exec_stats', 'reset', dbid) from gp_segment_configuration;
perform gp_inject_fault('executor_pre_tuple_processed', 'error', dbid)
from gp_segment_configuration where role = 'p' and content > -1;
perform gp_inject_fault('send_exec_stats', 'error', dbid)
from gp_segment_configuration where role = 'p' and content > -1;
if explain_analyze then
query := 'explain analyze select count(*) from double_fault';
else
query := 'select count(*) from double_fault';
end if;
execute query;
perform gp_inject_fault('executor_pre_tuple_processed', 'reset', dbid) from gp_segment_configuration;
perform gp_inject_fault('send_exec_stats', 'reset', dbid) from gp_segment_configuration;
exception when fault_inject then
perform gp_inject_fault('executor_pre_tuple_processed', 'reset', dbid) from gp_segment_configuration;
perform gp_inject_fault('send_exec_stats', 'reset', dbid) from gp_segment_configuration;
raise exception $$'%' fault triggered$$, case
when coalesce(substring(sqlerrm from 'send_exec_stats'), '') != '' then 'send_exec_stats'
when coalesce(substring(sqlerrm from 'executor_pre_tuple_processed'), '') != '' then 'executor_pre_tuple_processed'
end;
end;
$_$ language plpgsql;
-- Query raising 'executor_pre_tuple_processed' error
select fault_exec_plan(false);
ERROR: 'executor_pre_tuple_processed' fault triggered
-- Same query with explain analyze (should raise
-- 'executor_pre_tuple_processed' not 'send_exec_stats')
select fault_exec_plan(true);
ERROR: 'executor_pre_tuple_processed' fault triggered
\c regression
DROP DATABASE dispatch_test_db;
-- issue: https://github.com/greenplum-db/gpdb/issues/13393
-- We should avoid a double free and resultant PANIC if the
-- process of recycling the gang is interrupted by a signal
CREATE EXTENSION if not exists gp_inject_fault;
create table t13393(tc1 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'tc1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into t13393 select generate_series(1,10000);
-- start_ignore
analyze gp_segment_configuration;
WARNING: skipping "gp_segment_configuration" --- cannot analyze non-tables or special system tables
-- end_ignore
SELECT gp_inject_fault('cdbcomponent_recycle_idle_qe_error', 'interrupt', dbid, current_setting('gp_session_id')::int)
from gp_segment_configuration where content=-1 and role='p';
gp_inject_fault
-----------------
Success:
(1 row)
select count(*) from gp_segment_configuration a, t13393 ,gp_segment_configuration b where a.dbid = t13393.tc1 limit 0;
count
-------
(0 rows)
SELECT gp_inject_fault('cdbcomponent_recycle_idle_qe_error', 'reset', dbid, current_setting('gp_session_id')::int)
from gp_segment_configuration where content=-1 and role='p';
gp_inject_fault
-----------------
Success:
(1 row)
drop table t13393;
-- test for print create time for gang.
CREATE OR REPLACE FUNCTION cleanupAllGangs() RETURNS BOOL
AS '@abs_builddir@/regress@DLSUFFIX@', 'cleanupAllGangs' LANGUAGE C;
-- cleanupAllGangs();
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)
show gp_print_create_gang_time;
gp_print_create_gang_time
---------------------------
off
(1 row)
-- create a new n-gang
set gp_print_create_gang_time=on;
INFO: The shortest establish conn time: 8.46 ms, segindex: 0,
The longest establish conn time: 9.87 ms, segindex: 2
set optimizer=off;
INFO: (Gang) is reused
--gang reused
create table t_create_gang_time(tc1 int,tc2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'tc1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INFO: (Gang) is reused
--n-gang reused and 1-gang is created.
select * from t_create_gang_time t1, t_create_gang_time t2 where t1.tc1=2;
INFO: (Slice1) is reused
INFO: (Slice2) The shortest establish conn time: 4.80 ms, segindex: 0,
The longest establish conn time: 4.80 ms, segindex: 0
tc1 | tc2 | tc1 | tc2
-----+-----+-----+-----
(0 rows)
explain analyze select * from t_create_gang_time t1, t_create_gang_time t2 where t1.tc1=2;
INFO: (Slice1) is reused
INFO: (Slice2) is reused
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000130517.82 rows=7413210 width=16) (actual time=0.968..0.974 rows=0 loops=1)
-> Nested Loop (cost=10000000000.00..10000031675.02 rows=2471070 width=16) (never executed)
-> Broadcast Motion 1:3 (slice2; segments: 1) (cost=0.00..393.90 rows=86 width=8) (never executed)
-> Seq Scan on t_create_gang_time t1 (cost=0.00..392.75 rows=29 width=8) (never executed)
Filter: (tc1 = 2)
-> Materialize (cost=0.00..464.50 rows=28700 width=8) (never executed)
-> Seq Scan on t_create_gang_time t2 (cost=0.00..321.00 rows=28700 width=8) (never executed)
Optimizer: Postgres query optimizer
Planning Time: 0.285 ms
(slice0) Executor memory: 38K bytes.
(slice1) Executor memory: 39K bytes avg x 3 workers, 39K bytes max (seg0).
(slice2) Executor memory: 36K bytes (seg0).
Memory used: 128000kB
Execution Time: 1.948 ms
(14 rows)
reset gp_print_create_gang_time;
reset optimizer;
drop function cleanupAllGangs();
drop table t_create_gang_time;