--
-- Tests the spill files disk space accounting mechanism
--
-- CBDB_PARALLEL_FIXME: it's hard to make fault injection work with parallel processes.
set enable_parallel = false;
-- check segspace before test
reset statement_mem;
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
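-- Note: gp_toolkit.gp_workfile_mgr_used_diskspace reports the spill-file
-- ("workfile") disk space currently accounted on each segment. max = min = 0
-- means no segment enters the test with stale accounting. A per-segment
-- breakdown could be inspected with something like the following (a sketch;
-- assumes the view's usual segid/bytes columns):
--   select segid, bytes from gp_toolkit.gp_workfile_mgr_used_diskspace order by segid;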
-- create and populate the table
DROP TABLE IF EXISTS segspace_test_hj_skew;
NOTICE: table "segspace_test_hj_skew" does not exist, skipping
CREATE TABLE segspace_test_hj_skew (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 int) DISTRIBUTED BY (i1);
set gp_autostats_mode = none;
-- many values with i1 = 1
INSERT INTO segspace_test_hj_skew SELECT 1,i,i,i,i,i,i,i FROM
(select generate_series(1, nsegments * 50000) as i from
(select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar;
-- some nicely distributed values
INSERT INTO segspace_test_hj_skew SELECT i,i,i,i,i,i,i,i FROM
(select generate_series(1, nsegments * 100000) as i from
(select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar;
ANALYZE segspace_test_hj_skew;
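-- The first INSERT deliberately skews the data: all nsegments * 50000 rows
-- with i1 = 1 hash to the same segment, so the hash join below overflows
-- statement_mem on that segment and spills to workfiles. The skew could be
-- confirmed with a query along these lines (illustrative only):
--   select gp_segment_id, count(*) from segspace_test_hj_skew
--   where i1 = 1 group by 1;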
--
-- Testing that query cancellation during spilling updates the accounting
--
------------ Interrupting SELECT query that spills -------------------
-- enable the fault injector
select gp_inject_fault('exec_hashjoin_new_batch', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
select gp_inject_fault('exec_hashjoin_new_batch', 'interrupt', 2);
gp_inject_fault
-----------------
Success:
(1 row)
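-- The three-argument form gp_inject_fault(name, type, dbid) targets the
-- segment with the given dbid; 2 is the first primary segment in a typical
-- demo cluster (an assumption about the cluster layout). 'reset' clears any
-- previous state of the fault, and 'interrupt' cancels the query the next
-- time the executor reaches exec_hashjoin_new_batch, i.e. while the join is
-- switching to a new spill batch.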
set statement_mem=2048;
set gp_autostats_mode = none;
begin;
SELECT t1.* FROM segspace_test_hj_skew AS t1, segspace_test_hj_skew AS t2 WHERE t1.i1=t2.i2;
ERROR: canceling MPP operation (seg0 slice2 127.0.0.1:25432 pid=26876)
rollback;
--
-- GPDB with parallel mode enabled once produced errors like:
--   could not read from shared tuplestore temporary file: read only 0 of 8 bytes from file.
-- Enable parallel mode here to cover that case.
--
begin;
set local enable_parallel = true;
set local optimizer=off;
set local min_parallel_table_scan_size=0;
set local min_parallel_index_scan_size = 0;
set local force_parallel_mode=1;
EXPLAIN(COSTS OFF) SELECT t1.* FROM segspace_test_hj_skew AS t1, segspace_test_hj_skew AS t2 WHERE t1.i1=t2.i2;
QUERY PLAN
-----------------------------------------------------------------------
Gather Motion 6:1 (slice1; segments: 6)
-> Parallel Hash Join
Hash Cond: (t1.i1 = t2.i2)
-> Parallel Seq Scan on segspace_test_hj_skew t1
-> Parallel Hash
-> Redistribute Motion 6:6 (slice2; segments: 6)
Hash Key: t2.i2
Hash Module: 3
-> Parallel Seq Scan on segspace_test_hj_skew t2
Optimizer: Postgres query optimizer
(10 rows)
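-- The Parallel Hash build exchanges batch files between workers through
-- shared tuplestores; the "read only 0 of 8 bytes" error quoted above came
-- from reading those shared temporary files back. Running the join to
-- completion verifies the files now round-trip correctly.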
SELECT count(t1.*) FROM segspace_test_hj_skew AS t1, segspace_test_hj_skew AS t2 WHERE t1.i1=t2.i2;
count
--------
750000
(1 row)
rollback;
select gp_inject_fault('exec_hashjoin_new_batch', 'status', 2);
gp_inject_fault
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Success: fault name:'exec_hashjoin_new_batch' fault type:'interrupt' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'1' extra arg:'0' fault injection state:'completed' num times hit:'1' +
(1 row)
-- check used segspace after test
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
-- Run the test without fault injection
begin;
-- Use select count(*) so the output file doesn't contain 750000 rows.
select count(*) from
(SELECT t1.* FROM segspace_test_hj_skew AS t1, segspace_test_hj_skew AS t2 WHERE t1.i1=t2.i2) temp;
count
--------
750000
(1 row)
rollback;
-- check used segspace after test
reset statement_mem;
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
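-- The same pattern repeats below for INSERT ... SELECT and CREATE TABLE AS:
-- inject an interrupt into the spilling hash join, let the statement error
-- out, roll back, and verify that no workfile disk space stays accounted.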
------------ Interrupting INSERT INTO query that spills -------------------
drop table if exists segspace_t1_created;
NOTICE: table "segspace_t1_created" does not exist, skipping
create table segspace_t1_created (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 int) DISTRIBUTED BY (i1);
set statement_mem=2048;
set gp_autostats_mode = none;
-- enable the fault injector
select gp_inject_fault('exec_hashjoin_new_batch', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
select gp_inject_fault('exec_hashjoin_new_batch', 'interrupt', 2);
gp_inject_fault
-----------------
Success:
(1 row)
begin;
insert into segspace_t1_created
SELECT t1.* FROM segspace_test_hj_skew AS t1, segspace_test_hj_skew AS t2 WHERE t1.i1=t2.i2;
ERROR: canceling MPP operation (seg0 127.0.0.1:25432 pid=26876)
rollback;
select gp_inject_fault('exec_hashjoin_new_batch', 'status', 2);
gp_inject_fault
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Success: fault name:'exec_hashjoin_new_batch' fault type:'interrupt' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'1' extra arg:'0' fault injection state:'completed' num times hit:'1' +
(1 row)
-- check used segspace after test
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
-- Run the test without fault injection
begin;
insert into segspace_t1_created
SELECT t1.* FROM segspace_test_hj_skew AS t1, segspace_test_hj_skew AS t2 WHERE t1.i1=t2.i2;
rollback;
-- check used segspace after test
reset statement_mem;
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
--start_ignore
drop table if exists segspace_t1_created;
--end_ignore
------------ Interrupting CREATE TABLE AS query that spills -------------------
drop table if exists segspace_t1_created;
NOTICE: table "segspace_t1_created" does not exist, skipping
set statement_mem=2048;
set gp_autostats_mode = none;
-- enable the fault injector
select gp_inject_fault('exec_hashjoin_new_batch', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
select gp_inject_fault('exec_hashjoin_new_batch', 'interrupt', 2);
gp_inject_fault
-----------------
Success:
(1 row)
begin;
create table segspace_t1_created AS
SELECT t1.* FROM segspace_test_hj_skew AS t1, segspace_test_hj_skew AS t2 WHERE t1.i1=t2.i2;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'i1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
ERROR: canceling MPP operation (seg0 127.0.0.1:25432 pid=26876)
rollback;
select gp_inject_fault('exec_hashjoin_new_batch', 'status', 2);
gp_inject_fault
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Success: fault name:'exec_hashjoin_new_batch' fault type:'interrupt' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'1' extra arg:'0' fault injection state:'completed' num times hit:'1' +
(1 row)
-- check used segspace after test
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
-- Run the test without fault injection
begin;
create table segspace_t1_created AS
SELECT t1.* FROM segspace_test_hj_skew AS t1, segspace_test_hj_skew AS t2 WHERE t1.i1=t2.i2;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'i1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
rollback;
-- check used segspace after test
reset statement_mem;
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
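-- The remaining scenarios switch from 'interrupt' to 'error' faults:
-- workfile_write_failure makes a workfile write itself fail, a path that
-- historically leaked bytes from the per-segment workfile accounting.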
------------ workfile_limit_per_segment leak check during ERROR on UPDATE with CTE and MK_SORT -------------------
drop table if exists testsisc;
NOTICE: table "testsisc" does not exist, skipping
drop table if exists foo;
NOTICE: table "foo" does not exist, skipping
create table testsisc (i1 int, i2 int, i3 int, i4 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into testsisc select i, i % 1000, i % 100000, i % 75 from
(select generate_series(1, nsegments * 100000) as i from
(select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar;
create table foo (i int, j int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
set statement_mem=1024; -- 1MB, low enough on a 3-segment cluster to reproduce the leak.
set gp_resqueue_print_operator_memory_limits=on;
set gp_cte_sharing=on;
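-- gp_cte_sharing=on lets both references to the CTE share one materialized
-- result via a cross-slice shared input scan (SISC); with statement_mem this
-- low the shared sort spills, which is where workfile_write_failure fires.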
-- enable the fault injector
select gp_inject_fault('workfile_write_failure', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
select gp_inject_fault('workfile_write_failure', 'error', 2);
gp_inject_fault
-----------------
Success:
(1 row)
-- LEAK in UPDATE: update with sisc xslice sort
update foo set j=m.cc1 from (
with ctesisc as
(select * from testsisc order by i2)
select t1.i1 as cc1, t1.i2 as cc2
from ctesisc as t1, ctesisc as t2
where t1.i1 = t2.i2 ) as m;
ERROR: fault triggered, fault name:'workfile_write_failure' fault type:'error' (seg0 slice3 127.0.0.1:25432 pid=3175)
select gp_inject_fault('exec_hashjoin_new_batch', 'status', 2);
gp_inject_fault
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Success: fault name:'exec_hashjoin_new_batch' fault type:'interrupt' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'1' extra arg:'0' fault injection state:'completed' num times hit:'1' +
(1 row)
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
-- Run the test without fault injection
-- LEAK in UPDATE: update with sisc xslice sort
update foo set j=m.cc1 from (
with ctesisc as
(select * from testsisc order by i2)
select t1.i1 as cc1, t1.i2 as cc2
from ctesisc as t1, ctesisc as t2
where t1.i1 = t2.i2 ) as m;
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
reset statement_mem;
reset gp_resqueue_print_operator_memory_limits;
reset gp_cte_sharing;
------------ workfile_limit_per_segment leak check during ERROR on DELETE with APPEND-ONLY table -------------------
drop table if exists testsisc;
drop table if exists foo;
create table testsisc (i1 int, i2 int, i3 int, i4 int) WITH (appendonly=true, compresstype=zlib) ;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into testsisc select i, i % 1000, i % 100000, i % 75 from
(select generate_series(1, nsegments * 100000) as i from
(select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar;
create table foo (i int, j int) WITH (appendonly=true, compresstype=zlib);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into foo select i, i % 1000 from
(select generate_series(1, nsegments * 100000) as i from
(select count(*) as nsegments from gp_segment_configuration where role='p' and content >= 0) foo) bar;
set statement_mem=1024; -- 1MB, low enough on a 3-segment cluster to reproduce the leak.
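-- The DELETE scenario is repeated on append-optimized, zlib-compressed
-- tables because AO tables take a different executor path than heap tables;
-- the workfile accounting must survive a mid-write error on that path too.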
-- enable the fault injector
select gp_inject_fault('workfile_write_failure', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
select gp_inject_fault('workfile_write_failure', 'error', 2);
gp_inject_fault
-----------------
Success:
(1 row)
-- LEAK in DELETE with APPEND ONLY tables
delete from testsisc using (
select *
from foo
) src where testsisc.i1 = src.i;
ERROR: fault triggered, fault name:'workfile_write_failure' fault type:'error' (seg0 127.0.0.1:25432 pid=3143)
select gp_inject_fault('exec_hashjoin_new_batch', 'status', 2);
gp_inject_fault
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Success: fault name:'exec_hashjoin_new_batch' fault type:'interrupt' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'1' extra arg:'0' fault injection state:'completed' num times hit:'1' +
(1 row)
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
-- Run the test without fault injection
-- LEAK in DELETE with APPEND ONLY tables
delete from testsisc using (
select *
from foo
) src where testsisc.i1 = src.i;
reset statement_mem;
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
------------ workfile_limit_per_segment leak check during UPDATE of SORT -------------------
drop table if exists testsort;
NOTICE: table "testsort" does not exist, skipping
create table testsort (i1 int, i2 int, i3 int, i4 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into testsort select i, i % 1000, i % 100000, i % 75 from generate_series(0,1000000) i;
analyze testsort;
set statement_mem="1MB";
drop table if exists foo;
create table foo (c int, d int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into foo values (1, 1);
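-- With statement_mem at 1MB, sorting the ~1M rows of testsort cannot be done
-- in memory, so the sort spills to workfiles and workfile_write_failure
-- errors out on the first spill write, exercising cleanup of a partially
-- written file.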
-- enable the fault injector
select gp_inject_fault('workfile_write_failure', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
select gp_inject_fault('workfile_write_failure', 'error', 2);
gp_inject_fault
-----------------
Success:
(1 row)
-- hitting the injected error used to leak the workfile counter; verify it below
update foo set d = i1 from (select i1,i2 from testsort order by i2) x;
ERROR: fault triggered, fault name:'workfile_write_failure' fault type:'error' (seg0 slice1 127.0.0.1:25432 pid=27014)
-- check counter leak
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
select gp_inject_fault('exec_hashjoin_new_batch', 'status', 2);
gp_inject_fault
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Success: fault name:'exec_hashjoin_new_batch' fault type:'interrupt' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'1' extra arg:'0' fault injection state:'completed' num times hit:'1' +
(1 row)
-- Run the test without fault injection
-- no fault this time; the update should complete without leaking
-- start_ignore
update foo set d = i1 from (select i1,i2 from testsort order by i2) x;
-- end_ignore
-- check counter leak
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
drop table testsort;
------------ workfile_limit_per_segment leak check during UPDATE of SHARE_SORT_XSLICE -------------------
drop table if exists testsisc;
create table testsisc (i1 int, i2 int, i3 int, i4 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into testsisc select i, i % 1000, i % 100000, i % 75 from generate_series(0,1000000) i;
set statement_mem="2MB";
set gp_cte_sharing=on;
drop table if exists foo;
create table foo (c int, d int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
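-- SHARE_SORT_XSLICE is the cross-slice variant of the shared sort: the
-- sorted CTE result is written once and consumed by readers in other slices,
-- so the spill files themselves cross slice boundaries and their disk usage
-- must be released for the whole query, not just one slice.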
-- hitting the injected error used to leak the workfile counter; verify it below
-- enable the fault injector
select gp_inject_fault('workfile_write_failure', 'reset', 2);
gp_inject_fault
-----------------
Success:
(1 row)
select gp_inject_fault('workfile_write_failure', 'error', 2);
gp_inject_fault
-----------------
Success:
(1 row)
update foo set d = i1 from (with ctesisc as (select * from testsisc order by i2)
select * from
(select count(*) from ctesisc) x(a), ctesisc
where x.a = ctesisc.i1) y;
ERROR: fault triggered, fault name:'workfile_write_failure' fault type:'error' (seg0 slice1 127.0.0.1:25432 pid=3175)
select gp_inject_fault('exec_hashjoin_new_batch', 'status', 2);
gp_inject_fault
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Success: fault name:'exec_hashjoin_new_batch' fault type:'interrupt' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'1' extra arg:'0' fault injection state:'completed' num times hit:'1' +
(1 row)
-- check counter leak
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
-- Run the test without fault injection
update foo set d = i1 from (with ctesisc as (select * from testsisc order by i2)
select * from
(select count(*) from ctesisc) x(a), ctesisc
where x.a = ctesisc.i1) y;
-- check counter leak
select max(bytes) as max, min(bytes) as min from gp_toolkit.gp_workfile_mgr_used_diskspace;
max | min
-----+-----
0 | 0
(1 row)
-- Check that workfile sets are deleted at the end of each subtransaction.
-- Unlike the rest of this file, this test doesn't depend on the GUC
-- gp_workfile_limit_per_segment.
-- start_ignore
drop external table if exists exttest;
NOTICE: table "exttest" does not exist, skipping
create external web table exttest (x int)
execute E'echo 1; echo 2; echo 3; echo bogus; echo 5'
on master
format 'text';
-- end_ignore
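-- The external table deliberately emits the non-integer row 'bogus', so any
-- scan of it fails partway through. The function below runs the failing
-- query inside a plpgsql BEGIN/EXCEPTION block, i.e. a subtransaction, and
-- then checks that the workfile sets created before the error were dropped
-- when the subtransaction aborted.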
create or replace function workset_cleanup_test()
returns boolean as
$func$
declare
sub_work_set_cnt integer;
top_work_set_cnt integer;
begin
select count(*) into top_work_set_cnt from gp_toolkit.gp_workfile_entries;
for i in 1..2 loop
begin
select * from exttest a, exttest b;
exception when others then
raise notice 'caught exception: %', sqlerrm;
end;
select count(*) into sub_work_set_cnt from gp_toolkit.gp_workfile_entries;
end loop;
return top_work_set_cnt = sub_work_set_cnt;
end;
$func$ language plpgsql;
select workset_cleanup_test();
NOTICE: caught exception: invalid input syntax for type integer: "bogus"
NOTICE: caught exception: invalid input syntax for type integer: "bogus"
workset_cleanup_test
----------------------
t
(1 row)
drop table segspace_test_hj_skew;
reset enable_parallel;
-- clean up gangs so we don't disturb other sessions.
select cleanupAllGangs();
cleanupallgangs
-----------------
t
(1 row)