| -- |
| -- Queries that lead to hanging (not dead lock) when we don't handle synchronization properly in shared scan |
| -- Queries that lead to wrong result when we don't finish executing the subtree below the shared scan being squelched. |
| -- |
| |
| CREATE SCHEMA shared_scan; |
| |
| SET search_path = shared_scan; |
| |
| CREATE TABLE foo (a int, b int); |
| CREATE TABLE bar (c int, d int); |
| CREATE TABLE jazz(e int, f int); |
| |
| INSERT INTO foo values (1, 2); |
| INSERT INTO bar SELECT i, i from generate_series(1, 100)i; |
| INSERT INTO jazz VALUES (2, 2), (3, 3); |
| |
| ANALYZE foo; |
| ANALYZE bar; |
| ANALYZE jazz; |
| |
| SELECT $query$ |
| SELECT * FROM |
| ( |
| WITH cte AS (SELECT * FROM foo) |
| SELECT * FROM (SELECT * FROM cte UNION ALL SELECT * FROM cte) |
| AS X |
| JOIN bar ON b = c |
| ) AS XY |
| JOIN jazz on c = e AND b = f; |
| $query$ AS qry \gset |
| |
| -- We are very particular about this plan shape and data distribution with ORCA: |
| -- 1. `jazz` has to be the inner table of the outer HASH JOIN, so that on a |
| -- segment which has zero tuples in `jazz`, the Sequence node that contains the |
| -- Shared Scan will be squelched on that segment. If `jazz` is not on the inner |
| -- side, the above mentioned "hang" scenario will not be covered. |
| -- 2. The Shared Scan producer has to be on a different slice from consumers, |
| -- and some tuples coming out of the Share Scan producer on one segments are |
| -- redistributed to a different segment over Motion. If not, the above mentioned |
| -- "wrong result" scenario will not be covered. |
| EXPLAIN (COSTS OFF) |
| :qry ; |
| |
| SET statement_timeout = '15s'; |
| |
| :qry ; |
| |
| RESET statement_timeout; |
| |
| SELECT COUNT(*) |
| FROM (SELECT *, |
| ( |
| WITH cte AS (SELECT * FROM jazz WHERE jazz.e = bar.c) |
| SELECT 1 FROM cte c1, cte c2 |
| ) |
| FROM bar) as s; |
| |
| CREATE TABLE t1 (a int, b int); |
| CREATE TABLE t2 (a int); |
| |
| -- ORCA plan contains a Shared Scan producer with a unsorted Motion below it |
| EXPLAIN (COSTS OFF) |
| WITH cte AS (SELECT * FROM t1 WHERE random() < 0.1 LIMIT 10) SELECT a, 1, 1 FROM cte JOIN t2 USING (a); |
| -- This functions returns one more column than expected. |
| CREATE OR REPLACE FUNCTION col_mismatch_func1() RETURNS TABLE (field1 int, field2 int) |
| LANGUAGE 'plpgsql' VOLATILE STRICT AS |
| $$ |
| DECLARE |
| v_qry text; |
| BEGIN |
| v_qry := 'WITH cte AS (SELECT * FROM t1 WHERE random() < 0.1 LIMIT 10) SELECT a, 1 , 1 FROM cte JOIN t2 USING (a)'; |
| RETURN QUERY EXECUTE v_qry; |
| END |
| $$; |
| |
| -- This should only ERROR and should not SIGSEGV |
| SELECT col_mismatch_func1(); |
| |
| -- ORCA plan contains a Shared Scan producer with a sorted Motion below it |
| EXPLAIN (COSTS OFF) |
| WITH cte AS (SELECT * FROM t1 WHERE random() < 0.1 ORDER BY b LIMIT 10) SELECT a, 1, 1 FROM cte JOIN t2 USING (a); |
| --- This functions returns one more column than expected. |
| CREATE OR REPLACE FUNCTION col_mismatch_func2() RETURNS TABLE (field1 int, field2 int) |
| LANGUAGE 'plpgsql' VOLATILE STRICT AS |
| $$ |
| DECLARE |
| v_qry text; |
| BEGIN |
| v_qry := 'WITH cte AS (SELECT * FROM t1 WHERE random() < 0.1 ORDER BY b LIMIT 10) SELECT a, 1 , 1 FROM cte JOIN t2 USING (a)'; |
| RETURN QUERY EXECUTE v_qry; |
| END |
| $$; |
| |
| -- This should only ERROR and should not SIGSEGV |
| SELECT col_mismatch_func2(); |
| |
| -- https://github.com/greenplum-db/gpdb/issues/12701 |
| -- Disable cte sharing in subquery |
| drop table if exists pk_list; |
| create table pk_list (id int, schema_name varchar, table_name varchar) distributed by (id); |
| drop table if exists calender; |
| create table calender (id int, data_hour timestamp) distributed by (id); |
| |
| explain (costs off) |
| with |
| tbls as (select distinct schema_name, table_name as table_nm from pk_list), |
| tbls_daily_report_23 as (select unnest(string_to_array('mart_cm.card' ,',')) as table_nm_23), |
| tbls_w_onl_actl_data as (select unnest(string_to_array('mart_cm.cont_resp,mart_cm.card', ',')) as table_nm_onl_act) |
| select data_hour, stat.schema_name as schema_nm, dt.table_nm |
| from ( |
| select * from calender c |
| cross join tbls |
| ) dt |
| inner join ( |
| select tbls.schema_name, tbls.table_nm as table_name |
| from tbls tbls |
| ) stat on dt.table_nm = stat.table_name |
| where |
| (data_hour = date_trunc('day',data_hour) and stat.schema_name || '.' ||stat.table_name not in (select table_nm_23 from tbls_daily_report_23)) |
| and (stat.schema_name || '.' ||stat.table_name not in (select table_nm_onl_act from tbls_w_onl_actl_data)) |
| or (stat.schema_name || '.' ||stat.table_name in (select table_nm_onl_act from tbls_w_onl_actl_data)); |