| -- |
| -- Test queries mixes window functions with aggregate functions or grouping. |
| -- |
| set optimizer_trace_fallback=on; |
| DROP TABLE IF EXISTS test_group_window; |
| |
| CREATE TABLE test_group_window(c1 int, c2 int); |
| |
| WITH tt AS (SELECT * FROM test_group_window) |
| SELECT tt.c1, COUNT() over () as fraction |
| FROM tt |
| GROUP BY tt.c1 |
| ORDER BY tt.c1; |
| |
| DROP TABLE test_group_window; |
| |
| -- |
| -- Set up |
| -- |
| CREATE TABLE bfv_cte_foo AS SELECT i as a, i+1 as b from generate_series(1,10)i; |
| CREATE TABLE bfv_cte_bar AS SELECT i as c, i+1 as d from generate_series(1,10)i; |
| |
| -- |
| -- Test with CTE inlining disabled |
| -- |
| set optimizer_cte_inlining = off; |
| |
| -- |
| -- With clause select test 1 |
| -- |
| WITH t AS |
| ( |
| SELECT e.*,f.* |
| FROM |
| ( |
| SELECT * FROM bfv_cte_foo WHERE a < 10 |
| ) e |
| LEFT OUTER JOIN |
| ( |
| SELECT * FROM bfv_cte_bar WHERE c < 10 |
| ) f |
| |
| ON e.a = f.d ) |
| SELECT t.a,t.d, count(*) over () AS window |
| FROM t |
| GROUP BY t.a,t.d ORDER BY t.a,t.d LIMIT 2; |
| |
| -- |
| -- With clause select test 2 |
| -- |
| WITH t(a,b,d) AS |
| ( |
| SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d |
| ) |
| SELECT t.b,avg(t.a), rank() OVER (PARTITION BY t.a ORDER BY t.a) FROM bfv_cte_foo,t GROUP BY bfv_cte_foo.a,bfv_cte_foo.b,t.b,t.a ORDER BY 1,2,3 LIMIT 5; |
| |
| -- |
| -- With clause select test 3 |
| -- |
| WITH t(a,b,d) AS |
| ( |
| SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d |
| ) |
| SELECT cup.*, SUM(t.d) OVER(PARTITION BY t.b) FROM |
| ( |
| SELECT bfv_cte_bar.*, AVG(t.b) OVER(PARTITION BY t.a ORDER BY t.b desc) AS e FROM t,bfv_cte_bar |
| ) AS cup, |
| t WHERE cup.e < 10 |
| GROUP BY cup.c,cup.d, cup.e ,t.d, t.b |
| ORDER BY 1,2,3,4 |
| LIMIT 10; |
| |
| -- |
| -- With clause select test 4 |
| -- |
| WITH t(a,b,d) AS |
| ( |
| SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d |
| ) |
| SELECT cup.*, SUM(t.d) FROM |
| ( |
| SELECT bfv_cte_bar.*, count(*) OVER() AS e FROM t,bfv_cte_bar WHERE t.a = bfv_cte_bar.c |
| ) AS cup, |
| t GROUP BY cup.c,cup.d, cup.e,t.a |
| HAVING AVG(t.d) < 10 ORDER BY 1,2,3,4 LIMIT 10; |
| |
| -- |
| -- With clause select test 5 |
| -- |
| WITH t(a,b,d) AS |
| ( |
| SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d |
| ) |
| SELECT cup.*, SUM(t.d) OVER(PARTITION BY t.b) FROM |
| ( |
| SELECT bfv_cte_bar.c as e,r.d FROM |
| ( |
| SELECT t.d, avg(t.a) over() FROM t |
| ) r,bfv_cte_bar |
| ) AS cup, |
| t WHERE cup.e < 10 |
| GROUP BY cup.d, cup.e, t.d, t.b |
| ORDER BY 1,2,3 |
| LIMIT 10; |
| |
| -- |
| -- Test with CTE inlining enabled |
| -- |
| set optimizer_cte_inlining = on; |
| set optimizer_cte_inlining_bound = 1000; |
| |
| -- |
| -- With clause select test 1 |
| -- |
| WITH t AS |
| ( |
| SELECT e.*,f.* |
| FROM |
| ( |
| SELECT * FROM bfv_cte_foo WHERE a < 10 |
| ) e |
| LEFT OUTER JOIN |
| ( |
| SELECT * FROM bfv_cte_bar WHERE c < 10 |
| ) f |
| |
| ON e.a = f.d ) |
| SELECT t.a,t.d, count(*) over () AS window |
| FROM t |
| GROUP BY t.a,t.d ORDER BY t.a,t.d LIMIT 2; |
| |
| -- |
| -- With clause select test 2 |
| -- |
| WITH t(a,b,d) AS |
| ( |
| SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d |
| ) |
| SELECT t.b,avg(t.a), rank() OVER (PARTITION BY t.a ORDER BY t.a) FROM bfv_cte_foo,t GROUP BY bfv_cte_foo.a,bfv_cte_foo.b,t.b,t.a ORDER BY 1,2,3 LIMIT 5; |
| |
| -- |
| -- With clause select test 3 |
| -- |
| WITH t(a,b,d) AS |
| ( |
| SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d |
| ) |
| SELECT cup.*, SUM(t.d) OVER(PARTITION BY t.b) FROM |
| ( |
| SELECT bfv_cte_bar.*, AVG(t.b) OVER(PARTITION BY t.a ORDER BY t.b desc) AS e FROM t,bfv_cte_bar |
| ) AS cup, |
| t WHERE cup.e < 10 |
| GROUP BY cup.c,cup.d, cup.e ,t.d, t.b |
| ORDER BY 1,2,3,4 |
| LIMIT 10; |
| |
| -- |
| -- With clause select test 4 |
| -- |
| WITH t(a,b,d) AS |
| ( |
| SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d |
| ) |
| SELECT cup.*, SUM(t.d) FROM |
| ( |
| SELECT bfv_cte_bar.*, count(*) OVER() AS e FROM t,bfv_cte_bar WHERE t.a = bfv_cte_bar.c |
| ) AS cup, |
| t GROUP BY cup.c,cup.d, cup.e,t.a |
| HAVING AVG(t.d) < 10 ORDER BY 1,2,3,4 LIMIT 10; |
| |
| -- |
| -- With clause select test 5 |
| -- |
| WITH t(a,b,d) AS |
| ( |
| SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d |
| ) |
| SELECT cup.*, SUM(t.d) OVER(PARTITION BY t.b) FROM |
| ( |
| SELECT bfv_cte_bar.c as e,r.d FROM |
| ( |
| SELECT t.d, avg(t.a) over() FROM t |
| ) r,bfv_cte_bar |
| ) AS cup, |
| t WHERE cup.e < 10 |
| GROUP BY cup.d, cup.e, t.d, t.b |
| ORDER BY 1,2,3 |
| LIMIT 10; |
| |
| DROP TABLE IF EXISTS bfv_cte_foo; |
| DROP TABLE IF EXISTS bfv_cte_bar; |
| reset optimizer_cte_inlining; |
| reset optimizer_cte_inlining_bound; |
| |
| -- |
| -- Test for an old bug with rescanning window functions. This needs to use |
| -- catalog tables, otherwise the plan will contain a Motion node that |
| -- materializes the result, and masks the problem. |
| -- |
| with t (a,b,d) as (select 1,2,1 from pg_class limit 1) |
| SELECT cup.* FROM |
| t, ( |
| SELECT sum(t.b) OVER(PARTITION BY t.a ) AS e FROM (select 1 as a, 2 as b from pg_class limit 1)foo,t |
| ) as cup |
| GROUP BY cup.e; |
| |
| -- |
| -- Test for a bug in translating a CTE's locus to the outer query. |
| -- |
| -- This crashed at one point, when the code to translate the locus of |
| -- CTE subquery to the outer query's equivalence classes was broken. |
| -- The UNION ALL is a simple "pulled up" UNION ALL, creating an append |
| -- rel. |
| -- |
| create temp table bfv_cte_tab (t text, i int4, j int4) distributed randomly; |
| |
| insert into bfv_cte_tab values ('foo', 1, -1); |
| insert into bfv_cte_tab values ('bar', 2, -2); |
| |
| with |
| foo as (select t, sum(i) as n from bfv_cte_tab group by t), |
| bar as (select t, sum(j) as n from bfv_cte_tab group by t) |
| |
| select const_a, const_b, sum(n) |
| from |
| (select 'foo_a' as const_a, 'foo_b' as const_b, n from foo |
| union all |
| select 'bar_a' as const_a, 'bar_b' as const_b, n from bar |
| ) x |
| group by const_a, const_b |
| ; |
| |
| -- test cte can not be the param for gp_dist_random |
| -- so in set_cte_pathlist we do not neet to check forceDistRandom |
| create table ttt(tc1 int,tc2 int) ; |
| insert into ttt values(1,1); |
| insert into ttt values(2,2); |
| |
| WITH cte AS ( |
| SELECT oid, relname |
| FROM pg_class |
| WHERE oid <2000 |
| ) |
| SELECT * |
| FROM gp_dist_random('cte') |
| JOIN ttt ON cte.oid = ttt.tc2; |
| |
| -- |
| -- Test bug fix for reader-writer communication in Share Input Scan (SISC) |
| -- https://github.com/greenplum-db/gpdb/issues/16429 |
| -- |
| -- Helper function |
| CREATE OR REPLACE FUNCTION wait_until_query_output_to_file(file_path text) RETURNS VOID AS $$ |
| DECLARE |
| content TEXT; |
| line TEXT; |
| match TEXT; |
| BEGIN |
| LOOP |
| content := pg_read_file(file_path); |
| FOR line IN (SELECT unnest(string_to_array(content, E'\n'))) LOOP |
| match := substring(line FROM '\(([0-9]+) row[s]?\)'); |
| IF match IS NOT NULL THEN |
| RETURN; |
| END IF; |
| END LOOP; |
| PERFORM pg_sleep(0.1); -- Sleep for a short period before checking again |
| END LOOP; |
| END; |
| $$ LANGUAGE plpgsql; |
| |
| -- Setup tables |
| CREATE TABLE sisc_t1(a int, b int) DISTRIBUTED BY (a); |
| INSERT INTO sisc_t1 VALUES (1, 1), (2, 2), (5, 5); |
| ANALYZE sisc_t1; |
| CREATE TABLE sisc_t2(c int, d int) DISTRIBUTED BY (c); |
| -- Ensure that sisc_t2 has 0 tuple on seg2 |
| INSERT INTO sisc_t2 VALUES (1, 1), (2, 2); |
| ANALYZE sisc_t2; |
| |
| -- ORCA plan contains one SISC writer on slice1 and two readers on slice2 and slice4. |
| -- The Hash Join on slice4 has its hash side being empty on seg2, so the SISC |
| -- reader on the probe side will be squelched. Same applies to the Hash Join |
| -- on slice2 for seg2. |
| -- Similary, planner plan contains one SISC wirter on slice4 and one reader on slice2. |
| -- The Hash Join on slice2 has its hash side being empty on seg2, so the SISC |
| -- reader on the probe side will be squelched. |
| SET optimizer_enable_motion_broadcast TO off; |
| SET enable_parallel=off; |
| EXPLAIN (COSTS OFF) |
| WITH cte AS MATERIALIZED (SELECT * FROM sisc_t1) |
| SELECT y.a, z.a |
| FROM (SELECT cte1.a, cte1.b FROM cte cte1 JOIN sisc_t2 ON (cte1.a = sisc_t2.d)) y, |
| (SELECT cte2.a, cte2.b FROM cte cte2 JOIN sisc_t2 ON (cte2.a = sisc_t2.d)) z |
| WHERE y.b = z.b - 1; |
| |
| -- On seg2, introduce a delay in SISC WRITER (slice1) so that the xslice shared state |
| -- which is stored in shared memory is initialized by the SISC READER (slice2 or slice4) |
| select gp_inject_fault('get_shareinput_reference_delay_writer', 'suspend', dbid) from gp_segment_configuration where content = 2 and role = 'p'; |
| select gp_inject_fault('get_shareinput_reference_done', 'skip', dbid) from gp_segment_configuration where content = 2 and role = 'p'; |
| |
| -- Run the above CTE query again with additional debug logging. |
| -- Use debug_shareinput_xslice to verify that the SISC READER on seg2 is indeed |
| -- initialized and squelched before the SISC WRITER starts its execution. |
| -- |
| -- XXX: since this is a blocking query, we need to run it in a separate shell. |
| -- isolation2 test seems to be a more intuitive option, however, it lacks the |
| -- ability to redirect the LOGs as sterr into stdout. Hence directly using bash. |
| \! bash -c 'psql -X regression -c "set client_min_messages to log; set enable_parallel=off; set debug_shareinput_xslice to true; set optimizer_enable_motion_broadcast to off; WITH cte AS MATERIALIZED (SELECT * FROM sisc_t1) SELECT y.a, z.a FROM (SELECT cte1.a, cte1.b FROM cte cte1 JOIN sisc_t2 ON (cte1.a = sisc_t2.d)) y, (SELECT cte2.a, cte2.b FROM cte cte2 JOIN sisc_t2 ON (cte2.a = sisc_t2.d)) z WHERE y.b = z.b - 1;" &> /tmp/bfv_cte.out' & |
| |
| -- Wait for both SISC READERs to be initialized and squelched |
| select gp_wait_until_triggered_fault('get_shareinput_reference_done', 1, dbid) from gp_segment_configuration where content = 2 and role = 'p'; |
| select gp_inject_fault('get_shareinput_reference_done', 'reset', dbid) from gp_segment_configuration where content = 2 and role = 'p'; |
| select gp_inject_fault('get_shareinput_reference_delay_writer', 'reset', dbid) from gp_segment_configuration where content = 2 and role = 'p'; |
| |
| -- Wait for the query to finish |
| select wait_until_query_output_to_file('/tmp/bfv_cte.out'); |
| -- start_matchsubs |
| -- m/SISC READER \(shareid=0, slice=2\)/ |
| -- s/SISC READER \(shareid=0, slice=2\)/SISC READER (shareid=0, slice={2|4})/g |
| -- m/SISC READER \(shareid=0, slice=4\)/ |
| -- s/SISC READER \(shareid=0, slice=4\)/SISC READER (shareid=0, slice={2|4})/g |
| -- m/slice=2\): initialized xslice state/ |
| -- s/slice=2\): initialized xslice state/slice={2|4}): initialized xslice state/g |
| -- m/slice=4\): initialized xslice state/ |
| -- s/slice=4\): initialized xslice state/slice={2|4}): initialized xslice state/g |
| -- s/One-Time Filter: \(gp_execution_segment\(\) =\d+\)/One-Time Filter: \(gp_execution_segment\(\) =#\)/ |
| -- end_matchsubs |
| -- Filter out irrelevant LOG messages from segments other than seg2. |
| \! cat /tmp/bfv_cte.out | grep -P '^(?!LOG)|^(LOG.*seg2)' | grep -vP 'LOG.*fault|decreased xslice state refcount' |
| |
| -- cleanup |
| select gp_inject_fault_infinite('all', 'reset', dbid) from gp_segment_configuration; |
| RESET optimizer_enable_motion_broadcast; |
| RESET enable_parallel; |
| DROP TABLE sisc_t1; |
| DROP TABLE sisc_t2; |
| \! rm /tmp/bfv_cte.out |
| |
| -- The following tests ensures queries involving CTE don't hang in ORCA or fall back. |
| -- Query hang is caused by a mismatch between the number of CTE producers and |
| -- consumers. Either there are more producers than consumers, or more consumers than |
| -- producers, the unconsumed producers or the starved consumers would cause the query |
| -- to hang. Query fall back, on the other hand, is due to a missing plan that satisfies |
| -- all the required plan properties. The issues stated above are fixed by sending |
| -- appropriate distribution requests based on how data is distributed to begin with, |
| -- to ensure the number of CTE producers and consumers match. For each of the tests, |
| -- we use explain to verify the plan and the actual query to verify the query doesn't |
| -- hang. |
| |
| -- This test involves a tainted-replicated CTE and two consumer |
| -- To ensure there's no duplicate hazard, the cost model chooses a plan that gathers |
| -- the CTE onto the coordinator. Both the producer and the consumer executes on the |
| -- coordiator. There's 1 producer matching 1 consumer. |
| drop table if exists rep; |
| create table rep (i character varying(10)) distributed replicated; |
| |
| explain (analyze off, costs off, verbose off) |
| with cte1 as ( select *,row_number() over ( partition by i) as rank_desc from rep), |
| cte2 as ( select 'col1' tblnm,count(*) diffcnt from ( select * from cte1) x) |
| select * from ( select 'col1' tblnm from cte1) a left join cte2 c on a.tblnm=c.tblnm; |
| |
| with cte1 as ( select *,row_number() over ( partition by i) as rank_desc from rep), |
| cte2 as ( select 'col1' tblnm,count(*) diffcnt from ( select * from cte1) x) |
| select * from ( select 'col1' tblnm from cte1) a left join cte2 c on a.tblnm=c.tblnm; |
| drop table rep; |
| |
| -- This test involves a tainted-replicated CTE and two consumers |
| -- To ensure there's no duplicate hazard, the cost model chooses a plan that executes |
| -- the producer and both consumers on one segment, before gathering the data onto |
| -- the coordinator and return. There's 1 producer matching 1 consumer for each consumer. |
| drop table if exists rep1, rep2; |
| create table rep1 (id bigserial not null, isc varchar(15) not null,iscd varchar(15) null) distributed replicated; |
| create table rep2 (id numeric null, rc varchar(255) null,ri numeric null) distributed replicated; |
| insert into rep1 (isc,iscd) values ('cmn_bin_yes', 'cmn_bin_yes'); |
| insert into rep2 (id,rc,ri) values (113551,'cmn_bin_yes',101991), (113552,'cmn_bin_no',101991), (113553,'cmn_bin_err',101991), (113554,'cmn_bin_null',101991); |
| explain (analyze off, costs off, verbose off) |
| with |
| t1 as (select * from rep1), |
| t2 as (select id, rc from rep2 where ri = 101991) |
| select p.*from t1 p join t2 r on p.isc = r.rc join t2 r1 on p.iscd = r1.rc; |
| |
| with |
| t1 as (select * from rep1), |
| t2 as (select id, rc from rep2 where ri = 101991) |
| select p.*from t1 p join t2 r on p.isc = r.rc join t2 r1 on p.iscd = r1.rc limit 1; |
| drop table rep1, rep2; |
| |
| -- This test involves a strictly replicated CTE and two consumers |
| -- To ensure there's no duplicate hazard, the cost model chooses a plan that executes |
| -- the producer and both consumers on one segment by placing a one-time segment filter |
| -- on the producer side. The coordinator gathers data from all three segments, even |
| -- though only one segment has the tuples from the join due to the one-time filter. |
| -- There's 1 producer matching 1 consumer for each consumer. |
| drop table if exists t1, t2, rep; |
| create table t1 (a int, b int); |
| create table t2 (a int, b int); |
| create table rep (a int, b int) distributed replicated; |
| insert into t1 select 1, generate_series(1,10); |
| insert into t2 select 1, generate_series(1,20); |
| insert into rep select 1, 1; |
| |
| explain (analyze off, costs off, verbose off) |
| with t1_cte as (select b from t1), |
| rep_cte as (select a from rep) |
| select |
| case when (t2.b in (1,2)) then (select rep_cte.a from rep_cte) |
| when (t2.b in (1,2)) then (select rep_cte.a from rep_cte) |
| end as rep_cte_a |
| from t1_cte join t2 on t1_cte.b = t2.b; |
| |
| with t1_cte as (select b from t1), |
| rep_cte as (select a from rep) |
| select |
| case when (t2.b in (1,2)) then (select rep_cte.a from rep_cte) |
| when (t2.b in (1,2)) then (select rep_cte.a from rep_cte) |
| end as rep_cte_a |
| from t1_cte join t2 on t1_cte.b = t2.b; |
| drop table t1, t2, rep; |
| |
| -- |
| -- Test for a bug in ORCA optimizing a CTE view. |
| -- |
| -- This crashed at one point in retail build due to preprocessor creating a |
| -- duplicate CTE anchor. That led ORCA to construct a bad plan where |
| -- CTEConsumer project list contained an invalid scalar subplan and caused a |
| -- SIGSEGV during DXL to PlStmt translation. |
| -- |
| create table a_table(a smallint); |
| |
| create view cte_view as |
| with t1 as (select a from a_table) |
| select t1.a from t1 |
| where (t1.a = (select t1.a from t1)); |
| |
| -- Only by tampering with pg_stats directly I was able to guide ORCA cost model |
| -- to pick a plan that would crash before this fix |
| set allow_system_table_mods=true; |
| update pg_class set relpages = 1::int, reltuples = 12.0::real where relname = 'a_table'; |
| reset allow_system_table_mods; |
| |
| explain select * from a_table join cte_view on a_table.a = (select a from cte_view) where cte_view.a = 2024; |
| |
| -- CTE tests with outer references. Ensure Orca produces an inlined plan in these cases rather than falling back to planner |
| drop table if exists foo; |
| drop table if exists jazz; |
| create table foo (a int) distributed by (a); |
| create table jazz (a int) distributed by (a); |
| insert into foo values (2); |
| insert into jazz values (2); |
| analyze foo,jazz; |
| explain (COSTS OFF) select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 where foo.a = 2)) as t FROM foo; |
| select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 where foo.a = 2)) as t FROM foo; |
| |
| -- outer ref in limit |
| explain (COSTS OFF) select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 limit foo.a)) as t FROM foo; |
| select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 limit foo.a)) as t FROM foo; |
| |
| reset optimizer_trace_fallback; |