blob: 6a7b15874aece644149d3f0b7ed3599a07dd2487 [file] [log] [blame]
--
-- Test queries mixes window functions with aggregate functions or grouping.
--
set optimizer_trace_fallback=on;
DROP TABLE IF EXISTS test_group_window;
CREATE TABLE test_group_window(c1 int, c2 int);
WITH tt AS (SELECT * FROM test_group_window)
SELECT tt.c1, COUNT() over () as fraction
FROM tt
GROUP BY tt.c1
ORDER BY tt.c1;
DROP TABLE test_group_window;
--
-- Set up
--
CREATE TABLE bfv_cte_foo AS SELECT i as a, i+1 as b from generate_series(1,10)i;
CREATE TABLE bfv_cte_bar AS SELECT i as c, i+1 as d from generate_series(1,10)i;
--
-- Test with CTE inlining disabled
--
set optimizer_cte_inlining = off;
--
-- With clause select test 1
--
WITH t AS
(
SELECT e.*,f.*
FROM
(
SELECT * FROM bfv_cte_foo WHERE a < 10
) e
LEFT OUTER JOIN
(
SELECT * FROM bfv_cte_bar WHERE c < 10
) f
ON e.a = f.d )
SELECT t.a,t.d, count(*) over () AS window
FROM t
GROUP BY t.a,t.d ORDER BY t.a,t.d LIMIT 2;
--
-- With clause select test 2
--
WITH t(a,b,d) AS
(
SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d
)
SELECT t.b,avg(t.a), rank() OVER (PARTITION BY t.a ORDER BY t.a) FROM bfv_cte_foo,t GROUP BY bfv_cte_foo.a,bfv_cte_foo.b,t.b,t.a ORDER BY 1,2,3 LIMIT 5;
--
-- With clause select test 3
--
WITH t(a,b,d) AS
(
SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d
)
SELECT cup.*, SUM(t.d) OVER(PARTITION BY t.b) FROM
(
SELECT bfv_cte_bar.*, AVG(t.b) OVER(PARTITION BY t.a ORDER BY t.b desc) AS e FROM t,bfv_cte_bar
) AS cup,
t WHERE cup.e < 10
GROUP BY cup.c,cup.d, cup.e ,t.d, t.b
ORDER BY 1,2,3,4
LIMIT 10;
--
-- With clause select test 4
--
WITH t(a,b,d) AS
(
SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d
)
SELECT cup.*, SUM(t.d) FROM
(
SELECT bfv_cte_bar.*, count(*) OVER() AS e FROM t,bfv_cte_bar WHERE t.a = bfv_cte_bar.c
) AS cup,
t GROUP BY cup.c,cup.d, cup.e,t.a
HAVING AVG(t.d) < 10 ORDER BY 1,2,3,4 LIMIT 10;
--
-- With clause select test 5
--
WITH t(a,b,d) AS
(
SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d
)
SELECT cup.*, SUM(t.d) OVER(PARTITION BY t.b) FROM
(
SELECT bfv_cte_bar.c as e,r.d FROM
(
SELECT t.d, avg(t.a) over() FROM t
) r,bfv_cte_bar
) AS cup,
t WHERE cup.e < 10
GROUP BY cup.d, cup.e, t.d, t.b
ORDER BY 1,2,3
LIMIT 10;
--
-- Test with CTE inlining enabled
--
set optimizer_cte_inlining = on;
set optimizer_cte_inlining_bound = 1000;
--
-- With clause select test 1
--
WITH t AS
(
SELECT e.*,f.*
FROM
(
SELECT * FROM bfv_cte_foo WHERE a < 10
) e
LEFT OUTER JOIN
(
SELECT * FROM bfv_cte_bar WHERE c < 10
) f
ON e.a = f.d )
SELECT t.a,t.d, count(*) over () AS window
FROM t
GROUP BY t.a,t.d ORDER BY t.a,t.d LIMIT 2;
--
-- With clause select test 2
--
WITH t(a,b,d) AS
(
SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d
)
SELECT t.b,avg(t.a), rank() OVER (PARTITION BY t.a ORDER BY t.a) FROM bfv_cte_foo,t GROUP BY bfv_cte_foo.a,bfv_cte_foo.b,t.b,t.a ORDER BY 1,2,3 LIMIT 5;
--
-- With clause select test 3
--
WITH t(a,b,d) AS
(
SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d
)
SELECT cup.*, SUM(t.d) OVER(PARTITION BY t.b) FROM
(
SELECT bfv_cte_bar.*, AVG(t.b) OVER(PARTITION BY t.a ORDER BY t.b desc) AS e FROM t,bfv_cte_bar
) AS cup,
t WHERE cup.e < 10
GROUP BY cup.c,cup.d, cup.e ,t.d, t.b
ORDER BY 1,2,3,4
LIMIT 10;
--
-- With clause select test 4
--
WITH t(a,b,d) AS
(
SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d
)
SELECT cup.*, SUM(t.d) FROM
(
SELECT bfv_cte_bar.*, count(*) OVER() AS e FROM t,bfv_cte_bar WHERE t.a = bfv_cte_bar.c
) AS cup,
t GROUP BY cup.c,cup.d, cup.e,t.a
HAVING AVG(t.d) < 10 ORDER BY 1,2,3,4 LIMIT 10;
--
-- With clause select test 5
--
WITH t(a,b,d) AS
(
SELECT bfv_cte_foo.a,bfv_cte_foo.b,bfv_cte_bar.d FROM bfv_cte_foo,bfv_cte_bar WHERE bfv_cte_foo.a = bfv_cte_bar.d
)
SELECT cup.*, SUM(t.d) OVER(PARTITION BY t.b) FROM
(
SELECT bfv_cte_bar.c as e,r.d FROM
(
SELECT t.d, avg(t.a) over() FROM t
) r,bfv_cte_bar
) AS cup,
t WHERE cup.e < 10
GROUP BY cup.d, cup.e, t.d, t.b
ORDER BY 1,2,3
LIMIT 10;
DROP TABLE IF EXISTS bfv_cte_foo;
DROP TABLE IF EXISTS bfv_cte_bar;
reset optimizer_cte_inlining;
reset optimizer_cte_inlining_bound;
--
-- Test for an old bug with rescanning window functions. This needs to use
-- catalog tables, otherwise the plan will contain a Motion node that
-- materializes the result, and masks the problem.
--
with t (a,b,d) as (select 1,2,1 from pg_class limit 1)
SELECT cup.* FROM
t, (
SELECT sum(t.b) OVER(PARTITION BY t.a ) AS e FROM (select 1 as a, 2 as b from pg_class limit 1)foo,t
) as cup
GROUP BY cup.e;
--
-- Test for a bug in translating a CTE's locus to the outer query.
--
-- This crashed at one point, when the code to translate the locus of
-- CTE subquery to the outer query's equivalence classes was broken.
-- The UNION ALL is a simple "pulled up" UNION ALL, creating an append
-- rel.
--
create temp table bfv_cte_tab (t text, i int4, j int4) distributed randomly;
insert into bfv_cte_tab values ('foo', 1, -1);
insert into bfv_cte_tab values ('bar', 2, -2);
with
foo as (select t, sum(i) as n from bfv_cte_tab group by t),
bar as (select t, sum(j) as n from bfv_cte_tab group by t)
select const_a, const_b, sum(n)
from
(select 'foo_a' as const_a, 'foo_b' as const_b, n from foo
union all
select 'bar_a' as const_a, 'bar_b' as const_b, n from bar
) x
group by const_a, const_b
;
-- test cte can not be the param for gp_dist_random
-- so in set_cte_pathlist we do not neet to check forceDistRandom
create table ttt(tc1 int,tc2 int) ;
insert into ttt values(1,1);
insert into ttt values(2,2);
WITH cte AS (
SELECT oid, relname
FROM pg_class
WHERE oid <2000
)
SELECT *
FROM gp_dist_random('cte')
JOIN ttt ON cte.oid = ttt.tc2;
--
-- Test bug fix for reader-writer communication in Share Input Scan (SISC)
-- https://github.com/greenplum-db/gpdb/issues/16429
--
-- Helper function
CREATE OR REPLACE FUNCTION wait_until_query_output_to_file(file_path text) RETURNS VOID AS $$
DECLARE
content TEXT;
line TEXT;
match TEXT;
BEGIN
LOOP
content := pg_read_file(file_path);
FOR line IN (SELECT unnest(string_to_array(content, E'\n'))) LOOP
match := substring(line FROM '\(([0-9]+) row[s]?\)');
IF match IS NOT NULL THEN
RETURN;
END IF;
END LOOP;
PERFORM pg_sleep(0.1); -- Sleep for a short period before checking again
END LOOP;
END;
$$ LANGUAGE plpgsql;
-- Setup tables
CREATE TABLE sisc_t1(a int, b int) DISTRIBUTED BY (a);
INSERT INTO sisc_t1 VALUES (1, 1), (2, 2), (5, 5);
ANALYZE sisc_t1;
CREATE TABLE sisc_t2(c int, d int) DISTRIBUTED BY (c);
-- Ensure that sisc_t2 has 0 tuple on seg2
INSERT INTO sisc_t2 VALUES (1, 1), (2, 2);
ANALYZE sisc_t2;
-- ORCA plan contains one SISC writer on slice1 and two readers on slice2 and slice4.
-- The Hash Join on slice4 has its hash side being empty on seg2, so the SISC
-- reader on the probe side will be squelched. Same applies to the Hash Join
-- on slice2 for seg2.
-- Similary, planner plan contains one SISC wirter on slice4 and one reader on slice2.
-- The Hash Join on slice2 has its hash side being empty on seg2, so the SISC
-- reader on the probe side will be squelched.
SET optimizer_enable_motion_broadcast TO off;
SET enable_parallel=off;
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT * FROM sisc_t1)
SELECT y.a, z.a
FROM (SELECT cte1.a, cte1.b FROM cte cte1 JOIN sisc_t2 ON (cte1.a = sisc_t2.d)) y,
(SELECT cte2.a, cte2.b FROM cte cte2 JOIN sisc_t2 ON (cte2.a = sisc_t2.d)) z
WHERE y.b = z.b - 1;
-- On seg2, introduce a delay in SISC WRITER (slice1) so that the xslice shared state
-- which is stored in shared memory is initialized by the SISC READER (slice2 or slice4)
select gp_inject_fault('get_shareinput_reference_delay_writer', 'suspend', dbid) from gp_segment_configuration where content = 2 and role = 'p';
select gp_inject_fault('get_shareinput_reference_done', 'skip', dbid) from gp_segment_configuration where content = 2 and role = 'p';
-- Run the above CTE query again with additional debug logging.
-- Use debug_shareinput_xslice to verify that the SISC READER on seg2 is indeed
-- initialized and squelched before the SISC WRITER starts its execution.
--
-- XXX: since this is a blocking query, we need to run it in a separate shell.
-- isolation2 test seems to be a more intuitive option, however, it lacks the
-- ability to redirect the LOGs as sterr into stdout. Hence directly using bash.
\! bash -c 'psql -X regression -c "set client_min_messages to log; set enable_parallel=off; set debug_shareinput_xslice to true; set optimizer_enable_motion_broadcast to off; WITH cte AS MATERIALIZED (SELECT * FROM sisc_t1) SELECT y.a, z.a FROM (SELECT cte1.a, cte1.b FROM cte cte1 JOIN sisc_t2 ON (cte1.a = sisc_t2.d)) y, (SELECT cte2.a, cte2.b FROM cte cte2 JOIN sisc_t2 ON (cte2.a = sisc_t2.d)) z WHERE y.b = z.b - 1;" &> /tmp/bfv_cte.out' &
-- Wait for both SISC READERs to be initialized and squelched
select gp_wait_until_triggered_fault('get_shareinput_reference_done', 1, dbid) from gp_segment_configuration where content = 2 and role = 'p';
select gp_inject_fault('get_shareinput_reference_done', 'reset', dbid) from gp_segment_configuration where content = 2 and role = 'p';
select gp_inject_fault('get_shareinput_reference_delay_writer', 'reset', dbid) from gp_segment_configuration where content = 2 and role = 'p';
-- Wait for the query to finish
select wait_until_query_output_to_file('/tmp/bfv_cte.out');
-- start_matchsubs
-- m/SISC READER \(shareid=0, slice=2\)/
-- s/SISC READER \(shareid=0, slice=2\)/SISC READER (shareid=0, slice={2|4})/g
-- m/SISC READER \(shareid=0, slice=4\)/
-- s/SISC READER \(shareid=0, slice=4\)/SISC READER (shareid=0, slice={2|4})/g
-- m/slice=2\): initialized xslice state/
-- s/slice=2\): initialized xslice state/slice={2|4}): initialized xslice state/g
-- m/slice=4\): initialized xslice state/
-- s/slice=4\): initialized xslice state/slice={2|4}): initialized xslice state/g
-- s/One-Time Filter: \(gp_execution_segment\(\) =\d+\)/One-Time Filter: \(gp_execution_segment\(\) =#\)/
-- end_matchsubs
-- Filter out irrelevant LOG messages from segments other than seg2.
\! cat /tmp/bfv_cte.out | grep -P '^(?!LOG)|^(LOG.*seg2)' | grep -vP 'LOG.*fault|decreased xslice state refcount'
-- cleanup
select gp_inject_fault_infinite('all', 'reset', dbid) from gp_segment_configuration;
RESET optimizer_enable_motion_broadcast;
RESET enable_parallel;
DROP TABLE sisc_t1;
DROP TABLE sisc_t2;
\! rm /tmp/bfv_cte.out
-- The following tests ensures queries involving CTE don't hang in ORCA or fall back.
-- Query hang is caused by a mismatch between the number of CTE producers and
-- consumers. Either there are more producers than consumers, or more consumers than
-- producers, the unconsumed producers or the starved consumers would cause the query
-- to hang. Query fall back, on the other hand, is due to a missing plan that satisfies
-- all the required plan properties. The issues stated above are fixed by sending
-- appropriate distribution requests based on how data is distributed to begin with,
-- to ensure the number of CTE producers and consumers match. For each of the tests,
-- we use explain to verify the plan and the actual query to verify the query doesn't
-- hang.
-- This test involves a tainted-replicated CTE and two consumer
-- To ensure there's no duplicate hazard, the cost model chooses a plan that gathers
-- the CTE onto the coordinator. Both the producer and the consumer executes on the
-- coordiator. There's 1 producer matching 1 consumer.
drop table if exists rep;
create table rep (i character varying(10)) distributed replicated;
explain (analyze off, costs off, verbose off)
with cte1 as ( select *,row_number() over ( partition by i) as rank_desc from rep),
cte2 as ( select 'col1' tblnm,count(*) diffcnt from ( select * from cte1) x)
select * from ( select 'col1' tblnm from cte1) a left join cte2 c on a.tblnm=c.tblnm;
with cte1 as ( select *,row_number() over ( partition by i) as rank_desc from rep),
cte2 as ( select 'col1' tblnm,count(*) diffcnt from ( select * from cte1) x)
select * from ( select 'col1' tblnm from cte1) a left join cte2 c on a.tblnm=c.tblnm;
drop table rep;
-- This test involves a tainted-replicated CTE and two consumers
-- To ensure there's no duplicate hazard, the cost model chooses a plan that executes
-- the producer and both consumers on one segment, before gathering the data onto
-- the coordinator and return. There's 1 producer matching 1 consumer for each consumer.
drop table if exists rep1, rep2;
create table rep1 (id bigserial not null, isc varchar(15) not null,iscd varchar(15) null) distributed replicated;
create table rep2 (id numeric null, rc varchar(255) null,ri numeric null) distributed replicated;
insert into rep1 (isc,iscd) values ('cmn_bin_yes', 'cmn_bin_yes');
insert into rep2 (id,rc,ri) values (113551,'cmn_bin_yes',101991), (113552,'cmn_bin_no',101991), (113553,'cmn_bin_err',101991), (113554,'cmn_bin_null',101991);
explain (analyze off, costs off, verbose off)
with
t1 as (select * from rep1),
t2 as (select id, rc from rep2 where ri = 101991)
select p.*from t1 p join t2 r on p.isc = r.rc join t2 r1 on p.iscd = r1.rc;
with
t1 as (select * from rep1),
t2 as (select id, rc from rep2 where ri = 101991)
select p.*from t1 p join t2 r on p.isc = r.rc join t2 r1 on p.iscd = r1.rc limit 1;
drop table rep1, rep2;
-- This test involves a strictly replicated CTE and two consumers
-- To ensure there's no duplicate hazard, the cost model chooses a plan that executes
-- the producer and both consumers on one segment by placing a one-time segment filter
-- on the producer side. The coordinator gathers data from all three segments, even
-- though only one segment has the tuples from the join due to the one-time filter.
-- There's 1 producer matching 1 consumer for each consumer.
drop table if exists t1, t2, rep;
create table t1 (a int, b int);
create table t2 (a int, b int);
create table rep (a int, b int) distributed replicated;
insert into t1 select 1, generate_series(1,10);
insert into t2 select 1, generate_series(1,20);
insert into rep select 1, 1;
explain (analyze off, costs off, verbose off)
with t1_cte as (select b from t1),
rep_cte as (select a from rep)
select
case when (t2.b in (1,2)) then (select rep_cte.a from rep_cte)
when (t2.b in (1,2)) then (select rep_cte.a from rep_cte)
end as rep_cte_a
from t1_cte join t2 on t1_cte.b = t2.b;
with t1_cte as (select b from t1),
rep_cte as (select a from rep)
select
case when (t2.b in (1,2)) then (select rep_cte.a from rep_cte)
when (t2.b in (1,2)) then (select rep_cte.a from rep_cte)
end as rep_cte_a
from t1_cte join t2 on t1_cte.b = t2.b;
drop table t1, t2, rep;
--
-- Test for a bug in ORCA optimizing a CTE view.
--
-- This crashed at one point in retail build due to preprocessor creating a
-- duplicate CTE anchor. That led ORCA to construct a bad plan where
-- CTEConsumer project list contained an invalid scalar subplan and caused a
-- SIGSEGV during DXL to PlStmt translation.
--
create table a_table(a smallint);
create view cte_view as
with t1 as (select a from a_table)
select t1.a from t1
where (t1.a = (select t1.a from t1));
-- Only by tampering with pg_stats directly I was able to guide ORCA cost model
-- to pick a plan that would crash before this fix
set allow_system_table_mods=true;
update pg_class set relpages = 1::int, reltuples = 12.0::real where relname = 'a_table';
reset allow_system_table_mods;
explain select * from a_table join cte_view on a_table.a = (select a from cte_view) where cte_view.a = 2024;
-- CTE tests with outer references. Ensure Orca produces an inlined plan in these cases rather than falling back to planner
drop table if exists foo;
drop table if exists jazz;
create table foo (a int) distributed by (a);
create table jazz (a int) distributed by (a);
insert into foo values (2);
insert into jazz values (2);
analyze foo,jazz;
explain (COSTS OFF) select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 where foo.a = 2)) as t FROM foo;
select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 where foo.a = 2)) as t FROM foo;
-- outer ref in limit
explain (COSTS OFF) select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 limit foo.a)) as t FROM foo;
select ((with cte as (select * from jazz) select 1 from cte cte1, cte cte2 limit foo.a)) as t FROM foo;
reset optimizer_trace_fallback;