blob: ecf5c26e03a084d49fd858034c4080debc1bfd7c [file] [log] [blame]
-- Tests exercising different behaviour of the WITH RECURSIVE implementation in GPDB
-- GPDB's distributed nature requires thorough testing of many use cases in order to ensure correctness
-- Setup
create schema recursive_cte;
set search_path=recursive_cte;
create table recursive_table_1(id int);
insert into recursive_table_1 values (1), (2), (100);
-- Test the featureblocking GUC for recursive CTE
set gp_recursive_cte to off;
with recursive r(i) as (
select 1
union all
select i + 1 from r
)
select * from recursive_table_1 where recursive_table_1.id IN (select * from r limit 10);
ERROR: RECURSIVE clauses in WITH queries are currently disabled
HINT: In order to use recursive CTEs, "gp_recursive_cte" must be turned on.
set gp_recursive_cte to on;
-- WITH RECURSIVE ref used with IN without correlation
with recursive r(i) as (
select 1
union all
select i + 1 from r
)
select * from recursive_table_1 where recursive_table_1.id IN (select * from r limit 10);
id
----
1
2
(2 rows)
-- WITH RECURSIVE ref used with NOT IN without correlation
with recursive r(i) as (
select 1
union all
select i + 1 from r
)
select * from recursive_table_1 where recursive_table_1.id NOT IN (select * from r limit 10);
id
-----
100
(1 row)
-- WITH RECURSIVE ref used with EXISTS without correlation
with recursive r(i) as (
select 1
union all
select i + 1 from r
)
select * from recursive_table_1 where EXISTS (select * from r limit 10);
id
-----
1
2
100
(3 rows)
-- WITH RECURSIVE ref used with NOT EXISTS without correlation
with recursive r(i) as (
select 1
union all
select i + 1 from r
)
select * from recursive_table_1 where NOT EXISTS (select * from r limit 10);
id
----
(0 rows)
create table recursive_table_2(id int);
insert into recursive_table_2 values (11) , (21), (31);
-- WITH RECURSIVE ref used with IN & correlation
with recursive r(i) as (
select * from recursive_table_2
union all
select r.i + 1 from r, recursive_table_2 where r.i = recursive_table_2.id
)
select recursive_table_1.id from recursive_table_1, recursive_table_2 where recursive_table_1.id IN (select * from r where r.i = recursive_table_2.id);
id
----
(0 rows)
-- WITH RECURSIVE ref used with NOT IN & correlation
with recursive r(i) as (
select * from recursive_table_2
union all
select r.i + 1 from r, recursive_table_2 where r.i = recursive_table_2.id
)
select recursive_table_1.id from recursive_table_1, recursive_table_2 where recursive_table_1.id NOT IN (select * from r where r.i = recursive_table_2.id);
id
-----
1
1
1
2
2
2
100
100
100
(9 rows)
-- WITH RECURSIVE ref used with EXISTS & correlation
with recursive r(i) as (
select * from recursive_table_2
union all
select r.i + 1 from r, recursive_table_2 where r.i = recursive_table_2.id
)
select recursive_table_1.id from recursive_table_1, recursive_table_2 where recursive_table_1.id = recursive_table_2.id and EXISTS (select * from r where r.i = recursive_table_2.id);
id
----
(0 rows)
-- WITH RECURSIVE ref used with NOT EXISTS & correlation
with recursive r(i) as (
select * from recursive_table_2
union all
select r.i + 1 from r, recursive_table_2 where r.i = recursive_table_2.id
)
select recursive_table_1.id from recursive_table_1, recursive_table_2 where recursive_table_1.id = recursive_table_2.id and NOT EXISTS (select * from r where r.i = recursive_table_2.id);
id
----
(0 rows)
-- WITH RECURSIVE ref used within a Expression sublink
with recursive r(i) as (
select 1
union all
select i + 1 from r
)
select * from recursive_table_1 where recursive_table_1.id >= (select i from r limit 1) order by recursive_table_1.id;
id
-----
1
2
100
(3 rows)
-- WITH RECURSIVE ref used within an EXISTS subquery in another recursive CTE
with recursive
r(i) as (
select 1
union all
select r.i + 1 from r, recursive_table_2 where i = recursive_table_2.id
),
y(i) as (
select 1
union all
select i + 1 from y, recursive_table_1 where i = recursive_table_1.id and EXISTS (select * from r limit 10)
)
select * from y limit 10;
i
---
1
2
3
(3 rows)
-- WITH RECURSIVE ref used within a NOT EXISTS subquery in another recursive CTE
with recursive
r(i) as (
select 1
union all
select r.i + 1 from r, recursive_table_2 where i = recursive_table_2.id
),
y(i) as (
select 1
union all
select i + 1 from y, recursive_table_1 where i = recursive_table_1.id and NOT EXISTS (select * from r limit 10)
)
select * from y limit 10;
i
---
1
(1 row)
-- WITH RECURSIVE ref used within an IN subquery in a non-recursive CTE
with recursive
r(i) as (
select 1
union all
select r.i + 1 from r, recursive_table_2 where i = recursive_table_2.id
),
y as (
select * from recursive_table_1 where recursive_table_1.id IN (select * from r limit 10)
)
select * from y;
id
----
1
(1 row)
-- WITH RECURSIVE ref used within a NOT IN subquery in a non-recursive CTE
with recursive
r(i) as (
select 1
union all
select r.i + 1 from r, recursive_table_2 where i = recursive_table_2.id
),
y as (
select * from recursive_table_1 where recursive_table_1.id NOT IN (select * from r limit 10)
)
select * from y;
id
-----
2
100
(2 rows)
-- WITH RECURSIVE ref used within an EXISTS subquery in a non-recursive CTE
with recursive
r(i) as (
select 1
union all
select r.i + 1 from r, recursive_table_2 where i = recursive_table_2.id
),
y as (
select * from recursive_table_1 where EXISTS (select * from r limit 10)
)
select * from y;
id
-----
1
2
100
(3 rows)
-- WITH RECURSIVE ref used within a NOT EXISTS subquery in a non-recursive CTE
with recursive
r(i) as (
select 1
union all
select r.i + 1 from r, recursive_table_2 where i = recursive_table_2.id
),
y as (
select * from recursive_table_1 where NOT EXISTS (select * from r limit 10)
)
select * from y;
id
----
(0 rows)
-- WITH RECURSIVE non-recursive ref used within an EXISTS subquery in a recursive CTE
with recursive
r as (
select * from recursive_table_2
),
y(i) as (
select 1
union all
select i + 1 from y, recursive_table_1 where i = recursive_table_1.id and EXISTS (select * from r)
)
select * from y limit 10;
i
---
1
2
3
(3 rows)
-- WITH RECURSIVE non-recursive ref used within a NOT EXISTS subquery in a recursive CTE
with recursive
r as (
select * from recursive_table_2
),
y(i) as (
select 1
union all
select i + 1 from y, recursive_table_1 where i = recursive_table_1.id and NOT EXISTS (select * from r)
)
select * from y limit 10;
i
---
1
(1 row)
-- WITH ref used within an IN subquery in another CTE
with
r as (
select * from recursive_table_2 where id < 21
),
y as (
select * from recursive_table_1 where id IN (select * from r)
)
select * from y;
id
----
(0 rows)
-- WITH ref used within a NOT IN subquery in another CTE
with
r as (
select * from recursive_table_2 where id < 21
),
y as (
select * from recursive_table_1 where id NOT IN (select * from r)
)
select * from y;
id
-----
1
2
100
(3 rows)
-- WITH ref used within an EXISTS subquery in another CTE
with
r as (
select * from recursive_table_2 where id < 21
),
y as (
select * from recursive_table_1 where EXISTS (select * from r)
)
select * from y;
id
-----
1
2
100
(3 rows)
-- WITH ref used within a NOT EXISTS subquery in another CTE
with
r as (
select * from recursive_table_2 where id < 21
),
y as (
select * from recursive_table_1 where NOT EXISTS (select * from r)
)
select * from y;
id
----
(0 rows)
-- WITH RECURSIVE ref used within a IN subquery in another recursive CTE
with recursive
r(i) as (
select 1
union all
select r.i + 1 from r, recursive_table_2 where i = recursive_table_2.id
),
y(i) as (
select 1
union all
select i + 1 from y, recursive_table_1 where i = recursive_table_1.id and i IN (select * from r limit 10)
)
select * from y limit 10;
i
---
1
2
(2 rows)
-- WITH RECURSIVE ref used within a NOT IN subquery in another recursive CTE
with recursive
r(i) as (
select 1
union all
select r.i + 1 from r, recursive_table_2 where i = recursive_table_2.id
),
y(i) as (
select 1
union all
select i + 1 from y, recursive_table_1 where i = recursive_table_1.id and i NOT IN (select * from r limit 10)
)
select * from y limit 10;
i
---
1
(1 row)
-- WITH RECURSIVE non-recursive ref used within an IN subquery in a recursive CTE
with recursive
r as (
select * from recursive_table_2
),
y(i) as (
select 1
union all
select i + 1 from y, recursive_table_1 where i = recursive_table_1.id and i IN (select * from r)
)
select * from y limit 10;
i
---
1
(1 row)
-- WITH RECURSIVE non-recursive ref used within a NOT IN subquery in a recursive CTE
with recursive
r as (
select * from recursive_table_2
),
y(i) as (
select 1
union all
select i + 1 from y, recursive_table_1 where i = recursive_table_1.id and i NOT IN (select * from r)
)
select * from y limit 10;
i
---
1
2
3
(3 rows)
create table recursive_table_3(id int, a int);
insert into recursive_table_3 values (1, 2), (2, 3);
-- WITH RECURSIVE ref used within a window function
with recursive r(i, j) as (
select * from recursive_table_3
union all
select i + 1, j from r, recursive_table_3 where r.i < recursive_table_3.id
)
select avg(i) over(partition by j) from r limit 100;
avg
--------------------
2.0000000000000000
1.5000000000000000
1.5000000000000000
(3 rows)
-- WITH RECURSIVE ref used within a UDF
create function sum_to_zero(integer) returns bigint as $$
with recursive r(i) as (
select $1
union all
select i - 1 from r where i > 0
)
select sum(i) from r;
$$ language sql;
select sum_to_zero(10);
sum_to_zero
-------------
55
(1 row)
-- WITH RECURSIVE ref used within a UDF against a distributed table
create table people(name text, parent_of text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'name' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into people values ('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e');
create function get_lineage(text) returns setof text as $$
with recursive r(person) as (
select name from people where name = $1
union all
select name from r, people where people.parent_of = r.person
)
select * from r;
$$ language sql;
select get_lineage('d');
get_lineage
-------------
d
c
b
a
(4 rows)
-- non-recursive CTE nested in non-recursive enclosing CTE
INSERT INTO recursive_table_1 SELECT i FROM generate_series(0, 100) i;
SELECT MAX(j)
FROM
(
WITH nr1(i) AS (SELECT id FROM recursive_table_1 WHERE id >= 10)
SELECT * FROM
(
WITH nr2(j) AS (SELECT i FROM nr1 WHERE i >= 50)
SELECT nr2.j FROM nr2, nr1
) sub2
) sub1;
max
-----
100
(1 row)
-- non-recursive CTE nested in recursive enclosing CTE
WITH RECURSIVE r1(i) AS
(
SELECT 1
UNION ALL
(
WITH r2(j) AS
(
SELECT id FROM recursive_table_1 WHERE id < 5
)
SELECT SUM(j) FROM r2
)
)
SELECT * FROM r1;
i
----
1
13
(2 rows)
-- recursive CTE nested in recursive enclosing CTE
WITH RECURSIVE r1(i) AS
(
SELECT 1
UNION ALL
(
WITH RECURSIVE r2(j) AS
(
SELECT 1
UNION ALL
SELECT j + 1 FROM r2 WHERE j < 5
)
SELECT i + 1 FROM r1, r2 WHERE i < 5
)
)
SELECT SUM(i) FROM r1;
sum
------
3711
(1 row)
-- recursive CTE nested in non-recursive enclosing CTE
WITH nr(i) AS
(
WITH RECURSIVE r(j) AS
(
SELECT 1
UNION ALL
SELECT j + 1 FROM r WHERE j < 5
)
SELECT SUM(j) FROM r
)
SELECT SUM(i) FROM nr;
sum
-----
15
(1 row)
-- WITH RECURSIVE ref within a correlated subquery
create table recursive_table_4(a int, b int);
create table recursive_table_5(c int, d int);
insert into recursive_table_4 select i, i* 2 from generate_series(1, 10) i;
insert into recursive_table_5 select i/2, i from generate_series(1, 10) i;
select * from recursive_table_4 where a > ALL (
with recursive r(i) as (
select sum(c) from recursive_table_5 where d < recursive_table_4.b
union all
select i / 2 from r where i > 0
)
select * from r
);
a | b
---+---
1 | 2
(1 row)
with recursive x(i) as (
select 1
),
y(i) as (
select sum(i) from x
union all
select i + 1 from y
),
z(i) as (
select avg(i) from x
union all
select i + 1 from z
)
(select * from y limit 5)
union
(select * from z limit 10);
i
-------------------------
1
2
3
4
5
6.00000000000000000000
7.00000000000000000000
8.00000000000000000000
9.00000000000000000000
10.00000000000000000000
(10 rows)
-- WTIH RECURSIVE and replicated table
create table t_rep_test_rcte(c int) distributed replicated;
create table t_rand_test_rcte(c int) distributed by (c);
insert into t_rep_test_rcte values (1);
insert into t_rand_test_rcte values (1), (2), (3);
analyze t_rep_test_rcte;
analyze t_rand_test_rcte;
explain
with recursive the_cte_here(n) as (
select * from t_rep_test_rcte
union all
select n+1 from the_cte_here join t_rand_test_rcte
on t_rand_test_rcte.c = the_cte_here.n)
select * from the_cte_here;
QUERY PLAN
----------------------------------------------------------------------------------------------
Recursive Union (cost=1.02..15.27 rows=34 width=4)
-> Gather Motion 1:1 (slice1; segments: 1) (cost=1.02..1.02 rows=1 width=4)
-> Seq Scan on t_rep_test_rcte (cost=0.00..1.01 rows=1 width=4)
-> Hash Join (cost=1.09..1.36 rows=3 width=4)
Hash Cond: (the_cte_here.n = t_rand_test_rcte.c)
-> WorkTable Scan on the_cte_here (cost=0.00..0.20 rows=10 width=4)
-> Hash (cost=1.05..1.05 rows=3 width=4)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.05 rows=3 width=4)
-> Seq Scan on t_rand_test_rcte (cost=0.00..1.01 rows=1 width=4)
Optimizer: Postgres query optimizer
(10 rows)
with recursive the_cte_here(n) as (
select * from t_rep_test_rcte
union all
select n+1 from the_cte_here join t_rand_test_rcte
on t_rand_test_rcte.c = the_cte_here.n)
select * from the_cte_here;
n
---
1
2
3
4
(4 rows)
-- WITH RECURSIVE non-recursive shouldn't have hash locus
create table recursive_locus_t1(id int, pid int, name text) distributed by(id);
insert into recursive_locus_t1 values(0, -1, 'AAA');
insert into recursive_locus_t1 values(1, 0, 'B1');
insert into recursive_locus_t1 values(2, 0, 'B2');
insert into recursive_locus_t1 values(3, 1, 'C1_1');
insert into recursive_locus_t1 values(4, 1, 'C1_2');
insert into recursive_locus_t1 values(5, 1, 'C1_3');
insert into recursive_locus_t1 values(6, 2, 'C2_1');
insert into recursive_locus_t1 values(7, 2, 'C2_2');
insert into recursive_locus_t1 values(8, 2, 'C2_3');
explain (costs off) with RECURSIVE cte as (
select a.id, a.name from recursive_locus_t1 a where id=0
union all
select k.id, (c.name || '>' || k.name) as name from recursive_locus_t1 k inner join cte c on c.id = k.pid
)
select id,name from cte;
QUERY PLAN
---------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Recursive Union
-> Seq Scan on recursive_locus_t1 a
Filter: (id = 0)
-> Hash Join
Hash Cond: (c.id = k.pid)
-> WorkTable Scan on cte c
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on recursive_locus_t1 k
Optimizer: Postgres query optimizer
(11 rows)
with RECURSIVE cte as (
select a.id, a.name from recursive_locus_t1 a where id=0
union all
select k.id, (c.name || '>' || k.name) as name from recursive_locus_t1 k inner join cte c on c.id = k.pid
)
select id,name from cte;
id | name
----+-------------
0 | AAA
1 | AAA>B1
2 | AAA>B2
4 | AAA>B1>C1_2
3 | AAA>B1>C1_1
5 | AAA>B1>C1_3
8 | AAA>B2>C2_3
7 | AAA>B2>C2_2
6 | AAA>B2>C2_1
(9 rows)
create table recursive_locus_t2(id int, pid int, name text) distributed replicated;
insert into recursive_locus_t2 values(0, -1, 'AAA');
insert into recursive_locus_t2 values(1, 0, 'B1');
insert into recursive_locus_t2 values(2, 0, 'B2');
insert into recursive_locus_t2 values(3, 1, 'C1_1');
insert into recursive_locus_t2 values(4, 1, 'C1_2');
insert into recursive_locus_t2 values(5, 1, 'C1_3');
insert into recursive_locus_t2 values(6, 2, 'C2_1');
insert into recursive_locus_t2 values(7, 2, 'C2_2');
insert into recursive_locus_t2 values(8, 2, 'C2_3');
explain (costs off) with RECURSIVE cte as (
select a.id, a.name from recursive_locus_t2 a where id=0
union all
select k.id, (c.name || '>' || k.name) as name from recursive_locus_t2 k inner join cte c on c.id = k.pid
)
select id,name from cte;
QUERY PLAN
------------------------------------------------------------
Recursive Union
-> Gather Motion 1:1 (slice1; segments: 1)
-> Seq Scan on recursive_locus_t2 a
Filter: (id = 0)
-> Hash Join
Hash Cond: (c.id = k.pid)
-> WorkTable Scan on cte c
-> Hash
-> Gather Motion 1:1 (slice2; segments: 1)
-> Seq Scan on recursive_locus_t2 k
Optimizer: Postgres query optimizer
(11 rows)
with RECURSIVE cte as (
select a.id, a.name from recursive_locus_t2 a where id=0
union all
select k.id, (c.name || '>' || k.name) as name from recursive_locus_t2 k inner join cte c on c.id = k.pid
)
select id,name from cte;
id | name
----+-------------
0 | AAA
2 | AAA>B2
1 | AAA>B1
8 | AAA>B2>C2_3
7 | AAA>B2>C2_2
6 | AAA>B2>C2_1
5 | AAA>B1>C1_3
4 | AAA>B1>C1_2
3 | AAA>B1>C1_1
(9 rows)
create table recursive_locus_t3(id int, pid int, name text) distributed randomly;
insert into recursive_locus_t3 values(0, -1, 'AAA');
insert into recursive_locus_t3 values(1, 0, 'B1');
insert into recursive_locus_t3 values(2, 0, 'B2');
insert into recursive_locus_t3 values(3, 1, 'C1_1');
insert into recursive_locus_t3 values(4, 1, 'C1_2');
insert into recursive_locus_t3 values(5, 1, 'C1_3');
insert into recursive_locus_t3 values(6, 2, 'C2_1');
insert into recursive_locus_t3 values(7, 2, 'C2_2');
insert into recursive_locus_t3 values(8, 2, 'C2_3');
explain (costs off) with RECURSIVE cte as (
select a.id, a.name from recursive_locus_t3 a where id=0
union all
select k.id, (c.name || '>' || k.name) as name from recursive_locus_t3 k inner join cte c on c.id = k.pid
)
select id,name from cte;
QUERY PLAN
---------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Recursive Union
-> Seq Scan on recursive_locus_t3 a
Filter: (id = 0)
-> Hash Join
Hash Cond: (c.id = k.pid)
-> WorkTable Scan on cte c
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on recursive_locus_t3 k
Optimizer: Postgres query optimizer
(11 rows)
with RECURSIVE cte as (
select a.id, a.name from recursive_locus_t3 a where id=0
union all
select k.id, (c.name || '>' || k.name) as name from recursive_locus_t3 k inner join cte c on c.id = k.pid
)
select id,name from cte;
id | name
----+-------------
0 | AAA
2 | AAA>B2
1 | AAA>B1
7 | AAA>B2>C2_2
8 | AAA>B2>C2_3
6 | AAA>B2>C2_1
3 | AAA>B1>C1_1
4 | AAA>B1>C1_2
5 | AAA>B1>C1_3
(9 rows)
-- WTIH RECURSIVE and subquery
with recursive cte (n) as
(
select 1
union all
select * from
(
with x(n) as (select n from cte)
select n + 1 from x where n < 10
) q
)
select * from cte;
n
----
1
2
3
4
5
6
7
8
9
10
(10 rows)
-- Test recursive CTE when the non-recursive term is a table scan with a
-- predicate on the distribution key, and the recursive term joins the CTE with
-- the same table on its non-distribution key
create table recursive_table_6(a numeric(4), b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into recursive_table_6 values (0::numeric, 3);
insert into recursive_table_6 values (2::numeric, 0);
insert into recursive_table_6 values (5::numeric, 0);
analyze recursive_table_6;
SELECT $query$
WITH RECURSIVE cte (i, j) AS (
SELECT a, b FROM recursive_table_6 WHERE a = 0::numeric::numeric
UNION ALL
SELECT a, b FROM recursive_table_6, cte WHERE cte.i = recursive_table_6.b
)
SELECT i, j FROM cte;
$query$ AS qry \gset
EXPLAIN (COSTS OFF)
:qry ;
QUERY PLAN
---------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Recursive Union
-> Seq Scan on recursive_table_6
Filter: (a = '0'::numeric)
-> Hash Join
Hash Cond: (cte.i = (recursive_table_6_1.b)::numeric)
-> WorkTable Scan on cte
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on recursive_table_6 recursive_table_6_1
Optimizer: Postgres query optimizer
(11 rows)
:qry ;
i | j
---+---
0 | 3
2 | 0
5 | 0
(3 rows)
-- Test recursive CTE doesnt create a plan with motion on top of worktablescan
CREATE TABLE t1 (a int, b int) DISTRIBUTED BY (a);
SET enable_nestloop = off;
SET enable_hashjoin = off;
SET enable_mergejoin = on;
explain (costs off) with recursive rcte as
(
( select a, b, 1::integer recursion_level from t1 order by 1 )
union all
select parent_table.a, parent_table.b, rcte.recursion_level + 1
from
( select a, b from t1 order by 1 ) parent_table
join rcte on rcte.b = parent_table.a
)
select count(*) from rcte;
QUERY PLAN
---------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Recursive Union
-> Sort
Sort Key: t1.a
-> Seq Scan on t1
-> Nested Loop
Join Filter: (t1_1.a = rcte.b)
-> WorkTable Scan on rcte
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Sort
Sort Key: t1_1.a
-> Seq Scan on t1 t1_1
Optimizer: Postgres query optimizer
(16 rows)
RESET enable_nestloop;
RESET enable_hashjoin;
RESET enable_mergejoin;
-- using union rather than union all for recursive union
CREATE TABLE tmp(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO tmp SELECT generate_series(1,5);
INSERT INTO tmp SELECT * FROM tmp;
EXPLAIN (costs off)
WITH RECURSIVE x(a) as
(
select a from tmp
union
select a+1 from x where a<10
)
select * from x ;
QUERY PLAN
------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> HashAggregate
Group Key: tmp.a
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: tmp.a
-> Recursive Union
-> Seq Scan on tmp
-> WorkTable Scan on x
Filter: (a < 10)
Optimizer: Postgres query optimizer
(10 rows)
WITH RECURSIVE x(a) as
(
select a from tmp
union
select a+1 from x where a<10
)
select * from x ;
a
----
8
2
4
3
7
1
6
5
9
10
(10 rows)