blob: 72830e65e0856bff93704b1777636c652871bc7e [file] [log] [blame]
set optimizer_enable_master_only_queries = on;
set optimizer_segments = 3;
set optimizer_nestloop_factor = 1.0;
--
-- Base tables for CSQ tests
--
drop table if exists csq_t1_base;
NOTICE: table "csq_t1_base" does not exist, skipping
create table csq_t1_base(x int, y int) distributed by (x);
insert into csq_t1_base values(1,2);
insert into csq_t1_base values(2,1);
insert into csq_t1_base values(4,2);
drop table if exists csq_t2_base;
NOTICE: table "csq_t2_base" does not exist, skipping
create table csq_t2_base(x int, y int) distributed by (x);
insert into csq_t2_base values(3,2);
insert into csq_t2_base values(3,2);
insert into csq_t2_base values(3,2);
insert into csq_t2_base values(3,2);
insert into csq_t2_base values(3,1);
--
-- Correlated subqueries
--
drop table if exists csq_t1;
NOTICE: table "csq_t1" does not exist, skipping
drop table if exists csq_t2;
NOTICE: table "csq_t2" does not exist, skipping
create table csq_t1(x int, y int) distributed by (x);
create table csq_t2(x int, y int) distributed by (x);
insert into csq_t1 select * from csq_t1_base;
insert into csq_t2 select * from csq_t2_base;
select * from csq_t1 where csq_t1.x >ALL (select csq_t2.x from csq_t2 where csq_t2.y=csq_t1.y) order by 1; -- expected (4,2)
x | y
---+---
4 | 2
(1 row)
--
-- correlations in the targetlist
--
select csq_t1.x, (select sum(bar.x) from csq_t1 bar where bar.x >= csq_t1.x) as sum from csq_t1 order by csq_t1.x;
x | sum
---+-----
1 | 7
2 | 6
4 | 4
(3 rows)
select csq_t1.x, (select sum(bar.x) from csq_t1 bar where bar.x = csq_t1.x) as sum from csq_t1 order by csq_t1.x;
x | sum
---+-----
1 | 1
2 | 2
4 | 4
(3 rows)
select csq_t1.x, (select bar.x from csq_t1 bar where bar.x = csq_t1.x) as sum from csq_t1 order by csq_t1.x;
x | sum
---+-----
1 | 1
2 | 2
4 | 4
(3 rows)
--
-- Another case correlations in the targetlist: PlaceHolderVar
--
drop table if exists phv_t;
NOTICE: table "phv_t" does not exist, skipping
create table phv_t(a int, b int) distributed by (a);
insert into phv_t values(1,1),(2,2);
explain(costs off) select *, (select ss.y as z from phv_t as t3 limit 1) from phv_t t1 left join
(select a as x, 42 as y from phv_t t2) ss on t1.b = ss.x order by 1,2;
QUERY PLAN
-----------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Merge Key: t1.a, t1.b
-> Result
-> Sort
Sort Key: t1.a, t1.b
-> Hash Left Join
Hash Cond: (t1.b = t2.a)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: t1.b
-> Seq Scan on phv_t t1
-> Hash
-> Seq Scan on phv_t t2
SubPlan 1
-> Limit
-> Result
-> Materialize
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Seq Scan on phv_t t3
Optimizer: GPORCA
(19 rows)
select *, (select ss.y as z from phv_t as t3 limit 1) from phv_t t1 left join
(select a as x, 42 as y from phv_t t2) ss on t1.b = ss.x order by 1,2;
a | b | x | y | z
---+---+---+----+----
1 | 1 | 1 | 42 | 42
2 | 2 | 2 | 42 | 42
(2 rows)
--
-- CSQs with partitioned tables
--
drop table if exists csq_t1;
drop table if exists csq_t2;
create table csq_t1(x int, y int)
distributed by (x)
partition by range (y) ( start (0) end (4) every (1))
;
create table csq_t2(x int, y int)
distributed by (x)
partition by range (y) ( start (0) end (4) every (1))
;
insert into csq_t1 select * from csq_t1_base;
insert into csq_t2 select * from csq_t2_base;
explain select * from csq_t1 where csq_t1.x >ALL (select csq_t2.x from csq_t2 where csq_t2.y=csq_t1.y) order by 1;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Result (cost=0.00..1293.00 rows=1 width=8)
Filter: (SubPlan 1)
-> Sort (cost=0.00..431.00 rows=1 width=8)
Sort Key: csq_t1.x
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=8)
-> Dynamic Seq Scan on csq_t1 (cost=0.00..431.00 rows=1 width=8)
Number of partitions to scan: 4
SubPlan 1
-> Result (cost=0.00..431.00 rows=1 width=1)
Filter: ((CASE WHEN (sum((CASE WHEN (csq_t1.x <= csq_t2.x) THEN 1 ELSE 0 END)) IS NULL) THEN true WHEN (sum((CASE WHEN (csq_t2.x IS NULL) THEN 1 ELSE 0 END)) > '0'::bigint) THEN NULL::boolean WHEN (csq_t1.x IS NULL) THEN NULL::boolean WHEN (sum((CASE WHEN (csq_t1.x <= csq_t2.x) THEN 1 ELSE 0 END)) = '0'::bigint) THEN true ELSE false END) = true)
-> Aggregate (cost=0.00..431.00 rows=1 width=16)
-> Result (cost=0.00..431.00 rows=1 width=4)
Filter: (csq_t2.y = csq_t1.y)
-> Materialize (cost=0.00..431.00 rows=1 width=8)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8)
-> Dynamic Seq Scan on csq_t2 (cost=0.00..431.00 rows=1 width=8)
Number of partitions to scan: 4
Optimizer: Pivotal Optimizer (GPORCA)
(18 rows)
select * from csq_t1 where csq_t1.x >ALL (select csq_t2.x from csq_t2 where csq_t2.y=csq_t1.y) order by 1; -- expected (4,2)
x | y
---+---
4 | 2
(1 row)
drop table if exists csq_t1;
drop table if exists csq_t2;
drop table if exists csq_t1_base;
drop table if exists csq_t2_base;
--
-- Multi-row subqueries
--
drop table if exists mrs_t1;
NOTICE: table "mrs_t1" does not exist, skipping
create table mrs_t1(x int) distributed by (x);
insert into mrs_t1 select generate_series(1,20);
analyze mrs_t1;
explain select * from mrs_t1 where exists (select x from mrs_t1 where x < -1);
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=20 width=4)
-> Nested Loop Semi Join (cost=0.00..1293.00 rows=7 width=4)
Join Filter: true
-> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..431.00 rows=7 width=4)
-> Materialize (cost=0.00..431.00 rows=1 width=1)
-> Broadcast Motion 1:3 (slice2) (cost=0.00..431.00 rows=3 width=1)
-> Limit (cost=0.00..431.00 rows=1 width=1)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=1)
-> Seq Scan on mrs_t1 (cost=0.00..431.00 rows=1 width=1)
Filter: x < (-1)
Optimizer: Pivotal Optimizer (GPORCA) version 2.74.0
(11 rows)
select * from mrs_t1 where exists (select x from mrs_t1 where x < -1) order by 1;
x
---
(0 rows)
explain select * from mrs_t1 where exists (select x from mrs_t1 where x = 1);
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=20 width=4)
-> Nested Loop Semi Join (cost=0.00..1293.00 rows=7 width=4)
Join Filter: true
-> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..431.00 rows=7 width=4)
-> Materialize (cost=0.00..431.00 rows=1 width=1)
-> Broadcast Motion 1:3 (slice2) (cost=0.00..431.00 rows=3 width=1)
-> Limit (cost=0.00..431.00 rows=1 width=1)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=2 width=1)
-> Seq Scan on mrs_t1 (cost=0.00..431.00 rows=1 width=1)
Filter: x = 1
Optimizer: Pivotal Optimizer (GPORCA) version 2.74.0
(11 rows)
select * from mrs_t1 where exists (select x from mrs_t1 where x = 1) order by 1;
x
----
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
(20 rows)
explain select * from mrs_t1 where x in (select x-95 from mrs_t1) or x < 5;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.02 rows=20 width=4)
-> Result (cost=0.00..1293.02 rows=7 width=4)
Filter: (CASE WHEN ((count((true))) > '0'::bigint) THEN CASE WHEN ((sum((CASE WHEN ((mrs_t1_1.x = ((mrs_t1.x - 95))) IS NULL) THEN 1 ELSE 0 END))) = (count((true)))) THEN NULL::boolean ELSE true END ELSE false END OR (mrs_t1_1.x < 5))
-> GroupAggregate (cost=0.00..1293.02 rows=7 width=20)
Group Key: mrs_t1_1.x, mrs_t1_1.ctid, mrs_t1_1.gp_segment_id
-> Nested Loop Left Join (cost=0.00..1293.02 rows=56 width=19)
Join Filter: ((mrs_t1_1.x = ((mrs_t1.x - 95))) IS NOT FALSE)
-> Sort (cost=0.00..431.00 rows=7 width=14)
Sort Key: mrs_t1_1.x, mrs_t1_1.ctid, mrs_t1_1.gp_segment_id
-> Seq Scan on mrs_t1 mrs_t1_1 (cost=0.00..431.00 rows=7 width=14)
-> Materialize (cost=0.00..431.00 rows=20 width=5)
-> Result (cost=0.00..431.00 rows=20 width=5)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=20 width=4)
-> Seq Scan on mrs_t1 (cost=0.00..431.00 rows=7 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(15 rows)
select * from mrs_t1 where x in (select x-95 from mrs_t1) or x < 5 order by 1;
x
---
1
2
3
4
(4 rows)
drop table if exists mrs_t1;
--
-- Multi-row subquery from MSTR
--
drop table if exists mrs_u1;
NOTICE: table "mrs_u1" does not exist, skipping
drop table if exists mrs_u2;
NOTICE: table "mrs_u2" does not exist, skipping
create TABLE mrs_u1 (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create TABLE mrs_u2 (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into mrs_u1 values (1,2),(11,22);
insert into mrs_u2 values (1,2),(11,22),(33,44);
select * from mrs_u1 join mrs_u2 on mrs_u1.a=mrs_u2.a where mrs_u1.a in (1,11) or mrs_u2.a in (select a from mrs_u1 where a=1) order by 1;
a | b | a | b
----+----+----+----
1 | 2 | 1 | 2
11 | 22 | 11 | 22
(2 rows)
drop table if exists mrs_u1;
drop table if exists mrs_u2;
--
-- Set right motion type to subquery
--
drop table if exists gs_tab;
NOTICE: table "gs_tab" does not exist, skipping
create table gs_tab(a int, b int, c int) distributed by (a);
insert into gs_tab values (1,1,1),(2,2,2);
explain(costs off)
select a from gs_tab t1 where b in
(select b from gs_tab t2 where c in
(select c from gs_tab t3)
or (c >= 2))
or (b <= 3)
order by a;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Merge Key: t1.a
-> Sort
Sort Key: t1.a
-> Result
Filter: (CASE WHEN ((count((true))) > '0'::bigint) THEN CASE WHEN ((sum((CASE WHEN ((t1.b = t2.b) IS NULL) THEN 1 ELSE 0 END))) = (count((true)))) THEN NULL::boolean ELSE true END ELSE false END OR (t1.b <= 3))
-> GroupAggregate
Group Key: t1.a, t1.b, t1.ctid, t1.gp_segment_id
-> Nested Loop Left Join
Join Filter: ((t1.b = t2.b) IS NOT FALSE)
-> Sort
Sort Key: t1.a, t1.b, t1.ctid, t1.gp_segment_id
-> Seq Scan on gs_tab t1
-> Materialize
-> Result
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Result
Filter: (CASE WHEN ((count((true))) > '0'::bigint) THEN CASE WHEN ((sum((CASE WHEN ((t2.c = t3.c) IS NULL) THEN 1 ELSE 0 END))) = (count((true)))) THEN NULL::boolean ELSE true END ELSE false END OR (t2.c >= 2))
-> GroupAggregate
Group Key: t2.a, t2.b, t2.c, t2.ctid, t2.gp_segment_id
-> Nested Loop Left Join
Join Filter: ((t2.c = t3.c) IS NOT FALSE)
-> Sort
Sort Key: t2.a, t2.b, t2.c, t2.ctid, t2.gp_segment_id
-> Seq Scan on gs_tab t2
-> Materialize
-> Result
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Seq Scan on gs_tab t3
Optimizer: GPORCA
(30 rows)
select a from gs_tab t1 where b in
(select b from gs_tab t2 where c in
(select c from gs_tab t3)
or (c >= 2))
or (b <= 3)
order by a;
a
---
1
2
(2 rows)
drop table if exists gs_tab;
--
-- MPP-13758
--
drop table if exists csq_m1;
NOTICE: table "csq_m1" does not exist, skipping
create table csq_m1();
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
set allow_system_table_mods=true;
delete from gp_distribution_policy where localoid='csq_m1'::regclass;
reset allow_system_table_mods;
alter table csq_m1 add column x int;
insert into csq_m1 values(1);
analyze csq_m1;
drop table if exists csq_d1;
NOTICE: table "csq_d1" does not exist, skipping
create table csq_d1(x int) distributed by (x);
insert into csq_d1 select * from csq_m1;
analyze csq_d1;
explain select array(select x from csq_m1); -- no initplan
QUERY PLAN
--------------------------------------------------------------
Result (cost=1.01..1.02 rows=1 width=0)
InitPlan 1 (returns $0)
-> Seq Scan on csq_m1 (cost=0.00..1.01 rows=1 width=4)
Settings: optimizer_segments=3
Optimizer status: Postgres query optimizer
(5 rows)
select array(select x from csq_m1); -- {1}
array
-------
{1}
(1 row)
explain select array(select x from csq_d1); -- initplan
QUERY PLAN
------------------------------------------------------------------------------------
Result (cost=1.01..1.02 rows=1 width=0)
InitPlan 1 (returns $0) (slice1)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1.01 rows=1 width=4)
-> Seq Scan on csq_d1 (cost=0.00..1.01 rows=1 width=4)
Settings: optimizer_segments=3
Optimizer status: Postgres query optimizer
(6 rows)
select array(select x from csq_d1); -- {1}
array
-------
{1}
(1 row)
--
-- CSQs involving master-only and distributed tables
--
drop table if exists t3cozlib;
NOTICE: table "t3cozlib" does not exist, skipping
create table t3cozlib (c1 int , c2 varchar) with (appendonly=true, compresstype=zlib, orientation=column) distributed by (c1);
drop table if exists pg_attribute_storage;
NOTICE: table "pg_attribute_storage" does not exist, skipping
create table pg_attribute_storage (attrelid int, attnum int, attoptions text[]) distributed by (attrelid);
insert into pg_attribute_storage values ('t3cozlib'::regclass, 1, E'{\'something\'}');
insert into pg_attribute_storage values ('t3cozlib'::regclass, 2, E'{\'something2\'}');
SELECT a.attname
, pg_catalog.format_type(a.atttypid, a.atttypmod)
, ( SELECT substring(pg_catalog.pg_get_expr(d.adbin, d.adrelid) for 128)
FROM pg_catalog.pg_attrdef d
WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef
)
, a.attnotnull
, a.attnum
, a.attstorage
, pg_catalog.col_description(a.attrelid, a.attnum)
, ( SELECT s.attoptions
FROM pg_attribute_storage s
WHERE s.attrelid = a.attrelid AND s.attnum = a.attnum
) newcolumn
FROM pg_catalog.pg_attribute a
WHERE a.attrelid = 't3cozlib'::regclass AND a.attnum > 0 AND NOT a.attisdropped
ORDER BY a.attnum
; -- expect to see 2 rows
attname | format_type | substring | attnotnull | attnum | attstorage | col_description | newcolumn
---------+-------------------+-----------+------------+--------+------------+-----------------+----------------
c1 | integer | | f | 1 | p | | {'something'}
c2 | character varying | | f | 2 | x | | {'something2'}
(2 rows)
--
-- More CSQs involving master-only and distributed relations
--
drop table if exists csq_m1;
create table csq_m1();
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause, and no column type is suitable for a distribution key. Creating a NULL policy entry.
set allow_system_table_mods=true;
delete from gp_distribution_policy where localoid='csq_m1'::regclass;
reset allow_system_table_mods;
alter table csq_m1 add column x int;
insert into csq_m1 values(1),(2),(3);
analyze csq_m1;
drop table if exists csq_d1;
create table csq_d1(x int) distributed by (x);
insert into csq_d1 select * from csq_m1 where x < 3;
insert into csq_d1 values(4);
analyze csq_d1;
select * from csq_m1;
x
---
1
2
3
(3 rows)
select * from csq_d1;
x
---
1
2
4
(3 rows)
--
-- outer plan node is master-only and CSQ has distributed relation
--
explain select * from csq_m1 where x not in (select x from csq_d1) or x < -100; -- gather motion
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Result (cost=0.00..1293.00 rows=3 width=4)
Filter: (CASE WHEN ((count((true))) > '0'::bigint) THEN CASE WHEN ((sum((CASE WHEN ((csq_m1.x = csq_d1.x) IS NULL) THEN 1 ELSE 0 END))) = (count((true)))) THEN NULL::boolean ELSE false END ELSE true END OR (csq_m1.x < '-100'::integer))
-> GroupAggregate (cost=0.00..1293.00 rows=3 width=20)
Group Key: csq_m1.x, csq_m1.ctid, csq_m1.gp_segment_id
-> Nested Loop Left Join (cost=0.00..1293.00 rows=5 width=19)
Join Filter: ((csq_m1.x = csq_d1.x) IS NOT FALSE)
-> Sort (cost=0.00..431.00 rows=3 width=14)
Sort Key: csq_m1.x, csq_m1.ctid, csq_m1.gp_segment_id
-> Seq Scan on csq_m1 (cost=0.00..431.00 rows=3 width=14)
-> Materialize (cost=0.00..431.00 rows=3 width=5)
-> Result (cost=0.00..431.00 rows=3 width=5)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=3 width=4)
-> Seq Scan on csq_d1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(14 rows)
select * from csq_m1 where x not in (select x from csq_d1) or x < -100; -- (3)
x
---
3
(1 row)
--
-- outer plan node is master-only and CSQ has distributed relation
--
explain select * from csq_d1 where x not in (select x from csq_m1) or x < -100; -- broadcast motion
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=2 width=4)
-> Result (cost=0.00..1293.00 rows=1 width=4)
Filter: (CASE WHEN ((count((true))) > '0'::bigint) THEN CASE WHEN ((sum((CASE WHEN ((csq_d1.x = csq_m1.x) IS NULL) THEN 1 ELSE 0 END))) = (count((true)))) THEN NULL::boolean ELSE false END ELSE true END OR (csq_d1.x < '-100'::integer))
-> GroupAggregate (cost=0.00..1293.00 rows=1 width=20)
Group Key: csq_d1.x, csq_d1.ctid, csq_d1.gp_segment_id
-> Nested Loop Left Join (cost=0.00..1293.00 rows=2 width=19)
Join Filter: ((csq_d1.x = csq_m1.x) IS NOT FALSE)
-> Sort (cost=0.00..431.00 rows=1 width=14)
Sort Key: csq_d1.x, csq_d1.ctid, csq_d1.gp_segment_id
-> Seq Scan on csq_d1 (cost=0.00..431.00 rows=1 width=14)
-> Materialize (cost=0.00..431.00 rows=3 width=5)
-> Result (cost=0.00..431.00 rows=3 width=5)
-> Broadcast Motion 1:3 (slice2) (cost=0.00..431.00 rows=3 width=4)
-> Seq Scan on csq_m1 (cost=0.00..431.00 rows=3 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(15 rows)
select * from csq_d1 where x not in (select x from csq_m1) or x < -100; -- (4)
x
---
4
(1 row)
-- drop csq_m1 since we deleted its gp_distribution_policy entry
drop table csq_m1;
--
-- MPP-14441 Don't lose track of initplans
--
drop table if exists csq_t1;
NOTICE: table "csq_t1" does not exist, skipping
CREATE TABLE csq_t1 (a int, b int, c int, d int, e text) DISTRIBUTED BY (a);
INSERT INTO csq_t1 SELECT i, i/3, i%2, 100-i, 'text'||i FROM generate_series(1,100) i;
select count(*) from csq_t1 t1 where a > (SELECT x.b FROM ( select avg(a)::int as b,'haha'::text from csq_t1 t2 where t2.a=t1.d) x ) ;
count
-------
49
(1 row)
select count(*) from csq_t1 t1 where a > ( select avg(a)::int from csq_t1 t2 where t2.a=t1.d) ;
count
-------
49
(1 row)
--
-- correlation in a func expr
--
CREATE OR REPLACE FUNCTION csq_f(a int) RETURNS int AS $$ select $1 $$ LANGUAGE SQL CONTAINS SQL;
DROP TABLE IF EXISTS csq_r;
NOTICE: table "csq_r" does not exist, skipping
CREATE TABLE csq_r(a int) distributed by (a);
INSERT INTO csq_r VALUES (1);
-- subqueries shouldn't be pulled into a join if the from clause has a function call
-- with a correlated argument
-- force_explain
explain SELECT * FROM csq_r WHERE a IN (SELECT * FROM csq_f(csq_r.a));
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=4)
-> Seq Scan on csq_r (cost=0.00..862.00 rows=1 width=4)
Filter: (a = (SubPlan 1))
SubPlan 1
-> Result (cost=0.00..0.00 rows=1 width=1)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(6 rows)
SELECT * FROM csq_r WHERE a IN (SELECT * FROM csq_f(csq_r.a));
a
---
1
(1 row)
-- force_explain
explain SELECT * FROM csq_r WHERE a not IN (SELECT * FROM csq_f(csq_r.a));
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=4)
-> Seq Scan on csq_r (cost=0.00..862.00 rows=1 width=4)
Filter: (a <> (SubPlan 1))
SubPlan 1
-> Result (cost=0.00..0.00 rows=1 width=1)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(6 rows)
SELECT * FROM csq_r WHERE a not IN (SELECT * FROM csq_f(csq_r.a));
a
---
(0 rows)
-- force_explain
explain SELECT * FROM csq_r WHERE exists (SELECT * FROM csq_f(csq_r.a));
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=4)
-> Seq Scan on csq_r (cost=0.00..862.00 rows=1 width=4)
Filter: (SubPlan 1)
SubPlan 1
-> Result (cost=0.00..0.00 rows=1 width=1)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(6 rows)
SELECT * FROM csq_r WHERE exists (SELECT * FROM csq_f(csq_r.a));
a
---
1
(1 row)
-- force_explain
explain SELECT * FROM csq_r WHERE not exists (SELECT * FROM csq_f(csq_r.a));
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=4)
-> Seq Scan on csq_r (cost=0.00..862.00 rows=1 width=4)
Filter: (SubPlan 1)
SubPlan 1
-> Result (cost=0.00..0.00 rows=1 width=1)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(6 rows)
SELECT * FROM csq_r WHERE not exists (SELECT * FROM csq_f(csq_r.a));
a
---
(0 rows)
-- force_explain
explain SELECT * FROM csq_r WHERE a > (SELECT csq_f FROM csq_f(csq_r.a) limit 1);
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=4)
-> Seq Scan on csq_r (cost=0.00..862.00 rows=1 width=4)
Filter: (a > (SubPlan 1))
SubPlan 1
-> Limit (cost=0.00..0.00 rows=1 width=4)
-> Result (cost=0.00..0.00 rows=1 width=1)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(7 rows)
SELECT * FROM csq_r WHERE a > (SELECT csq_f FROM csq_f(csq_r.a) limit 1);
a
---
(0 rows)
-- force_explain
explain SELECT * FROM csq_r WHERE a < ANY (SELECT csq_f FROM csq_f(csq_r.a));
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=4)
-> Seq Scan on csq_r (cost=0.00..862.00 rows=1 width=4)
Filter: (a < (SubPlan 1))
SubPlan 1
-> Result (cost=0.00..0.00 rows=1 width=1)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(6 rows)
SELECT * FROM csq_r WHERE a < ANY (SELECT csq_f FROM csq_f(csq_r.a));
a
---
(0 rows)
-- force_explain
explain SELECT * FROM csq_r WHERE a <= ALL (SELECT csq_f FROM csq_f(csq_r.a));
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=4)
-> Seq Scan on csq_r (cost=0.00..862.00 rows=1 width=4)
Filter: (a <= (SubPlan 1))
SubPlan 1
-> Result (cost=0.00..0.00 rows=1 width=1)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(6 rows)
SELECT * FROM csq_r WHERE a <= ALL (SELECT csq_f FROM csq_f(csq_r.a));
a
---
1
(1 row)
-- force_explain
explain SELECT * FROM csq_r WHERE a IN (SELECT csq_f FROM csq_f(csq_r.a),csq_r);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1724.00 rows=1 width=4)
-> Seq Scan on csq_r (cost=0.00..1724.00 rows=1 width=4)
Filter: (SubPlan 1)
SubPlan 1
-> Nested Loop (cost=0.00..862.00 rows=1 width=4)
Join Filter: true
-> Result (cost=0.00..0.00 rows=1 width=1)
-> Materialize (cost=0.00..431.00 rows=1 width=1)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=1)
-> Seq Scan on csq_r csq_r_1 (cost=0.00..431.00 rows=1 width=1)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(11 rows)
SELECT * FROM csq_r WHERE a IN (SELECT csq_f FROM csq_f(csq_r.a),csq_r);
a
---
1
(1 row)
--
-- Test pullup of expr CSQs to joins
--
--
-- Test data
--
drop table if exists csq_pullup;
NOTICE: table "csq_pullup" does not exist, skipping
create table csq_pullup(t text, n numeric, i int, v varchar(10)) distributed by (t);
insert into csq_pullup values ('abc',1, 2, 'xyz');
analyze csq_pullup;
insert into csq_pullup values ('xyz',2, 3, 'def');
insert into csq_pullup values ('def',3, 1, 'abc');
--
-- Expr CSQs to joins
--
--
-- text, text
--
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t);
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=17)
-> Result (cost=0.00..862.00 rows=1 width=17)
Filter: (1 = COALESCE((count(*)), '0'::bigint))
-> Hash Left Join (cost=0.00..862.00 rows=1 width=25)
Hash Cond: (t0.t = t1.t)
-> Seq Scan on csq_pullup t0 (cost=0.00..431.00 rows=1 width=17)
-> Hash (cost=431.00..431.00 rows=1 width=12)
-> GroupAggregate (cost=0.00..431.00 rows=1 width=12)
Group Key: t1.t
-> Sort (cost=0.00..431.00 rows=1 width=4)
Sort Key: t1.t
-> Seq Scan on csq_pullup t1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(13 rows)
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t);
t | n | i | v
-----+---+---+-----
xyz | 2 | 3 | def
def | 3 | 1 | abc
abc | 1 | 2 | xyz
(3 rows)
--
-- text, varchar
--
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.v);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=17)
-> Result (cost=0.00..862.00 rows=1 width=17)
Filter: (1 = COALESCE((count(*)), '0'::bigint))
-> Hash Left Join (cost=0.00..862.00 rows=1 width=25)
Hash Cond: (t0.t = (t1.v)::text)
-> Seq Scan on csq_pullup t0 (cost=0.00..431.00 rows=1 width=17)
-> Hash (cost=431.00..431.00 rows=1 width=12)
-> GroupAggregate (cost=0.00..431.00 rows=1 width=12)
Group Key: t1.v
-> Sort (cost=0.00..431.00 rows=1 width=4)
Sort Key: t1.v
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
Hash Key: t1.v
-> Seq Scan on csq_pullup t1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(15 rows)
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.v);
t | n | i | v
-----+---+---+-----
xyz | 2 | 3 | def
def | 3 | 1 | abc
abc | 1 | 2 | xyz
(3 rows)
--
-- numeric, numeric
--
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n=t1.n);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Result (cost=0.00..862.00 rows=1 width=17)
Filter: (1 = COALESCE((count(*)), '0'::bigint))
-> Hash Left Join (cost=0.00..862.00 rows=2 width=25)
Hash Cond: (t0.n = t1.n)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=17)
-> Seq Scan on csq_pullup t0 (cost=0.00..431.00 rows=1 width=17)
-> Hash (cost=431.00..431.00 rows=1 width=13)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=13)
-> GroupAggregate (cost=0.00..431.00 rows=1 width=13)
Group Key: t1.n
-> Sort (cost=0.00..431.00 rows=1 width=5)
Sort Key: t1.n
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=5)
Hash Key: t1.n
-> Seq Scan on csq_pullup t1 (cost=0.00..431.00 rows=1 width=5)
Optimizer: Pivotal Optimizer (GPORCA)
(16 rows)
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n=t1.n);
t | n | i | v
-----+---+---+-----
xyz | 2 | 3 | def
def | 3 | 1 | abc
abc | 1 | 2 | xyz
(3 rows)
--
-- function(numeric), function(numeric)
--
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.n + 1);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..5172.20 rows=1 width=17)
-> Result (cost=0.00..5172.20 rows=1 width=17)
Filter: (1 = (SubPlan 1))
-> Seq Scan on csq_pullup t0 (cost=0.00..5172.19 rows=334 width=36)
SubPlan 1
-> Aggregate (cost=0.00..431.00 rows=1 width=8)
-> Result (cost=0.00..431.00 rows=1 width=1)
Filter: ((t0.n + '1'::numeric) = (t1.n + '1'::numeric))
-> Materialize (cost=0.00..431.00 rows=1 width=5)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=5)
-> Seq Scan on csq_pullup t1 (cost=0.00..431.00 rows=1 width=5)
Optimizer: Pivotal Optimizer (GPORCA) version 2.74.0
(12 rows)
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.n + 1);
t | n | i | v
-----+---+---+-----
abc | 1 | 2 | xyz
xyz | 2 | 3 | def
def | 3 | 1 | abc
(3 rows)
--
-- function(numeric), function(int)
--
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.i + 1);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..5172.20 rows=1 width=17)
-> Result (cost=0.00..5172.20 rows=1 width=17)
Filter: (1 = (SubPlan 1))
-> Seq Scan on csq_pullup t0 (cost=0.00..5172.19 rows=334 width=36)
SubPlan 1
-> Aggregate (cost=0.00..431.00 rows=1 width=8)
-> Result (cost=0.00..431.00 rows=1 width=1)
Filter: ((t0.n + '1'::numeric) = ((t1.i + 1))::numeric)
-> Materialize (cost=0.00..431.00 rows=1 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on csq_pullup t1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(12 rows)
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + 1=t1.i + 1);
t | n | i | v
-----+---+---+-----
abc | 1 | 2 | xyz
xyz | 2 | 3 | def
def | 3 | 1 | abc
(3 rows)
--
-- Test a few cases where pulling up an aggregate subquery is not possible
--
-- subquery contains a LIMIT
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t LIMIT 1);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..5172.20 rows=1 width=17)
-> Result (cost=0.00..5172.20 rows=1 width=17)
Filter: (1 = (SubPlan 1))
-> Seq Scan on csq_pullup t0 (cost=0.00..5172.19 rows=334 width=36)
SubPlan 1
-> Limit (cost=0.00..431.00 rows=1 width=8)
-> Aggregate (cost=0.00..431.00 rows=1 width=8)
-> Result (cost=0.00..431.00 rows=1 width=1)
Filter: (t0.t = t1.t)
-> Materialize (cost=0.00..431.00 rows=1 width=4)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on csq_pullup t1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(13 rows)
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t LIMIT 1);
t | n | i | v
-----+---+---+-----
xyz | 2 | 3 | def
def | 3 | 1 | abc
abc | 1 | 2 | xyz
(3 rows)
-- subquery contains a HAVING clause
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t HAVING count(*) < 10);
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=17)
-> Result (cost=0.00..862.00 rows=1 width=17)
Filter: (1 = COALESCE((count(*)), '0'::bigint))
-> Hash Left Join (cost=0.00..862.00 rows=1 width=25)
Hash Cond: (t0.t = t1.t)
-> Seq Scan on csq_pullup t0 (cost=0.00..431.00 rows=1 width=17)
-> Hash (cost=431.00..431.00 rows=1 width=12)
-> Result (cost=0.00..431.00 rows=1 width=12)
Filter: ((count(*)) < 10)
-> GroupAggregate (cost=0.00..431.00 rows=1 width=20)
Group Key: t1.t
-> Sort (cost=0.00..431.00 rows=1 width=4)
Sort Key: t1.t
-> Seq Scan on csq_pullup t1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(15 rows)
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.t=t1.t HAVING count(*) < 10);
t | n | i | v
-----+---+---+-----
xyz | 2 | 3 | def
def | 3 | 1 | abc
abc | 1 | 2 | xyz
(3 rows)
-- subquery contains quals of form 'function(outervar, innervar1) = innvervar2'
explain select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + t1.n =t1.i);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..5172.20 rows=1 width=17)
-> Result (cost=0.00..5172.20 rows=1 width=17)
Filter: (1 = (SubPlan 1))
-> Seq Scan on csq_pullup t0 (cost=0.00..5172.19 rows=334 width=36)
SubPlan 1
-> Aggregate (cost=0.00..431.00 rows=1 width=8)
-> Result (cost=0.00..431.00 rows=1 width=1)
Filter: ((t0.n + t1.n) = (t1.i)::numeric)
-> Materialize (cost=0.00..431.00 rows=1 width=9)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=9)
-> Seq Scan on csq_pullup t1 (cost=0.00..431.00 rows=1 width=9)
Optimizer: Pivotal Optimizer (GPORCA) version 3.93.0
(12 rows)
select * from csq_pullup t0 where 1= (select count(*) from csq_pullup t1 where t0.n + t1.n =t1.i);
t | n | i | v
---+---+---+---
(0 rows)
--
-- NOT EXISTS CSQs to joins
--
--
-- text, text
--
explain select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.t=t1.t and t1.i = 1);
QUERY PLAN
-------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=17)
-> Hash Anti Join (cost=0.00..862.00 rows=1 width=17)
Hash Cond: (t0.t = t1.t)
-> Seq Scan on csq_pullup t0 (cost=0.00..431.00 rows=1 width=17)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> Seq Scan on csq_pullup t1 (cost=0.00..431.00 rows=1 width=4)
Filter: (i = 1)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(8 rows)
select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.t=t1.t and t1.i = 1);
t | n | i | v
-----+---+---+-----
abc | 1 | 2 | xyz
xyz | 2 | 3 | def
(2 rows)
--
-- int, function(int)
--
explain select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.i=t1.i + 1);
QUERY PLAN
---------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=17)
-> Hash Anti Join (cost=0.00..862.00 rows=1 width=17)
Hash Cond: (t0.i = (t1.i + 1))
-> Seq Scan on csq_pullup t0 (cost=0.00..431.00 rows=1 width=17)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on csq_pullup t1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(8 rows)
select * from csq_pullup t0 where not exists (select 1 from csq_pullup t1 where t0.i=t1.i + 1);
t | n | i | v
-----+---+---+-----
def | 3 | 1 | abc
(1 row)
--
-- wrong results bug MPP-16477
--
drop table if exists subselect_t1;
NOTICE: table "subselect_t1" does not exist, skipping
drop table if exists subselect_t2;
NOTICE: table "subselect_t2" does not exist, skipping
create table subselect_t1(x int) distributed by (x);
insert into subselect_t1 values(1),(2);
create table subselect_t2(y int) distributed by (y);
insert into subselect_t2 values(1),(2),(2);
analyze subselect_t1;
analyze subselect_t2;
explain select * from subselect_t1 where x in (select y from subselect_t2);
QUERY PLAN
--------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=4)
-> Hash Semi Join (cost=0.00..862.00 rows=1 width=4)
Hash Cond: (subselect_t1.x = subselect_t2.y)
-> Seq Scan on subselect_t1 (cost=0.00..431.00 rows=1 width=4)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> Seq Scan on subselect_t2 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 2.64.0
(7 rows)
select * from subselect_t1 where x in (select y from subselect_t2);
x
---
1
2
(2 rows)
-- start_ignore
-- Known_opt_diff: MPP-21351
-- end_ignore
explain select * from subselect_t1 where x in (select y from subselect_t2 union all select y from subselect_t2);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=2 width=4)
-> Hash Join (cost=0.00..1293.00 rows=1 width=4)
Hash Cond: (subselect_t1.x = subselect_t2.y)
-> Seq Scan on subselect_t1 (cost=0.00..431.00 rows=1 width=4)
-> Hash (cost=862.00..862.00 rows=1 width=4)
-> GroupAggregate (cost=0.00..862.00 rows=1 width=4)
Group Key: subselect_t2.y
-> Sort (cost=0.00..862.00 rows=2 width=4)
Sort Key: subselect_t2.y
-> Append (cost=0.00..862.00 rows=2 width=4)
-> Seq Scan on subselect_t2 (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on subselect_t2 subselect_t2_1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select * from subselect_t1 where x in (select y from subselect_t2 union all select y from subselect_t2);
x
---
1
2
(2 rows)
explain select count(*) from subselect_t1 where x in (select y from subselect_t2);
QUERY PLAN
--------------------------------------------------------------------------------------
Aggregate (cost=0.00..862.00 rows=1 width=8)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=1)
-> Hash Semi Join (cost=0.00..862.00 rows=1 width=1)
Hash Cond: (subselect_t1.x = subselect_t2.y)
-> Seq Scan on subselect_t1 (cost=0.00..431.00 rows=1 width=4)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> Seq Scan on subselect_t2 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(8 rows)
select count(*) from subselect_t1 where x in (select y from subselect_t2);
count
-------
2
(1 row)
-- start_ignore
-- Known_opt_diff: MPP-21351
-- end_ignore
explain select count(*) from subselect_t1 where x in (select y from subselect_t2 union all select y from subselect_t2);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
Aggregate (cost=0.00..1293.00 rows=1 width=8)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=2 width=1)
-> Hash Join (cost=0.00..1293.00 rows=1 width=1)
Hash Cond: (subselect_t1.x = subselect_t2.y)
-> Seq Scan on subselect_t1 (cost=0.00..431.00 rows=1 width=4)
-> Hash (cost=862.00..862.00 rows=1 width=4)
-> GroupAggregate (cost=0.00..862.00 rows=1 width=4)
Group Key: subselect_t2.y
-> Sort (cost=0.00..862.00 rows=2 width=4)
Sort Key: subselect_t2.y
-> Append (cost=0.00..862.00 rows=2 width=4)
-> Seq Scan on subselect_t2 (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on subselect_t2 subselect_t2_1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(14 rows)
select count(*) from subselect_t1 where x in (select y from subselect_t2 union all select y from subselect_t2);
count
-------
2
(1 row)
select count(*) from
( select 1 as FIELD_1 union all select 2 as FIELD_1 ) TABLE_1
where FIELD_1 in ( select 1 as FIELD_1 union all select 1 as FIELD_1 union all select 1 as FIELD_1 );
count
-------
1
(1 row)
--
-- Query was deadlocking because of not squelching subplans (MPP-18936)
--
drop table if exists t1;
NOTICE: table "t1" does not exist, skipping
drop table if exists t2;
NOTICE: table "t2" does not exist, skipping
drop table if exists t3;
NOTICE: table "t3" does not exist, skipping
drop table if exists t4;
NOTICE: table "t4" does not exist, skipping
CREATE TABLE t1 AS (SELECT generate_series(1, 5000) AS i, generate_series(5001, 10000) AS j);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
CREATE TABLE t2 AS (SELECT * FROM t1 WHERE gp_segment_id = 0);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
CREATE TABLE t3 AS (SELECT * FROM t1 WHERE gp_segment_id = 1);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
CREATE TABLE t4 (i1 int, i2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
set gp_interconnect_queue_depth=1;
-- This query was deadlocking on a 2P system
INSERT INTO t4
(
SELECT t1.i, (SELECT t3.i FROM t3 WHERE t3.i + 1 = t1.i + 1)
FROM t1, t3
WHERE t1.i = t3.i
)
UNION
(
SELECT t1.i, (SELECT t2.i FROM t2 WHERE t2.i + 1 = t1.i + 1)
FROM t1, t2
WHERE t1.i = t2.i
);
drop table if exists t1;
drop table if exists t2;
drop table if exists t3;
drop table if exists t4;
--
-- Initplans with no corresponding params should be removed MPP-20600
--
drop table if exists t1;
NOTICE: table "t1" does not exist, skipping
drop table if exists t2;
NOTICE: table "t2" does not exist, skipping
create table t1(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t2(b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select * from t1 where a=1 and a=2 and a > (select t2.b from t2);
a
---
(0 rows)
explain select * from t1 where a=1 and a=2 and a > (select t2.b from t2);
QUERY PLAN
------------------------------------------------------
Result (cost=0.00..0.00 rows=0 width=4)
One-Time Filter: false
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(3 rows)
explain select * from t1 where a=1 and a=2 and a > (select t2.b from t2)
union all
select * from t1 where a=1 and a=2 and a > (select t2.b from t2);
QUERY PLAN
------------------------------------------------------
Result (cost=0.00..0.00 rows=0 width=4)
One-Time Filter: false
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(3 rows)
select * from t1 where a=1 and a=2 and a > (select t2.b from t2)
union all
select * from t1 where a=1 and a=2 and a > (select t2.b from t2);
a
---
(0 rows)
explain select * from t1,
(select * from t1 where a=1 and a=2 and a > (select t2.b from t2)) foo
where t1.a = foo.a;
QUERY PLAN
------------------------------------------------------
Result (cost=0.00..0.00 rows=0 width=8)
One-Time Filter: false
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(3 rows)
select * from t1,
(select * from t1 where a=1 and a=2 and a > (select t2.b from t2)) foo
where t1.a = foo.a;
a | a
---+---
(0 rows)
--
-- Correlated subqueries with limit/offset clause must not be pulled up as join
--
insert into t1 values (1);
insert into t2 values (1);
explain select 1 from t1 where a in (select b from t2 where a = 1 limit 1);
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Result (cost=0.00..1293.00 rows=1 width=1)
Filter: (SubPlan 1)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
Filter: (a = 1)
SubPlan 1
-> Limit (cost=0.00..431.00 rows=1 width=4)
-> Result (cost=0.00..431.00 rows=1 width=4)
One-Time Filter: (t1.a = 1)
-> Materialize (cost=0.00..431.00 rows=1 width=4)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on t2 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(12 rows)
explain select 1 from t1 where a in (select b from t2 where a = 1 offset 1);
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Result (cost=0.00..1293.00 rows=1 width=1)
Filter: (SubPlan 1)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=4)
Filter: (a = 1)
SubPlan 1
-> Limit (cost=0.00..431.00 rows=1 width=4)
-> Result (cost=0.00..431.00 rows=1 width=4)
One-Time Filter: (t1.a = 1)
-> Materialize (cost=0.00..431.00 rows=1 width=4)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on t2 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(12 rows)
select 1 from t1 where a in (select b from t2 where a = 1 limit 1);
?column?
----------
1
(1 row)
select 1 from t1 where a in (select b from t2 where a = 1 offset 1);
?column?
----------
(0 rows)
drop table if exists t1;
drop table if exists t2;
--
-- Test for a bug we used to have with eliminating InitPlans. The subplan,
-- (select max(content) from y), was eliminated when it shouldn't have been.
-- The query is supposed to return 0 rows, but returned > 0 when the bug was
-- present.
--
CREATE TABLE initplan_x (i int4, t text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into initplan_x values
(1, 'foobar1'),
(2, 'foobar2'),
(3, 'foobar3'),
(4, 'foobar4'),
(5, 'foobar5');
CREATE TABLE initplan_y (content int4);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'content' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into initplan_y values (5);
select i, t from initplan_x
except
select g, t from initplan_x,
generate_series(0, (select max(content) from initplan_y)) g
order by 1;
i | t
---+---
(0 rows)
drop table if exists initplan_x;
drop table if exists initplan_y;
--
-- Test Initplans that return multiple params.
--
create table initplan_test(i int, j int, m int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into initplan_test values (1,1,1);
select * from initplan_test where row(j, m) = (select j, m from initplan_test where i = 1);
i | j | m
---+---+---
1 | 1 | 1
(1 row)
drop table initplan_test;
--
-- apply parallelization for subplan MPP-24563
--
create table t1_mpp_24563 (id int, value int) distributed by (id);
insert into t1_mpp_24563 values (1, 3);
create table t2_mpp_24563 (id int, value int, seq int) distributed by (id);
insert into t2_mpp_24563 values (1, 7, 5);
explain select row_number() over (order by seq asc) as id, foo.cnt
from
(select seq, (select count(*) from t1_mpp_24563 t1 where t1.id = t2.id) cnt from
t2_mpp_24563 t2 where value = 7) foo;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------
WindowAgg (cost=0.00..862.00 rows=1 width=16)
Order By: t2.seq
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=12)
Merge Key: t2.seq
-> Result (cost=0.00..862.00 rows=1 width=12)
-> Sort (cost=0.00..862.00 rows=1 width=12)
Sort Key: t2.seq
-> Hash Left Join (cost=0.00..862.00 rows=1 width=12)
Hash Cond: (t2.id = t1.id)
-> Seq Scan on t2_mpp_24563 t2 (cost=0.00..431.00 rows=1 width=8)
Filter: (value = 7)
-> Hash (cost=431.00..431.00 rows=1 width=12)
-> GroupAggregate (cost=0.00..431.00 rows=1 width=12)
Group Key: t1.id
-> Sort (cost=0.00..431.00 rows=1 width=4)
Sort Key: t1.id
-> Seq Scan on t1_mpp_24563 t1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(18 rows)
drop table t1_mpp_24563;
drop table t2_mpp_24563;
--
-- MPP-20470 update the flow of node after parallelizing subplan.
--
CREATE TABLE t_mpp_20470 (
col_date timestamp without time zone,
col_name character varying(6),
col_expiry date
) DISTRIBUTED BY (col_date) PARTITION BY RANGE(col_date)
(
START ('2013-05-10 00:00:00'::timestamp without time zone) END ('2013-05-11
00:00:00'::timestamp without time zone) WITH (tablename='t_mpp_20470_ptr1', appendonly=false ),
START ('2013-05-24 00:00:00'::timestamp without time zone) END ('2013-05-25
00:00:00'::timestamp without time zone) WITH (tablename='t_mpp_20470_ptr2', appendonly=false )
);
COPY t_mpp_20470 from STDIN delimiter '|' null '';
create view v1_mpp_20470 as
SELECT
CASE
WHEN b.col_name::text = 'FUTCUR'::text
THEN ( SELECT count(a.col_expiry) AS count FROM t_mpp_20470 a WHERE
a.col_name::text = b.col_name::text)::text
ELSE 'Q2'::text END AS cc, 1 AS nn
FROM t_mpp_20470 b;
explain SELECT cc, sum(nn) over() FROM v1_mpp_20470;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
WindowAgg (cost=0.00..862.00 rows=2 width=16)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=12)
-> Hash Left Join (cost=0.00..862.00 rows=1 width=16)
Hash Cond: ((b.col_name)::text = (a.col_name)::text)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8)
Hash Key: b.col_name
-> Dynamic Seq Scan on t_mpp_20470 b (cost=0.00..431.00 rows=1 width=8)
Number of partitions to scan: 2 (out of 2)
-> Hash (cost=431.00..431.00 rows=1 width=16)
-> HashAggregate (cost=0.00..431.00 rows=1 width=16)
Group Key: a.col_name
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=12)
Hash Key: a.col_name
-> Dynamic Seq Scan on t_mpp_20470 a (cost=0.00..431.00 rows=1 width=12)
Number of partitions to scan: 2 (out of 2)
Optimizer: Pivotal Optimizer (GPORCA)
(16 rows)
drop view v1_mpp_20470;
drop table t_mpp_20470;
create table tbl_25484(id int, num int) distributed by (id);
insert into tbl_25484 values(1, 1), (2, 2), (3, 3);
select id from tbl_25484 where 3 = (select 3 where 3 = (select num));
id
----
3
(1 row)
drop table tbl_25484;
reset optimizer_segments;
reset optimizer_nestloop_factor;
--
-- Test case that once triggered a bug in the IN-clause pull-up code.
--
SELECT p.id
FROM (SELECT * FROM generate_series(1,10) id
WHERE id IN (
SELECT 1
UNION ALL
SELECT 0)) p;
id
----
1
(1 row)
--
-- Verify another bug in the IN-clause pull-up code. This returned some
-- rows from xsupplier twice, because of a bug in detecting whether a
-- Redistribute node was needed.
--
CREATE TABLE xlineitem (l_orderkey int4, l_suppkey int4) distributed by (l_orderkey);
insert into xlineitem select g+3, g from generate_series(10,100) g;
insert into xlineitem select g+1, g from generate_series(10,100) g;
insert into xlineitem select g, g from generate_series(10,100) g;
CREATE TABLE xsupplier (s_suppkey int4, s_name text) distributed by (s_suppkey);
insert into xsupplier select g, 'foo' || g from generate_series(1,10) g;
select s_name from xsupplier
where s_suppkey in (
select g.l_suppkey from xlineitem g
) ;
s_name
--------
foo10
(1 row)
--
-- Another case that failed at one point. (A planner bug in pulling up a
-- subquery with constant distribution key, 1, in the outer queries.)
--
create table nested_in_tbl(tc1 int, tc2 int) distributed by (tc1);
select * from nested_in_tbl t1 where tc1 in
(select 1 from nested_in_tbl t2 where tc1 in
(select 1 from nested_in_tbl t3 where t3.tc2 = t2.tc2));
tc1 | tc2
-----+-----
(0 rows)
drop table nested_in_tbl;
--
-- Window query with a function scan that has non-correlated subquery.
--
SELECT rank() over (partition by min(c) order by min(c)) AS p_rank FROM (SELECT d AS c FROM (values(1)) d1, generate_series(0,(SELECT 2)) AS d) tt GROUP BY c;
p_rank
--------
1
1
1
(3 rows)
--
-- Remove unused subplans
--
create table foo(a int, b int) distributed by (a) partition by range(b) (start(1) end(3) every(1));
create table bar(a int, b int) distributed by (a);
with CT as (select a from foo except select a from bar)
select * from foo
where exists (select 1 from CT where CT.a = foo.a);
a | b
---+---
(0 rows)
drop table foo;
drop table bar;
--
-- Multiple SUBPLAN nodes referring to the same plan_id
--
CREATE TABLE bar_s (c integer, d character varying(10));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO bar_s VALUES (9,9);
ANALYZE bar_s;
SELECT * FROM bar_s T1 WHERE c = (SELECT max(c) FROM bar_s T2 WHERE T2.d = T1.d GROUP BY c) AND c < 10;
c | d
---+---
9 | 9
(1 row)
CREATE TABLE foo_s (a integer, b integer) PARTITION BY RANGE(b)
(PARTITION sub_one START (1) END (10),
PARTITION sub_two START (11) END (22));
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO foo_s VALUES (9,9);
INSERT INTO foo_s VALUES (2,9);
SELECT bar_s.c from bar_s, foo_s WHERE foo_s.a=2 AND foo_s.b = (SELECT max(b) FROM foo_s WHERE bar_s.c = 9);
c
---
9
(1 row)
CREATE TABLE baz_s (i int4);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO baz_s VALUES (9);
ANALYZE baz_s;
-- In this query, the planner avoids using SubPlan 1 in the qual in the join,
-- because it avoids picking SubPlans from an equivalence class, when it has
-- other choices.
SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE bar_s.c = 9) AND foo_s.b = bar_s.d::int4;
c
---
9
9
(2 rows)
-- Same as above, but with another subquery, so it must use a SubPlan. There
-- are two references to the same SubPlan in the plan, on different slices.
explain SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE bar_s.c = 9) AND foo_s.b = (select bar_s.d::int4);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1356273068.35 rows=1 width=4)
-> Hash Join (cost=0.00..1356273068.35 rows=1 width=4)
Hash Cond: ((((SubPlan 1)) = foo_s.b) AND (((SubPlan 2)) = foo_s.b))
-> Seq Scan on bar_s (cost=0.00..1324053.98 rows=334 width=16)
SubPlan 1
-> Aggregate (cost=0.00..431.00 rows=1 width=4)
-> Result (cost=0.00..431.00 rows=1 width=4)
One-Time Filter: (bar_s.c = 9)
-> Materialize (cost=0.00..431.00 rows=1 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on baz_s (cost=0.00..431.00 rows=1 width=4)
SubPlan 2
-> Result (cost=0.00..0.00 rows=1 width=4)
-> Result (cost=0.00..0.00 rows=1 width=1)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Dynamic Seq Scan on foo_s (cost=0.00..431.00 rows=1 width=4)
Number of partitions to scan: 2
Optimizer: Pivotal Optimizer (GPORCA)
(19 rows)
SELECT bar_s.c FROM bar_s, foo_s WHERE foo_s.b = (SELECT max(i) FROM baz_s WHERE bar_s.c = 9) AND foo_s.b = (select bar_s.d::int4);
c
---
9
9
(2 rows)
DROP TABLE bar_s;
DROP TABLE foo_s;
DROP TABLE baz_s;
--
-- EXPLAIN tests for queries in subselect.sql to significant plan changes
--
-- Set up some simple test tables
CREATE TABLE SUBSELECT_TBL (
f1 integer,
f2 integer,
f3 float
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO SUBSELECT_TBL VALUES (1, 2, 3);
INSERT INTO SUBSELECT_TBL VALUES (2, 3, 4);
INSERT INTO SUBSELECT_TBL VALUES (3, 4, 5);
INSERT INTO SUBSELECT_TBL VALUES (1, 1, 1);
INSERT INTO SUBSELECT_TBL VALUES (2, 2, 2);
INSERT INTO SUBSELECT_TBL VALUES (3, 3, 3);
INSERT INTO SUBSELECT_TBL VALUES (6, 7, 8);
INSERT INTO SUBSELECT_TBL VALUES (8, 9, NULL);
ANALYZE SUBSELECT_TBL;
-- Uncorrelated subselects
EXPLAIN SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL) ORDER BY 2;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Result (cost=0.00..862.00 rows=3 width=12)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=7 width=4)
Merge Key: subselect_tbl.f1
-> Sort (cost=0.00..862.00 rows=3 width=4)
Sort Key: subselect_tbl.f1
-> Hash Semi Join (cost=0.00..862.00 rows=3 width=4)
Hash Cond: subselect_tbl.f1 = subselect_tbl_1.f2
-> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=4)
-> Hash (cost=431.00..431.00 rows=3 width=4)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=3 width=4)
Hash Key: subselect_tbl_1.f2
-> Seq Scan on subselect_tbl subselect_tbl_1 (cost=0.00..431.00 rows=3 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 2.74.0
(13 rows)
EXPLAIN SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE
f2 IN (SELECT f1 FROM SUBSELECT_TBL)) ORDER BY 2;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------
Result (cost=0.00..1293.00 rows=2 width=12)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=5 width=4)
Merge Key: subselect_tbl.f1
-> Sort (cost=0.00..1293.00 rows=2 width=4)
Sort Key: subselect_tbl.f1
-> Hash Join (cost=0.00..1293.00 rows=2 width=4)
Hash Cond: subselect_tbl.f1 = subselect_tbl_1.f2
-> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=4)
-> Hash (cost=862.00..862.00 rows=2 width=4)
-> GroupAggregate (cost=0.00..862.00 rows=2 width=4)
Group Key: subselect_tbl_1.f2
-> Sort (cost=0.00..862.00 rows=3 width=4)
Sort Key: subselect_tbl_1.f2
-> Hash Semi Join (cost=0.00..862.00 rows=3 width=4)
Hash Cond: subselect_tbl_1.f2 = subselect_tbl_2.f1
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=3 width=4)
Hash Key: subselect_tbl_1.f2
-> Seq Scan on subselect_tbl subselect_tbl_1 (cost=0.00..431.00 rows=3 width=4)
-> Hash (cost=431.00..431.00 rows=3 width=4)
-> Seq Scan on subselect_tbl subselect_tbl_2 (cost=0.00..431.00 rows=3 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 2.74.0
(21 rows)
EXPLAIN SELECT '' AS three, f1, f2
FROM SUBSELECT_TBL
WHERE (f1, f2) NOT IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL
WHERE f3 IS NOT NULL) ORDER BY 2,3;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000009.64..10000000009.64 rows=4 width=8)
Merge Key: subselect_tbl.f1, subselect_tbl.f2
-> Sort (cost=10000000009.64..10000000009.64 rows=2 width=8)
Sort Key: subselect_tbl.f1, subselect_tbl.f2
-> Nested Loop Left Anti Semi (Not-In) Join (cost=10000000000.00..10000000009.61 rows=2 width=8)
Join Filter: subselect_tbl.f1 = subselect_tbl_1.f2 AND subselect_tbl.f2 = subselect_tbl_1.f3::integer
-> Seq Scan on subselect_tbl (cost=0.00..3.08 rows=3 width=8)
-> Materialize (cost=0.00..3.47 rows=7 width=12)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..3.36 rows=7 width=12)
-> Seq Scan on subselect_tbl subselect_tbl_1 (cost=0.00..3.08 rows=3 width=12)
Filter: f3 IS NOT NULL
Optimizer: Postgres query optimizer
(12 rows)
ANALYZE tenk1;
EXPLAIN SELECT * FROM tenk1 a, tenk1 b
WHERE (a.unique1,b.unique2) IN (SELECT unique1,unique2 FROM tenk1 c);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=356.67..706.67 rows=10000 width=488)
-> Hash Join (cost=356.67..573.33 rows=3333 width=488)
Hash Cond: (c.unique2 = b.unique2)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=185.00..355.83 rows=3333 width=248)
Hash Key: c.unique2
-> Hash Join (cost=185.00..289.17 rows=3333 width=248)
Hash Cond: (c.unique1 = a.unique1)
-> HashAggregate (cost=80.00..113.33 rows=10000 width=8)
Group Key: c.unique1, c.unique2
-> Seq Scan on tenk1 c (cost=0.00..63.33 rows=3333 width=8)
-> Hash (cost=63.33..63.33 rows=3333 width=244)
-> Seq Scan on tenk1 a (cost=0.00..63.33 rows=3333 width=244)
-> Hash (cost=130.00..130.00 rows=3333 width=244)
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..130.00 rows=3333 width=244)
Hash Key: b.unique2
-> Seq Scan on tenk1 b (cost=0.00..63.33 rows=3333 width=244)
Optimizer: Postgres query optimizer
(17 rows)
-- Correlated subselects
EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1) ORDER BY 2,3;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Result (cost=0.00..862.00 rows=3 width=16)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=8 width=8)
Merge Key: upper.f1, upper.f2
-> Sort (cost=0.00..862.00 rows=3 width=8)
Sort Key: upper.f1, upper.f2
-> Hash Semi Join (cost=0.00..862.00 rows=3 width=8)
Hash Cond: upper.f1 = subselect_tbl.f1 AND upper.f1 = subselect_tbl.f2
-> Seq Scan on subselect_tbl upper (cost=0.00..431.00 rows=3 width=8)
-> Hash (cost=431.00..431.00 rows=3 width=8)
-> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=8)
Optimizer: Pivotal Optimizer (GPORCA) version 2.75.0
(11 rows)
EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN
(SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3) ORDER BY 2,3;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------
Result (cost=0.00..862.00 rows=3 width=20)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=8 width=12)
Merge Key: upper.f1, upper.f3
-> Sort (cost=0.00..862.00 rows=3 width=12)
Sort Key: upper.f1, upper.f3
-> Hash Semi Join (cost=0.00..862.00 rows=3 width=12)
Hash Cond: upper.f2::double precision = subselect_tbl.f3 AND upper.f1 = subselect_tbl.f2
-> Seq Scan on subselect_tbl upper (cost=0.00..431.00 rows=3 width=16)
-> Hash (cost=431.00..431.00 rows=3 width=12)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=3 width=12)
Hash Key: subselect_tbl.f2
-> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=12)
Optimizer: Pivotal Optimizer (GPORCA) version 2.75.0
(13 rows)
EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL
WHERE f2 = CAST(f3 AS integer)) ORDER BY 2,3;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------
Result (cost=0.00..1324033.89 rows=3 width=20)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324033.89 rows=8 width=12)
Merge Key: upper.f1, upper.f3
-> Sort (cost=0.00..1324033.89 rows=3 width=12)
Sort Key: upper.f1, upper.f3
-> Seq Scan on subselect_tbl upper (cost=0.00..1324033.89 rows=3 width=12)
Filter: (SubPlan 1)
SubPlan 1
-> Result (cost=0.00..431.00 rows=4 width=4)
-> Materialize (cost=0.00..431.00 rows=4 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=4 width=4)
-> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=2 width=4)
Filter: f2 = f3::integer
Optimizer: Pivotal Optimizer (GPORCA) version 2.74.0
(14 rows)
EXPLAIN SELECT '' AS five, f1 AS "Correlated Field"
FROM SUBSELECT_TBL
WHERE (f1, f2) IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL
WHERE f3 IS NOT NULL) ORDER BY 2;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=6.67..6.69 rows=8 width=4)
Merge Key: subselect_tbl.f1
-> Sort (cost=6.67..6.69 rows=3 width=4)
Sort Key: subselect_tbl.f1
-> Hash Semi Join (cost=3.33..6.55 rows=3 width=4)
Hash Cond: subselect_tbl.f1 = subselect_tbl_1.f2 AND subselect_tbl.f2 = subselect_tbl_1.f3::integer
-> Seq Scan on subselect_tbl (cost=0.00..3.08 rows=3 width=8)
-> Hash (cost=3.22..3.22 rows=3 width=12)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..3.22 rows=3 width=12)
Hash Key: subselect_tbl_1.f2
-> Seq Scan on subselect_tbl subselect_tbl_1 (cost=0.00..3.08 rows=3 width=12)
Filter: f3 IS NOT NULL
Optimizer: Postgres query optimizer
(13 rows)
-- Test simplify group-by/order-by inside subquery if sublink pull-up is possible
EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1 GROUP BY f2);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Result (cost=0.00..862.00 rows=8 width=16)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=8 width=8)
-> Hash Semi Join (cost=0.00..862.00 rows=3 width=8)
Hash Cond: ((upper.f1 = subselect_tbl.f1) AND (upper.f1 = subselect_tbl.f2))
-> Seq Scan on subselect_tbl upper (cost=0.00..431.00 rows=3 width=8)
-> Hash (cost=431.00..431.00 rows=3 width=8)
-> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=8)
Optimizer: Pivotal Optimizer (GPORCA)
(8 rows)
EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1 GROUP BY f2 LIMIT 3);
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------
Result (cost=0.00..1324038.67 rows=8 width=16)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324038.67 rows=8 width=8)
-> Seq Scan on subselect_tbl upper (cost=0.00..1324038.67 rows=3 width=8)
Filter: (SubPlan 1)
SubPlan 1
-> Limit (cost=0.00..431.01 rows=1 width=4)
-> HashAggregate (cost=0.00..431.01 rows=1 width=4)
Group Key: subselect_tbl.f2
-> Result (cost=0.00..431.01 rows=1 width=4)
Filter: (subselect_tbl.f1 = upper.f1)
-> Materialize (cost=0.00..431.00 rows=8 width=8)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=8 width=8)
-> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=8)
Optimizer: Pivotal Optimizer (GPORCA)
(14 rows)
EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1 ORDER BY f2);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Result (cost=0.00..862.00 rows=8 width=16)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=8 width=8)
-> Hash Semi Join (cost=0.00..862.00 rows=3 width=8)
Hash Cond: ((upper.f1 = subselect_tbl.f1) AND (upper.f1 = subselect_tbl.f2))
-> Seq Scan on subselect_tbl upper (cost=0.00..431.00 rows=3 width=8)
Filter: (NOT (f1 IS NULL))
-> Hash (cost=431.00..431.00 rows=3 width=8)
-> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=8)
Optimizer: Pivotal Optimizer (GPORCA)
(9 rows)
EXPLAIN SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field"
FROM SUBSELECT_TBL upper
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1 ORDER BY f2 LIMIT 3);
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------
Result (cost=0.00..1324038.66 rows=8 width=16)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324038.66 rows=8 width=8)
-> Seq Scan on subselect_tbl upper (cost=0.00..1324038.66 rows=3 width=8)
Filter: ((NOT (f1 IS NULL)) AND (SubPlan 1))
SubPlan 1
-> Limit (cost=0.00..431.01 rows=1 width=4)
-> Sort (cost=0.00..431.01 rows=1 width=4)
Sort Key: subselect_tbl.f2
-> Result (cost=0.00..431.01 rows=1 width=4)
Filter: (subselect_tbl.f1 = upper.f1)
-> Materialize (cost=0.00..431.00 rows=8 width=8)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=8 width=8)
-> Seq Scan on subselect_tbl (cost=0.00..431.00 rows=3 width=8)
Optimizer: Pivotal Optimizer (GPORCA)
(14 rows)
--
-- Test cases to catch unpleasant interactions between IN-join processing
-- and subquery pullup.
--
EXPLAIN select count(*) from
(select 1 from tenk1 a
where unique1 IN (select hundred from tenk1 b)) ss;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
Finalize Aggregate (cost=0.00..864.06 rows=1 width=8)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..864.06 rows=1 width=8)
-> Partial Aggregate (cost=0.00..864.06 rows=1 width=8)
-> Hash Join (cost=0.00..864.06 rows=34 width=1)
Hash Cond: (a.unique1 = b.hundred)
-> Seq Scan on tenk1 a (cost=0.00..431.50 rows=3334 width=4)
-> Hash (cost=431.94..431.94 rows=34 width=4)
-> GroupAggregate (cost=0.00..431.94 rows=34 width=4)
Group Key: b.hundred
-> Sort (cost=0.00..431.94 rows=34 width=4)
Sort Key: b.hundred
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.93 rows=34 width=4)
Hash Key: b.hundred
-> Streaming HashAggregate (cost=0.00..431.93 rows=34 width=4)
Group Key: b.hundred
-> Seq Scan on tenk1 b (cost=0.00..431.50 rows=3334 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(17 rows)
EXPLAIN select count(distinct ss.ten) from
(select ten from tenk1 a
where unique1 IN (select hundred from tenk1 b)) ss;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------
Finalize Aggregate (cost=0.00..864.09 rows=1 width=8)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..864.09 rows=1 width=8)
-> Partial Aggregate (cost=0.00..864.09 rows=1 width=8)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..864.09 rows=34 width=4)
Hash Key: a.ten
-> Hash Join (cost=0.00..864.09 rows=34 width=4)
Hash Cond: (a.unique1 = b.hundred)
-> Seq Scan on tenk1 a (cost=0.00..431.50 rows=3334 width=8)
-> Hash (cost=431.93..431.93 rows=34 width=4)
-> GroupAggregate (cost=0.00..431.93 rows=34 width=4)
Group Key: b.hundred
-> Sort (cost=0.00..431.93 rows=34 width=4)
Sort Key: b.hundred
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.93 rows=34 width=4)
Hash Key: b.hundred
-> Streaming HashAggregate (cost=0.00..431.93 rows=34 width=4)
Group Key: b.hundred
-> Seq Scan on tenk1 b (cost=0.00..431.50 rows=3334 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(19 rows)
EXPLAIN select count(*) from
(select 1 from tenk1 a
where unique1 IN (select distinct hundred from tenk1 b)) ss;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
Finalize Aggregate (cost=0.00..864.06 rows=1 width=8)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..864.06 rows=1 width=8)
-> Partial Aggregate (cost=0.00..864.06 rows=1 width=8)
-> Hash Semi Join (cost=0.00..864.06 rows=34 width=1)
Hash Cond: (a.unique1 = b.hundred)
-> Seq Scan on tenk1 a (cost=0.00..431.50 rows=3334 width=4)
-> Hash (cost=431.94..431.94 rows=34 width=4)
-> GroupAggregate (cost=0.00..431.94 rows=34 width=4)
Group Key: b.hundred
-> Sort (cost=0.00..431.94 rows=34 width=4)
Sort Key: b.hundred
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.93 rows=34 width=4)
Hash Key: b.hundred
-> Streaming HashAggregate (cost=0.00..431.93 rows=34 width=4)
Group Key: b.hundred
-> Seq Scan on tenk1 b (cost=0.00..431.50 rows=3334 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(17 rows)
EXPLAIN select count(distinct ss.ten) from
(select ten from tenk1 a
where unique1 IN (select distinct hundred from tenk1 b)) ss;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------
Finalize Aggregate (cost=0.00..864.09 rows=1 width=8)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..864.09 rows=1 width=8)
-> Partial Aggregate (cost=0.00..864.09 rows=1 width=8)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..864.09 rows=34 width=4)
Hash Key: a.ten
-> Hash Semi Join (cost=0.00..864.09 rows=34 width=4)
Hash Cond: (a.unique1 = b.hundred)
-> Seq Scan on tenk1 a (cost=0.00..431.50 rows=3334 width=8)
-> Hash (cost=431.93..431.93 rows=34 width=4)
-> GroupAggregate (cost=0.00..431.93 rows=34 width=4)
Group Key: b.hundred
-> Sort (cost=0.00..431.93 rows=34 width=4)
Sort Key: b.hundred
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.93 rows=34 width=4)
Hash Key: b.hundred
-> Streaming HashAggregate (cost=0.00..431.93 rows=34 width=4)
Group Key: b.hundred
-> Seq Scan on tenk1 b (cost=0.00..431.50 rows=3334 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(19 rows)
--
-- In case of simple exists query, planner can generate alternative
-- subplans and choose one of them during execution based on the cost.
-- The below test check that we are generating alternative subplans,
-- we should see 2 subplans in the explain
--
EXPLAIN SELECT EXISTS(SELECT * FROM tenk1 WHERE tenk1.unique1 = tenk2.unique1) FROM tenk2 LIMIT 1;
QUERY PLAN
----------------------------------------------------------------------------------------------
Limit (cost=0.00..865.45 rows=1 width=1)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..865.45 rows=1 width=1)
-> Limit (cost=0.00..865.45 rows=1 width=1)
-> Hash Left Join (cost=0.00..865.42 rows=3334 width=8)
Hash Cond: (tenk2.unique1 = tenk1.unique1)
-> Seq Scan on tenk2 (cost=0.00..431.51 rows=3334 width=4)
-> Hash (cost=431.96..431.96 rows=3334 width=12)
-> HashAggregate (cost=0.00..431.96 rows=3334 width=12)
Group Key: tenk1.unique1
-> Seq Scan on tenk1 (cost=0.00..431.51 rows=3334 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(11 rows)
SELECT EXISTS(SELECT * FROM tenk1 WHERE tenk1.unique1 = tenk2.unique1) FROM tenk2 LIMIT 1;
exists
--------
t
(1 row)
--
-- Ensure that NOT is not lost during subquery pull-up
--
SELECT 1 AS col1 WHERE NOT (SELECT 1 = 1);
col1
------
(0 rows)
--
-- Test sane behavior in case of semi join semantics
--
-- start_ignore
DROP TABLE IF EXISTS dedup_test1;
NOTICE: table "dedup_test1" does not exist, skipping
DROP TABLE IF EXISTS dedup_test2;
NOTICE: table "dedup_test2" does not exist, skipping
DROP TABLE IF EXISTS dedup_test3;
NOTICE: table "dedup_test3" does not exist, skipping
-- end_ignore
CREATE TABLE dedup_test1 ( a int, b int ) DISTRIBUTED BY (a);
CREATE TABLE dedup_test2 ( e int, f int ) DISTRIBUTED BY (e);
CREATE TABLE dedup_test3 ( a int, b int, c int) DISTRIBUTED BY (a) PARTITION BY RANGE(c) (START(1) END(2) EVERY(1));
INSERT INTO dedup_test1 select i, i from generate_series(1,4)i;
INSERT INTO dedup_test2 select i, i from generate_series(1,4)i;
INSERT INTO dedup_test3 select 1, 1, 1 from generate_series(1,10);
ANALYZE dedup_test1;
ANALYZE dedup_test2;
ANALYZE dedup_test3;
EXPLAIN SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e WHERE (a) IN (SELECT a FROM dedup_test3);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1293.00 rows=1 width=16)
-> Hash Join (cost=0.00..1293.00 rows=1 width=16)
Hash Cond: (dedup_test2.e = dedup_test1.a)
-> Seq Scan on dedup_test2 (cost=0.00..431.00 rows=2 width=8)
-> Hash (cost=862.00..862.00 rows=1 width=8)
-> Hash Join (cost=0.00..862.00 rows=1 width=8)
Hash Cond: (dedup_test1.a = dedup_test3.a)
-> Seq Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=8)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> GroupAggregate (cost=0.00..431.00 rows=1 width=4)
Group Key: dedup_test3.a
-> Sort (cost=0.00..431.00 rows=1 width=4)
Sort Key: dedup_test3.a
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
Hash Key: dedup_test3.a
-> GroupAggregate (cost=0.00..431.00 rows=1 width=4)
Group Key: dedup_test3.a
-> Sort (cost=0.00..431.00 rows=4 width=4)
Sort Key: dedup_test3.a
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=0.00..431.00 rows=4 width=4)
-> Dynamic Seq Scan on dedup_test3 (cost=0.00..431.00 rows=4 width=4)
Number of partitions to scan: 1
Optimizer: Pivotal Optimizer (GPORCA)
(23 rows)
SELECT * FROM dedup_test1 INNER JOIN dedup_test2 ON dedup_test1.a= dedup_test2.e WHERE (a) IN (SELECT a FROM dedup_test3);
a | b | e | f
---+---+---+---
1 | 1 | 1 | 1
(1 row)
-- Test planner to check if it optimizes the join and marks it as a dummy join
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT b FROM dedup_test1);
QUERY PLAN
----------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..882688.08 rows=1 width=20)
-> Nested Loop (cost=0.00..882688.08 rows=1 width=20)
Join Filter: true
-> Result (cost=0.00..0.00 rows=0 width=12)
One-Time Filter: false
-> Seq Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=8)
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND dedup_test3.b IN (SELECT a FROM dedup_test1);
QUERY PLAN
----------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..882688.08 rows=1 width=20)
-> Nested Loop (cost=0.00..882688.08 rows=1 width=20)
Join Filter: true
-> Result (cost=0.00..0.00 rows=0 width=12)
One-Time Filter: false
-> Seq Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=8)
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
EXPLAIN SELECT * FROM dedup_test3, dedup_test1 WHERE c = 7 AND EXISTS (SELECT b FROM dedup_test1) AND dedup_test3.b IN (SELECT b FROM dedup_test1);
QUERY PLAN
----------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..882688.08 rows=1 width=20)
-> Nested Loop (cost=0.00..882688.08 rows=1 width=20)
Join Filter: true
-> Result (cost=0.00..0.00 rows=0 width=12)
One-Time Filter: false
-> Seq Scan on dedup_test1 (cost=0.00..431.00 rows=2 width=8)
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
-- More dedup semi-join tests.
create table dedup_tab (a int4) distributed by(a) ;
insert into dedup_tab select g from generate_series(1,100) g;
analyze dedup_tab;
create table dedup_reptab (a int4) distributed replicated;
insert into dedup_reptab select generate_series(1,1);
analyze dedup_reptab;
-- Replicated table on the inner side of the join. The replicated table needs
-- be broadcast from a single node to the others, with a unique RowIdExpr
-- tacked on, because even though all the rows are available in all the
-- segments, you cannot distinguish join rows generated by the same "logical"
-- row otherwise.
explain (costs off, locus)
select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab t);
QUERY PLAN
------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Join
Hash Cond: (((t.a / 10)) = r.a)
-> GroupAggregate
Group Key: ((t.a / 10))
-> Sort
Sort Key: ((t.a / 10))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: ((t.a / 10))
-> Seq Scan on dedup_tab t
-> Hash
-> Seq Scan on dedup_reptab r
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab t);
a
---
1
(1 row)
-- Try the same with a General-locus function. In GPDB 6 and below, this
-- generated a plan that did create the same logical row ID on each segment,
-- on the assumption that an immutable function generates the result rows
-- in the same order on all segments. We no longer assume that, and generate
-- the same plan with a broadcast as the case with a replicated table.
--
-- We have to create a custom function for this, instead of using
-- generate_series() directly, because the rows-estimate for generate_series()
-- is so high that we don't get the plan we want. (After PostgreSQL v12 we
-- could though, because the cost estimation of functions was improved.)
create function dedup_srf() RETURNS SETOF int AS $$
begin
return query select generate_series(1, 3);
end;
$$ LANGUAGE plpgsql IMMUTABLE ROWS 3;
create function dedup_srf_stable() RETURNS SETOF int AS $$
begin
return query select generate_series(1, 3);
end;
$$ LANGUAGE plpgsql STABLE ROWS 3;
create function dedup_srf_volatile() RETURNS SETOF int AS $$
begin
return query select generate_series(1, 3);
end;
$$ LANGUAGE plpgsql VOLATILE ROWS 3;
explain (costs off)
select * from dedup_srf() r(a) where r.a in (select t.a/10 from dedup_tab t);
QUERY PLAN
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (dedup_srf.dedup_srf = ((t.a / 10)))
-> Result
-> Function Scan on dedup_srf
-> Hash
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: ((t.a / 10))
-> Seq Scan on dedup_tab t
Optimizer: Pivotal Optimizer (GPORCA) version 3.94.0
(10 rows)
select * from dedup_srf() r(a) where r.a in (select t.a/10 from dedup_tab t);
a
---
1
2
3
(3 rows)
explain (costs off)
select * from dedup_srf_stable() r(a) where r.a in (select t.a/10 from dedup_tab t);
QUERY PLAN
-------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (dedup_srf_stable.dedup_srf_stable = ((t.a / 10)))
-> Redistribute Motion 1:3 (slice2)
Hash Key: dedup_srf_stable.dedup_srf_stable
-> Function Scan on dedup_srf_stable
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: ((t.a / 10))
-> Seq Scan on dedup_tab t
Optimizer: Pivotal Optimizer (GPORCA) version 3.94.0
(11 rows)
select * from dedup_srf_stable() r(a) where r.a in (select t.a/10 from dedup_tab t);
a
---
1
2
3
(3 rows)
explain (costs off)
select * from dedup_srf_volatile() r(a) where r.a in (select t.a/10 from dedup_tab t);
QUERY PLAN
-----------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (dedup_srf_volatile.dedup_srf_volatile = ((t.a / 10)))
-> Redistribute Motion 1:3 (slice2)
Hash Key: dedup_srf_volatile.dedup_srf_volatile
-> Function Scan on dedup_srf_volatile
-> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: ((t.a / 10))
-> Seq Scan on dedup_tab t
Optimizer: Pivotal Optimizer (GPORCA) version 3.94.0
(11 rows)
select * from dedup_srf_volatile() r(a) where r.a in (select t.a/10 from dedup_tab t);
a
---
1
2
3
(3 rows)
-- Also test it with non-SRFs. In principle, since the function returns exactly
-- one row, no deduplication would be needed in these cases. But the planner
-- doesn't recognize that currently, so you get the same kind of plan as with
-- set-returning functions.
create function dedup_func() RETURNS int AS $$
select 5;
$$ LANGUAGE SQL IMMUTABLE;
create function dedup_func_stable() RETURNS int AS $$
select 5;
$$ LANGUAGE SQL STABLE;
create function dedup_func_volatile() RETURNS int AS $$
select 5;
$$ LANGUAGE SQL VOLATILE;
explain (costs off)
select * from dedup_func() r(a) where r.a in (select t.a/10 from dedup_tab t);
QUERY PLAN
------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Join
Hash Cond: (((a / 10)) = (5))
-> GroupAggregate
Group Key: ((a / 10))
-> Sort
Sort Key: ((a / 10))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: ((a / 10))
-> Seq Scan on dedup_tab t
-> Hash
-> Result
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select * from dedup_func() r(a) where r.a in (select t.a/10 from dedup_tab t);
a
---
5
(1 row)
explain (costs off)
select * from dedup_func_stable() r(a) where r.a in (select t.a/10 from dedup_tab t);
QUERY PLAN
------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Join
Hash Cond: (((a / 10)) = (5))
-> GroupAggregate
Group Key: ((a / 10))
-> Sort
Sort Key: ((a / 10))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: ((a / 10))
-> Seq Scan on dedup_tab t
-> Hash
-> Result
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select * from dedup_func_stable() r(a) where r.a in (select t.a/10 from dedup_tab t);
a
---
5
(1 row)
explain (costs off)
select * from dedup_func_volatile() r(a) where r.a in (select t.a/10 from dedup_tab t);
QUERY PLAN
------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Join
Hash Cond: (((a / 10)) = (5))
-> GroupAggregate
Group Key: ((a / 10))
-> Sort
Sort Key: ((a / 10))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: ((a / 10))
-> Seq Scan on dedup_tab t
-> Hash
-> Result
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select * from dedup_func_volatile() r(a) where r.a in (select t.a/10 from dedup_tab t);
a
---
5
(1 row)
--
-- Test init/main plan are not both parallel
--
create table init_main_plan_parallel (c1 int, c2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- case 1: init plan is parallel, main plan is not.
select relname from pg_class where exists(select * from init_main_plan_parallel);
relname
---------
(0 rows)
-- case2: init plan is not parallel, main plan is parallel
select * from init_main_plan_parallel where exists (select * from pg_class);
c1 | c2
----+----
(0 rows)
-- A subplan whose targetlist might be expanded to make sure all entries of its
-- hashExpr are in its targetlist, test the motion node above it also updated
-- its targetlist, otherwise, a wrong answer or a crash happens.
DROP TABLE IF EXISTS TEST_IN;
CREATE TABLE TEST_IN(
C01 FLOAT,
C02 NUMERIC(10,0)
) DISTRIBUTED RANDOMLY;
--insert repeatable records:
INSERT INTO TEST_IN
SELECT
ROUND(RANDOM()*1E1),ROUND(RANDOM()*1E1)
FROM GENERATE_SERIES(1,1E4::BIGINT) I;
ANALYZE TEST_IN;
SELECT COUNT(*) FROM
TEST_IN A
WHERE A.C01 IN(SELECT C02 FROM TEST_IN);
count
-------
10000
(1 row)
--
-- Variant of the test in upstream 'subselect' test, for PostgreSQL bug #14924
-- At one point, this produced wrong results on GPDB for different reasons than
-- the original bug: we forgot to handle the VALUES list in the function to
-- mutate a plan tree (plan_tree_mutator()).
--
create temp table onerowtmp as select 1;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '?column?' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select val.x
from generate_series(1,10) as s(i),
lateral (
values ((select s.i + 1 from onerowtmp)), (s.i + 101)
) as val(x)
where s.i < 10 and val.x < 110;
x
-----
2
102
3
103
4
104
5
105
6
106
7
107
8
108
9
109
10
(17 rows)
-- EXISTS sublink simplication
drop table if exists simplify_sub;
NOTICE: table "simplify_sub" does not exist, skipping
create table simplify_sub (i int) distributed by (i);
insert into simplify_sub values (1);
insert into simplify_sub values (2);
analyze simplify_sub;
-- limit n
explain (costs off)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 1);
QUERY PLAN
-------------------------------------------------------------------------------
Result
Filter: (SubPlan 1)
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on simplify_sub t1
Filter: (NOT (i IS NULL))
SubPlan 1
-> Result
-> Limit
-> Result
Filter: (t1.i = t2.i)
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on simplify_sub t2
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(14 rows)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 1);
i
---
2
1
(2 rows)
explain (costs off)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 1);
QUERY PLAN
-------------------------------------------------------------------------------
Result
Filter: (SubPlan 1)
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on simplify_sub t1
SubPlan 1
-> Result
-> Limit
-> Result
Filter: (t1.i = t2.i)
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on simplify_sub t2
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(13 rows)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 1);
i
---
(0 rows)
explain (costs off)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 0);
QUERY PLAN
------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop Semi Join
Join Filter: true
-> Seq Scan on simplify_sub t1
Filter: (NOT (i IS NULL))
-> Limit
-> Result
One-Time Filter: false
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(9 rows)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 0);
i
---
(0 rows)
explain (costs off)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 0);
QUERY PLAN
------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop Anti Join
Join Filter: true
-> Seq Scan on simplify_sub t1
-> Result
One-Time Filter: false
Optimizer: Pivotal Optimizer (GPORCA) version 3.55.0
(7 rows)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit 0);
i
---
1
2
(2 rows)
explain (costs off)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit all);
QUERY PLAN
-----------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (t1.i = t2.i)
-> Seq Scan on simplify_sub t1
Filter: (NOT (i IS NULL))
-> Hash
-> Seq Scan on simplify_sub t2
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(8 rows)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit all);
i
---
2
1
(2 rows)
explain (costs off)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit all);
QUERY PLAN
-----------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Anti Join
Hash Cond: (t1.i = t2.i)
-> Seq Scan on simplify_sub t1
-> Hash
-> Seq Scan on simplify_sub t2
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(7 rows)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit all);
i
---
(0 rows)
explain (costs off)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit NULL);
QUERY PLAN
-----------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (t1.i = t2.i)
-> Seq Scan on simplify_sub t1
Filter: (NOT (i IS NULL))
-> Hash
-> Seq Scan on simplify_sub t2
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(8 rows)
select * from simplify_sub t1 where exists (select 1 from simplify_sub t2 where t1.i = t2.i limit NULL);
i
---
2
1
(2 rows)
explain (costs off)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit NULL);
QUERY PLAN
-----------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Anti Join
Hash Cond: (t1.i = t2.i)
-> Seq Scan on simplify_sub t1
-> Hash
-> Seq Scan on simplify_sub t2
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(7 rows)
select * from simplify_sub t1 where not exists (select 1 from simplify_sub t2 where t1.i = t2.i limit NULL);
i
---
(0 rows)
-- aggregates without GROUP BY or HAVING
explain (costs off)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i);
QUERY PLAN
------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on simplify_sub t1
Optimizer: Pivotal Optimizer (GPORCA) version 3.55.0
(3 rows)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i);
i
---
1
2
(2 rows)
explain (costs off)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i);
QUERY PLAN
------------------------------------------------------
Result
One-Time Filter: false
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(3 rows)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i);
i
---
(0 rows)
explain (costs off)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 0);
QUERY PLAN
------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on simplify_sub t1
Optimizer: Pivotal Optimizer (GPORCA) version 3.55.0
(3 rows)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 0);
i
---
2
1
(2 rows)
explain (costs off)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 0);
QUERY PLAN
------------------------------------------------------
Result
One-Time Filter: false
Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0
(3 rows)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 0);
i
---
(0 rows)
explain (costs off)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 1);
QUERY PLAN
-------------------------------------------------------------------------------
Result
Filter: (SubPlan 1)
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on simplify_sub t1
SubPlan 1
-> Limit
-> Aggregate
-> Result
Filter: (t1.i = t2.i)
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on simplify_sub t2
Optimizer: Pivotal Optimizer (GPORCA) version 3.55.0
(13 rows)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 1);
i
---
(0 rows)
explain (costs off)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 1);
QUERY PLAN
-------------------------------------------------------------------------------
Result
Filter: (SubPlan 1)
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on simplify_sub t1
SubPlan 1
-> Limit
-> Aggregate
-> Result
Filter: (t1.i = t2.i)
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on simplify_sub t2
Optimizer: Pivotal Optimizer (GPORCA) version 3.55.0
(13 rows)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset 1);
i
---
2
1
(2 rows)
explain (costs off)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset NULL);
QUERY PLAN
-------------------------------------------------------------------------------
Result
Filter: (SubPlan 1)
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on simplify_sub t1
SubPlan 1
-> Limit
-> Aggregate
-> Result
Filter: (t1.i = t2.i)
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on simplify_sub t2
Optimizer: Pivotal Optimizer (GPORCA) version 3.55.0
(13 rows)
select * from simplify_sub t1 where exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset NULL);
i
---
2
1
(2 rows)
explain (costs off)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset NULL);
QUERY PLAN
-------------------------------------------------------------------------------
Result
Filter: (SubPlan 1)
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on simplify_sub t1
SubPlan 1
-> Limit
-> Aggregate
-> Result
Filter: (t1.i = t2.i)
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on simplify_sub t2
Optimizer: Pivotal Optimizer (GPORCA) version 3.55.0
(13 rows)
select * from simplify_sub t1 where not exists (select sum(t2.i) from simplify_sub t2 where t1.i = t2.i offset NULL);
i
---
(0 rows)
drop table if exists simplify_sub;
--
-- Test a couple of cases where a SubPlan is used in a Motion's hash key.
--
create table foo (i int4, j int4) distributed by (i);
create table bar (i int4, j int4) distributed by (i);
create table baz (i int4, j int4) distributed by (i);
insert into foo select g, g from generate_series(1, 10) g;
insert into bar values (1, 1);
insert into baz select g, g from generate_series(5, 100) g;
analyze foo;
analyze bar;
analyze baz;
explain (verbose, costs off)
select * from foo left outer join baz on (select bar.i from bar where bar.i = foo.i) + 1 = baz.j;
QUERY PLAN
-----------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: foo.i, foo.j, baz.i, baz.j
-> Hash Right Join
Output: foo.i, foo.j, baz.i, baz.j
Hash Cond: (baz.j = (((SubPlan 1)) + 1))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: baz.i, baz.j
Hash Key: baz.j
-> Seq Scan on subselect_gp.baz
Output: baz.i, baz.j
-> Hash
Output: foo.i, foo.j, ((SubPlan 1))
-> Redistribute Motion 3:3 (slice3; segments: 3)
Output: foo.i, foo.j, ((SubPlan 1))
Hash Key: (((SubPlan 1)) + 1)
-> Seq Scan on subselect_gp.foo
Output: foo.i, foo.j, (SubPlan 1)
SubPlan 1
-> Result
Output: bar.i
Filter: (bar.i = foo.i)
-> Materialize
Output: bar.i
-> Broadcast Motion 3:3 (slice4; segments: 3)
Output: bar.i
-> Seq Scan on subselect_gp.bar
Output: bar.i
Optimizer: Postgres query optimizer
Settings: optimizer=on
(29 rows)
select * from foo left outer join baz on (select bar.i from bar where bar.i = foo.i) + 1 = baz.j;
i | j | i | j
----+----+---+---
10 | 10 | |
9 | 9 | |
6 | 6 | |
5 | 5 | |
8 | 8 | |
7 | 7 | |
4 | 4 | |
3 | 3 | |
2 | 2 | |
1 | 1 | |
(10 rows)
-- This is a variant of a query in the upstream 'subselect' test, with the
-- twist that baz.i is the distribution key for the table. In the plan, the
-- CASE WHEN construct with SubPlan is used as Hash Key in the Redistribute
-- Motion. It is a planned as a hashed SubPlan. (We had a bug at one point,
-- where the hashed SubPlan was added to the target list twice, which
-- caused an error at runtime when the executor tried to build the hash
-- table twice, because the Motion in the SubPlan couldn't be rescanned.)
explain (verbose, costs off)
select * from foo where
(case when foo.i in (select a.i from baz a) then foo.i else null end) in
(select b.i from baz b);
QUERY PLAN
--------------------------------------------------------------
Result
Output: foo.i, foo.j
Filter: (SubPlan 2)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: foo.i, foo.j
-> Seq Scan on subselect_gp.foo
Output: foo.i, foo.j
SubPlan 1
-> Result
Output: a.i
-> Materialize
Output: a.i
-> Gather Motion 3:1 (slice2; segments: 3)
Output: a.i
-> Seq Scan on subselect_gp.baz a
Output: a.i
SubPlan 2
-> Materialize
Output: b.i
-> Gather Motion 3:1 (slice3; segments: 3)
Output: b.i
-> Seq Scan on subselect_gp.baz b
Output: b.i
Optimizer: Pivotal Optimizer (GPORCA) version 3.93.0
Settings: optimizer=on
(25 rows)
select * from foo where
(case when foo.i in (select a.i from baz a) then foo.i else null end) in
(select b.i from baz b);
i | j
----+----
6 | 6
7 | 7
10 | 10
5 | 5
8 | 8
9 | 9
(6 rows)
-- When creating plan with subquery and CTE, it sets the useless flow for the plan.
-- But we only need flow for the topmost plan and child of the motion. See commit
-- https://github.com/greenplum-db/gpdb/commit/93abe741cd67f04958e2951edff02b45ab6e280f for detail
-- The extra flow will cause subplan set wrong motionType and cause an ERROR
-- unexpected gang size: XX
-- This related to issue: https://github.com/greenplum-db/gpdb/issues/12371
create table extra_flow_dist(a int, b int, c date);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table extra_flow_dist1(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into extra_flow_dist select i, i, '1949-10-01'::date from generate_series(1, 10)i;
insert into extra_flow_dist1 select i, i from generate_series(20, 22)i;
-- case 1 subplan with outer general locus (CTE and subquery)
explain (verbose, costs off) with run_dt as (
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below general locus path
) dt
from (select ( max(1) ) x) a -- general locus
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
QUERY PLAN
--------------------------------------------------------------------------------------
Nested Loop
Output: ((SubPlan 1)), extra_flow_dist1.a, extra_flow_dist1.b
Join Filter: true
-> Result
Output: ((SubPlan 1))
Filter: (((SubPlan 1)) < '01-01-2010'::date)
-> Aggregate
Output: (SubPlan 1)
-> Result
Output: true
SubPlan 1
-> Result
Output: extra_flow_dist.c
Filter: (extra_flow_dist.b = max(1))
-> Materialize
Output: extra_flow_dist.b, extra_flow_dist.c
-> Gather Motion 3:1 (slice2; segments: 3)
Output: extra_flow_dist.b, extra_flow_dist.c
-> Seq Scan on subselect_gp.extra_flow_dist
Output: extra_flow_dist.b, extra_flow_dist.c
-> Materialize
Output: extra_flow_dist1.a, extra_flow_dist1.b
-> Gather Motion 3:1 (slice1; segments: 3)
Output: extra_flow_dist1.a, extra_flow_dist1.b
-> Seq Scan on subselect_gp.extra_flow_dist1
Output: extra_flow_dist1.a, extra_flow_dist1.b
Optimizer: Pivotal Optimizer (GPORCA)
(27 rows)
with run_dt as (
select
(
select c from extra_flow_dist where b = x
) dt
from (select ( max(1) ) x) a
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
dt | a | b
------------+----+----
10-01-1949 | 22 | 22
10-01-1949 | 20 | 20
10-01-1949 | 21 | 21
(3 rows)
create table extra_flow_rand(a int) distributed replicated;
insert into extra_flow_rand values (1);
-- case 2 for subplan with outer segment general locus (CTE and subquery)
explain (verbose, costs off) with run_dt as (
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below segment general locus path
) dt
from (select a x from extra_flow_rand) a -- segment general locus
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
QUERY PLAN
--------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: ((SubPlan 1)), extra_flow_dist1.a, extra_flow_dist1.b
-> Nested Loop
Output: ((SubPlan 1)), extra_flow_dist1.a, extra_flow_dist1.b
Join Filter: true
-> Seq Scan on subselect_gp.extra_flow_dist1
Output: extra_flow_dist1.a, extra_flow_dist1.b
-> Materialize
Output: ((SubPlan 1))
-> Result
Output: ((SubPlan 1))
Filter: (((SubPlan 1)) < '01-01-2010'::date)
-> Seq Scan on subselect_gp.extra_flow_rand
Output: (SubPlan 1)
SubPlan 1
-> Result
Output: extra_flow_dist.c
Filter: (extra_flow_dist.b = extra_flow_rand.a)
-> Materialize
Output: extra_flow_dist.b, extra_flow_dist.c
-> Broadcast Motion 3:3 (slice2; segments: 3)
Output: extra_flow_dist.b, extra_flow_dist.c
-> Seq Scan on subselect_gp.extra_flow_dist
Output: extra_flow_dist.b, extra_flow_dist.c
Optimizer: Pivotal Optimizer (GPORCA)
(23 rows)
with run_dt as (
select
(
select c from extra_flow_dist where b = x
) dt
from (select a x from extra_flow_rand) a
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
dt | a | b
------------+----+----
10-01-1949 | 22 | 22
10-01-1949 | 21 | 21
10-01-1949 | 20 | 20
(3 rows)
-- case 3 for subplan with outer entry locus (CTE and subquery)
explain (verbose, costs off) with run_dt as (
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below entry locus path
) dt
from (select 1 x from pg_class limit 1) a -- entry locus
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
QUERY PLAN
--------------------------------------------------------------------------------------
Nested Loop
Output: ((SubPlan 1)), extra_flow_dist1.a, extra_flow_dist1.b
Join Filter: true
-> Result
Output: ((SubPlan 1))
Filter: (((SubPlan 1)) < '01-01-2010'::date)
-> Result
Output: (SubPlan 1)
-> Limit
Output: (1)
-> Seq Scan on pg_catalog.pg_class
Output: 1
SubPlan 1
-> Result
Output: extra_flow_dist.c
Filter: (extra_flow_dist.b = (1))
-> Materialize
Output: extra_flow_dist.b, extra_flow_dist.c
-> Gather Motion 3:1 (slice2; segments: 3)
Output: extra_flow_dist.b, extra_flow_dist.c
-> Seq Scan on subselect_gp.extra_flow_dist
Output: extra_flow_dist.b, extra_flow_dist.c
Filter: (extra_flow_dist.b = 1)
-> Materialize
Output: extra_flow_dist1.a, extra_flow_dist1.b
-> Gather Motion 3:1 (slice1; segments: 3)
Output: extra_flow_dist1.a, extra_flow_dist1.b
-> Seq Scan on subselect_gp.extra_flow_dist1
Output: extra_flow_dist1.a, extra_flow_dist1.b
Optimizer: Pivotal Optimizer (GPORCA)
(30 rows)
with run_dt as (
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below entry locus path
) dt
from (select 1 x from pg_class limit 1) a -- entry locus
)
select * from run_dt, extra_flow_dist1
where dt < '2010-01-01'::date;
dt | a | b
------------+----+----
10-01-1949 | 22 | 22
10-01-1949 | 20 | 20
10-01-1949 | 21 | 21
(3 rows)
-- case 4 subplan with outer segment general locus without param in subplan (CTE and subquery)
explain (verbose, costs off) with run_dt as (
select x, y dt
from (select a x from extra_flow_rand ) a -- segment general locus
left join (select max(1) y) aaa
on a.x > any (select random() from extra_flow_dist) -- subplan's outer is the above segment general locus path
)
select * from run_dt, extra_flow_dist1
where dt < extra_flow_dist1.a;
QUERY PLAN
-------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: extra_flow_rand.a, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b
-> Nested Loop
Output: extra_flow_rand.a, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b
Join Filter: ((max(1)) < extra_flow_dist1.a)
-> Nested Loop Left Join
Output: extra_flow_rand.a, (max(1))
Inner Unique: true
Join Filter: ((SubPlan 1))
-> Seq Scan on subselect_gp.extra_flow_rand
Output: extra_flow_rand.a, (SubPlan 1)
SubPlan 1
-> Materialize
Output: (random())
-> Broadcast Motion 3:3 (slice2; segments: 3)
Output: (random())
-> Seq Scan on subselect_gp.extra_flow_dist
Output: random()
-> Materialize
Output: (max(1))
-> Aggregate
Output: max(1)
-> Result
-> Materialize
Output: extra_flow_dist1.a, extra_flow_dist1.b
-> Seq Scan on subselect_gp.extra_flow_dist1
Output: extra_flow_dist1.a, extra_flow_dist1.b
Optimizer: Postgres query optimizer
(28 rows)
-- case 5 for subplan with outer entry locus without param in subplan (CTE and subquery)
explain (verbose, costs off) with run_dt as (
select x, y dt
from (select relnatts x from pg_class ) a -- entry locus
left join (select max(1) y) aaa
on a.x > any (select random() from extra_flow_dist) -- subplan's outer is the above entry loucs
)
select * from run_dt, extra_flow_dist1
where dt < extra_flow_dist1.a;
QUERY PLAN
--------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: pg_class.relnatts, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b
-> Nested Loop
Output: pg_class.relnatts, (max(1)), extra_flow_dist1.a, extra_flow_dist1.b
Join Filter: ((max(1)) < extra_flow_dist1.a)
-> Broadcast Motion 1:3 (slice2)
Output: pg_class.relnatts, (max(1))
-> Nested Loop Left Join
Output: pg_class.relnatts, (max(1))
Inner Unique: true
Join Filter: ((SubPlan 1))
-> Seq Scan on pg_catalog.pg_class
Output: pg_class.relnatts, (SubPlan 1)
SubPlan 1
-> Materialize
Output: (random())
-> Gather Motion 3:1 (slice3; segments: 3)
Output: (random())
-> Seq Scan on subselect_gp.extra_flow_dist
Output: random()
-> Materialize
Output: (max(1))
-> Aggregate
Output: max(1)
-> Result
-> Materialize
Output: extra_flow_dist1.a, extra_flow_dist1.b
-> Seq Scan on subselect_gp.extra_flow_dist1
Output: extra_flow_dist1.a, extra_flow_dist1.b
Optimizer: Postgres query optimizer
(30 rows)
-- case 6 without CTE, nested subquery should not add extral flow
explain (verbose, costs off) select * from (
select dt from (
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below general locus path
) dt
from (select ( max(1) ) x) a -- general locus
union
select
(
select c from extra_flow_dist where b = x -- subplan's outer is the below general locus path
) dt
from (select ( max(1) ) x) aa -- general locus
) tbl
) run_dt,
extra_flow_dist1
where dt < '2010-01-01'::date;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: ((SubPlan 1)), extra_flow_dist1.a, extra_flow_dist1.b
-> Nested Loop
Output: ((SubPlan 1)), extra_flow_dist1.a, extra_flow_dist1.b
Join Filter: true
-> Broadcast Motion 3:3 (slice7; segments: 3)
Output: extra_flow_dist1.a, extra_flow_dist1.b
-> Seq Scan on subselect_gp.extra_flow_dist1
Output: extra_flow_dist1.a, extra_flow_dist1.b
-> GroupAggregate
Output: ((SubPlan 1))
Group Key: ((SubPlan 1))
-> Sort
Output: ((SubPlan 1))
Sort Key: ((SubPlan 1))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: ((SubPlan 1))
Hash Key: ((SubPlan 1))
-> Append
-> Result
Output: ((SubPlan 1))
Filter: (((SubPlan 1)) < '01-01-2010'::date)
-> Redistribute Motion 1:3 (slice3)
Output: ((SubPlan 1))
-> Aggregate
Output: (SubPlan 1)
-> Result
Output: true
SubPlan 1
-> Result
Output: extra_flow_dist.c
Filter: (extra_flow_dist.b = max(1))
-> Materialize
Output: extra_flow_dist.b, extra_flow_dist.c
-> Gather Motion 3:1 (slice4; segments: 3)
Output: extra_flow_dist.b, extra_flow_dist.c
-> Seq Scan on subselect_gp.extra_flow_dist
Output: extra_flow_dist.b, extra_flow_dist.c
-> Result
Output: ((SubPlan 2))
Filter: (((SubPlan 2)) < '01-01-2010'::date)
-> Redistribute Motion 1:3 (slice5)
Output: ((SubPlan 2))
-> Aggregate
Output: (SubPlan 2)
-> Result
Output: true
SubPlan 2
-> Result
Output: extra_flow_dist_1.c
Filter: (extra_flow_dist_1.b = max(1))
-> Materialize
Output: extra_flow_dist_1.b, extra_flow_dist_1.c
-> Gather Motion 3:1 (slice6; segments: 3)
Output: extra_flow_dist_1.b, extra_flow_dist_1.c
-> Seq Scan on subselect_gp.extra_flow_dist extra_flow_dist_1
Output: extra_flow_dist_1.b, extra_flow_dist_1.c
Settings: enable_parallel = 'off', optimizer = 'on'
Optimizer: GPORCA
(59 rows)
-- Check DISTINCT ON clause and ORDER BY clause in SubLink, See https://github.com/greenplum-db/gpdb/issues/12656.
-- For EXISTS SubLink, we don’t need to care about the data deduplication problem, we can delete DISTINCT ON clause and
-- ORDER BY clause with confidence, because we only care about whether the data exists.
-- But for ANY SubLink, wo can't do this, because we not only care about the existence of data, but also the content of
-- the data.
create table issue_12656 (
i int,
j int
) distributed by (i);
insert into issue_12656 values (1, 10001), (1, 10002);
-- case 1, check basic DISTINCT ON
explain (costs off, verbose)
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656);
QUERY PLAN
----------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: issue_12656.i, issue_12656.j
-> Hash Join
Output: issue_12656.i, issue_12656.j
Inner Unique: true
Hash Cond: ((issue_12656.i = issue_12656_1.i) AND (issue_12656.j = issue_12656_1.j))
-> Seq Scan on subselect_gp.issue_12656
Output: issue_12656.i, issue_12656.j
-> Hash
Output: issue_12656_1.i, issue_12656_1.j
-> Unique
Output: issue_12656_1.i, issue_12656_1.j
Group Key: issue_12656_1.i
-> Sort
Output: issue_12656_1.i, issue_12656_1.j
Sort Key: issue_12656_1.i
-> Seq Scan on subselect_gp.issue_12656 issue_12656_1
Output: issue_12656_1.i, issue_12656_1.j
Optimizer: Postgres query optimizer
Settings: optimizer=on
(20 rows)
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656);
i | j
---+-------
1 | 10001
(1 row)
-- case 2, check DISTINCT ON and ORDER BY
explain (costs off, verbose)
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j asc);
QUERY PLAN
----------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: issue_12656.i, issue_12656.j
-> Hash Join
Output: issue_12656.i, issue_12656.j
Inner Unique: true
Hash Cond: ((issue_12656.i = issue_12656_1.i) AND (issue_12656.j = issue_12656_1.j))
-> Seq Scan on subselect_gp.issue_12656
Output: issue_12656.i, issue_12656.j
-> Hash
Output: issue_12656_1.i, issue_12656_1.j
-> Unique
Output: issue_12656_1.i, issue_12656_1.j
Group Key: issue_12656_1.i
-> Sort
Output: issue_12656_1.i, issue_12656_1.j
Sort Key: issue_12656_1.i, issue_12656_1.j
-> Seq Scan on subselect_gp.issue_12656 issue_12656_1
Output: issue_12656_1.i, issue_12656_1.j
Optimizer: Postgres query optimizer
Settings: optimizer=on
(20 rows)
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j asc);
i | j
---+-------
1 | 10001
(1 row)
explain (costs off, verbose)
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j desc);
QUERY PLAN
----------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: issue_12656.i, issue_12656.j
-> Hash Join
Output: issue_12656.i, issue_12656.j
Inner Unique: true
Hash Cond: ((issue_12656.i = issue_12656_1.i) AND (issue_12656.j = issue_12656_1.j))
-> Seq Scan on subselect_gp.issue_12656
Output: issue_12656.i, issue_12656.j
-> Hash
Output: issue_12656_1.i, issue_12656_1.j
-> Unique
Output: issue_12656_1.i, issue_12656_1.j
Group Key: issue_12656_1.i
-> Sort
Output: issue_12656_1.i, issue_12656_1.j
Sort Key: issue_12656_1.i, issue_12656_1.j DESC
-> Seq Scan on subselect_gp.issue_12656 issue_12656_1
Output: issue_12656_1.i, issue_12656_1.j
Optimizer: Postgres query optimizer
Settings: optimizer=on
(20 rows)
select * from issue_12656 where (i, j) in (select distinct on (i) i, j from issue_12656 order by i, j desc);
i | j
---+-------
1 | 10002
(1 row)
-- case 3, check correlated DISTINCT ON
explain select * from issue_12656 a where (i, j) in
(select distinct on (i) i, j from issue_12656 b where a.i=b.i order by i, j asc);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..895.00 rows=43050 width=8)
-> Seq Scan on issue_12656 a (cost=0.00..321.00 rows=14350 width=8)
Filter: (SubPlan 1)
SubPlan 1
-> Unique (cost=9818.00..10033.25 rows=1000 width=8)
-> Sort (cost=9818.00..10033.25 rows=86100 width=8)
Sort Key: b.j
-> Result (cost=0.00..2760.50 rows=86100 width=8)
Filter: (a.i = b.i)
-> Materialize (cost=0.00..1899.50 rows=86100 width=8)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=8)
-> Seq Scan on issue_12656 b (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(13 rows)
select * from issue_12656 a where (i, j) in
(select distinct on (i) i, j from issue_12656 b where a.i=b.i order by i, j asc);
i | j
---+-------
1 | 10001
(1 row)
---
--- Test param info is preserved when bringing a path to OuterQuery locus
---
drop table if exists param_t;
NOTICE: table "param_t" does not exist, skipping
create table param_t (i int, j int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into param_t select i, i from generate_series(1,10)i;
analyze param_t;
explain (costs off)
select * from param_t a where a.i in
(select count(b.j) from param_t b, param_t c,
lateral (select * from param_t d where d.j = c.j limit 10) s
where s.i = a.i
);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on param_t a
Filter: (SubPlan 1)
SubPlan 1
-> Aggregate
-> Nested Loop
-> Nested Loop
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on param_t c
-> Materialize
-> Result
Filter: (d.i = a.i)
-> Limit
-> Result
Filter: (d.j = c.j)
-> Materialize
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Seq Scan on param_t d
-> Materialize
-> Broadcast Motion 3:3 (slice4; segments: 3)
-> Seq Scan on param_t b
Optimizer: Postgres query optimizer
(23 rows)
select * from param_t a where a.i in
(select count(b.j) from param_t b, param_t c,
lateral (select * from param_t d where d.j = c.j limit 10) s
where s.i = a.i
);
i | j
----+----
10 | 10
(1 row)
drop table if exists param_t;
-- A guard test case for gpexpand's populate SQL
-- Some simple notes and background is: we want to compute
-- table size efficiently, it is better to avoid invoke
-- pg_relation_size() in serial on QD, since this function
-- will dispatch for each tuple. The bad pattern SQL is like
-- select pg_relation_size(oid) from pg_class where xxx
-- The idea is force pg_relations_size is evaluated on each
-- segment and the sum the result together to get the final
-- result. To make sure correctness, we have to evaluate
-- pg_relation_size before any motion. The skill here is
-- to wrap this in a subquery, due to volatile of pg_relation_size,
-- this subquery won't be pulled up. Plus the skill of
-- gp_dist_random('pg_class') we can achieve this goal.
-- the below test is to verify the plan, we should see pg_relation_size
-- is evaludated on each segment and then motion then sum together. The
-- SQL pattern is a catalog join a table size "dict".
set gp_enable_multiphase_agg = on;
-- force nestloop join to make test stable since we
-- are testing plan and do not care about where we
-- put hash table.
set enable_hashjoin = off;
set enable_nestloop = on;
set enable_indexscan = off;
set enable_bitmapscan = off;
explain (verbose on, costs off)
with cte(table_oid, size) as
(
select
table_oid,
sum(size) size
from (
select oid,
pg_relation_size(oid)
from gp_dist_random('pg_class')
) x(table_oid, size)
group by table_oid
)
select pc.relname, ts.size
from pg_class pc, cte ts
where pc.oid = ts.table_oid;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))))
-> Nested Loop
Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))))
Join Filter: (pc.oid = pg_class.oid)
-> Redistribute Motion 1:3 (slice2)
Output: pc.relname, pc.oid
Hash Key: pc.oid
-> Seq Scan on pg_catalog.pg_class pc
Output: pc.relname, pc.oid
-> Materialize
Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))))
-> HashAggregate
Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))
Group Key: pg_class.oid
-> Redistribute Motion 3:3 (slice3; segments: 3)
Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text))
Hash Key: pg_class.oid
-> Seq Scan on pg_catalog.pg_class
Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text)
Optimizer: Postgres query optimizer
(21 rows)
set gp_enable_multiphase_agg = off;
explain (verbose on, costs off)
with cte(table_oid, size) as
(
select
table_oid,
sum(size) size
from (
select oid,
pg_relation_size(oid)
from gp_dist_random('pg_class')
) x(table_oid, size)
group by table_oid
)
select pc.relname, ts.size
from pg_class pc, cte ts
where pc.oid = ts.table_oid;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))))
-> Nested Loop
Output: pc.relname, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))))
Join Filter: (pc.oid = pg_class.oid)
-> Redistribute Motion 1:3 (slice2)
Output: pc.relname, pc.oid
Hash Key: pc.oid
-> Seq Scan on pg_catalog.pg_class pc
Output: pc.relname, pc.oid
-> Materialize
Output: pg_class.oid, (sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text))))
-> HashAggregate
Output: pg_class.oid, sum((pg_relation_size((pg_class.oid)::regclass, 'main'::text)))
Group Key: pg_class.oid
-> Redistribute Motion 3:3 (slice3; segments: 3)
Output: pg_class.oid, (pg_relation_size((pg_class.oid)::regclass, 'main'::text))
Hash Key: pg_class.oid
-> Seq Scan on pg_catalog.pg_class
Output: pg_class.oid, pg_relation_size((pg_class.oid)::regclass, 'main'::text)
Optimizer: Postgres query optimizer
(21 rows)
reset gp_enable_multiphase_agg;
reset enable_hashjoin;
reset enable_nestloop;
reset enable_indexscan;
reset enable_bitmapscan;
create table sublink_outer_table(a int, b int) distributed by(b);
create table sublink_inner_table(x int, y bigint) distributed by(y);
set optimizer to off;
explain select t.* from sublink_outer_table t join (select y ,10*avg(x) s from sublink_inner_table group by y) RR on RR.y = t.b and t.a > rr.s;
QUERY PLAN
--------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=436.00..1310.77 rows=28700 width=8)
-> Hash Join (cost=436.00..928.11 rows=9567 width=8)
Hash Cond: (t.b = sublink_inner_table.y)
Join Filter: ((t.a)::numeric > (('10'::numeric * avg(sublink_inner_table.x))))
-> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8)
-> Hash (cost=431.83..431.83 rows=333 width=40)
-> HashAggregate (cost=423.50..428.50 rows=333 width=40)
Group Key: sublink_inner_table.y
-> Seq Scan on sublink_inner_table (cost=0.00..293.67 rows=25967 width=12)
Optimizer: Postgres query optimizer
(10 rows)
explain select * from sublink_outer_table T where a > (select 10*avg(x) from sublink_inner_table R where T.b=R.y);
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=436.00..1310.77 rows=28700 width=8)
-> Hash Join (cost=436.00..928.11 rows=9567 width=8)
Hash Cond: (t.b = "Expr_SUBQUERY".csq_c0)
Join Filter: ((t.a)::numeric > "Expr_SUBQUERY".csq_c1)
-> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8)
-> Hash (cost=431.83..431.83 rows=333 width=40)
-> Subquery Scan on "Expr_SUBQUERY" (cost=423.50..431.83 rows=333 width=40)
-> HashAggregate (cost=423.50..428.50 rows=333 width=40)
Group Key: r.y
-> Seq Scan on sublink_inner_table r (cost=0.00..293.67 rows=25967 width=12)
Optimizer: Postgres query optimizer
(11 rows)
set enable_hashagg to off;
explain select t.* from sublink_outer_table t join (select y ,10*avg(x) s from sublink_inner_table group by y) RR on RR.y = t.b and t.a > rr.s;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2404.84..3279.62 rows=28700 width=8)
-> Hash Join (cost=2404.84..2896.95 rows=9567 width=8)
Hash Cond: (t.b = sublink_inner_table.y)
Join Filter: ((t.a)::numeric > (('10'::numeric * avg(sublink_inner_table.x))))
-> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8)
-> Hash (cost=2400.67..2400.67 rows=333 width=40)
-> GroupAggregate (cost=2197.59..2397.34 rows=333 width=40)
Group Key: sublink_inner_table.y
-> Sort (cost=2197.59..2262.51 rows=25967 width=12)
Sort Key: sublink_inner_table.y
-> Seq Scan on sublink_inner_table (cost=0.00..293.67 rows=25967 width=12)
Optimizer: Postgres query optimizer
(12 rows)
explain select * from sublink_outer_table T where a > (select 10*avg(x) from sublink_inner_table R where T.b=R.y);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2404.84..3279.62 rows=28700 width=8)
-> Hash Join (cost=2404.84..2896.95 rows=9567 width=8)
Hash Cond: (t.b = "Expr_SUBQUERY".csq_c0)
Join Filter: ((t.a)::numeric > "Expr_SUBQUERY".csq_c1)
-> Seq Scan on sublink_outer_table t (cost=0.00..321.00 rows=28700 width=8)
-> Hash (cost=2400.67..2400.67 rows=333 width=40)
-> Subquery Scan on "Expr_SUBQUERY" (cost=2197.59..2400.67 rows=333 width=40)
-> GroupAggregate (cost=2197.59..2397.34 rows=333 width=40)
Group Key: r.y
-> Sort (cost=2197.59..2262.51 rows=25967 width=12)
Sort Key: r.y
-> Seq Scan on sublink_inner_table r (cost=0.00..293.67 rows=25967 width=12)
Optimizer: Postgres query optimizer
(13 rows)
drop table sublink_outer_table;
drop table sublink_inner_table;
reset optimizer;
reset enable_hashagg;
-- Ensure sub-queries with order by outer reference can be decorrelated and executed correctly.
create table r(a int, b int, c int) distributed by (a);
create table s(a int, b int, c int) distributed by (a);
insert into r values (1,2,3);
insert into s values (1,2,10);
explain (costs off) select * from r where b in (select b from s where c=10 order by r.c);
QUERY PLAN
---------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (r.b = s.b)
-> Seq Scan on r
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on s
Filter: (c = 10)
Optimizer: Pivotal Optimizer (GPORCA)
(9 rows)
select * from r where b in (select b from s where c=10 order by r.c);
a | b | c
---+---+---
1 | 2 | 3
(1 row)
explain (costs off) select * from r where b in (select b from s where c=10 order by r.c limit 2);
QUERY PLAN
------------------------------------------------------------
Hash Semi Join
Hash Cond: (r.b = s.b)
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on r
-> Hash
-> Limit
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on s
Filter: (c = 10)
Optimizer: Pivotal Optimizer (GPORCA)
(10 rows)
select * from r where b in (select b from s where c=10 order by r.c limit 2);
a | b | c
---+---+---
1 | 2 | 3
(1 row)
explain (costs off) select * from r where b in (select b from s where c=10 order by r.c, b);
QUERY PLAN
---------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (r.b = s.b)
-> Seq Scan on r
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on s
Filter: (c = 10)
Optimizer: Pivotal Optimizer (GPORCA)
(9 rows)
select * from r where b in (select b from s where c=10 order by r.c, b);
a | b | c
---+---+---
1 | 2 | 3
(1 row)
explain (costs off) select * from r where b in (select b from s where c=10 order by r.c, b limit 2);
QUERY PLAN
------------------------------------------------------------
Hash Semi Join
Hash Cond: (r.b = s.b)
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on r
-> Hash
-> Limit
-> Gather Motion 3:1 (slice2; segments: 3)
Merge Key: s.b
-> Sort
Sort Key: s.b
-> Seq Scan on s
Filter: (c = 10)
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select * from r where b in (select b from s where c=10 order by r.c, b limit 2);
a | b | c
---+---+---
1 | 2 | 3
(1 row)
explain (costs off) select * from r where b in (select b from s where c=10 order by c);
QUERY PLAN
---------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (r.b = s.b)
-> Seq Scan on r
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on s
Filter: (c = 10)
Optimizer: Pivotal Optimizer (GPORCA)
(9 rows)
select * from r where b in (select b from s where c=10 order by c);
a | b | c
---+---+---
1 | 2 | 3
(1 row)
explain (costs off) select * from r where b in (select b from s where c=10 order by c limit 2);
QUERY PLAN
------------------------------------------------------------
Hash Semi Join
Hash Cond: (r.b = s.b)
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on r
-> Hash
-> Limit
-> Gather Motion 3:1 (slice2; segments: 3)
Merge Key: s.c
-> Sort
Sort Key: s.c
-> Seq Scan on s
Filter: (c = 10)
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select * from r where b in (select b from s where c=10 order by c limit 2);
a | b | c
---+---+---
1 | 2 | 3
(1 row)
-- Test nested query with aggregate inside a sublink,
-- ORCA should correctly normalize the aggregate expression inside the
-- sublink's nested query and the column variable accessed in aggregate should
-- be accessible to the aggregate after the normalization of query.
-- If the query is not supported, ORCA should gracefully fallback to postgres
explain (COSTS OFF) with t0 AS (
SELECT
ROW_TO_JSON((SELECT x FROM (SELECT max(t.b)) x))
AS c
FROM r
JOIN s ON true
JOIN s as t ON true
)
SELECT c FROM t0;
QUERY PLAN
---------------------------------------------------------------------------------------
Finalize Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Nested Loop
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Nested Loop
-> Seq Scan on r
-> Materialize
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Seq Scan on s
-> Materialize
-> Seq Scan on s t
SubPlan 1
-> Result
Optimizer: Postgres query optimizer
(15 rows)
-- Test push predicate into subquery
-- more details could be found at https://github.com/greenplum-db/gpdb/issues/8429
CREATE TABLE foo_predicate_pushdown (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE bar_predicate_pushdown (c int, d int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
explain (costs off) select * from (
select distinct (select bar.c from bar_predicate_pushdown bar where c = foo.b) as ss from foo_predicate_pushdown foo
) ABC where ABC.ss = 5;
QUERY PLAN
-------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> GroupAggregate
Group Key: ((SubPlan 1))
-> Sort
Sort Key: ((SubPlan 1))
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: ((SubPlan 1))
-> Result
Filter: (((SubPlan 1)) = 5)
-> Seq Scan on foo_predicate_pushdown foo
SubPlan 1
-> Result
Filter: (bar.c = foo.b)
-> Materialize
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Seq Scan on bar_predicate_pushdown bar
Optimizer: GPORCA
(17 rows)
DROP TABLE foo_predicate_pushdown;
DROP TABLE bar_predicate_pushdown;
--
-- Test case for ORCA semi join with random table
-- See https://github.com/greenplum-db/gpdb/issues/16611
--
--- case for random distribute
create table table_left (l1 int, l2 int) distributed by (l1);
create table table_right (r1 int, r2 int) distributed randomly;
create index table_right_idx on table_right(r1);
insert into table_left values (1,1);
insert into table_right select i, i from generate_series(1, 300) i;
insert into table_right select 1, 1 from generate_series(1, 100) i;
--- make sure the same value (1,1) rows are inserted into different segments
select count(distinct gp_segment_id) > 1 from table_right where r1 = 1;
?column?
----------
t
(1 row)
analyze table_left;
analyze table_right;
-- two types of semi join tests
explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1);
QUERY PLAN
------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (table_left.l1 = table_right.r1)
-> Seq Scan on table_left
Filter: (NOT (l1 IS NULL))
-> Hash
-> Result
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: table_right.r1
-> Seq Scan on table_right
Optimizer: GPORCA
(11 rows)
select * from table_left where exists (select 1 from table_right where l1 = r1);
l1 | l2
----+----
1 | 1
(1 row)
explain (costs off) select * from table_left where l1 in (select r1 from table_right);
QUERY PLAN
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (table_left.l1 = table_right.r1)
-> Seq Scan on table_left
-> Hash
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: table_right.r1
-> Seq Scan on table_right
Optimizer: GPORCA
(9 rows)
select * from table_left where exists (select 1 from table_right where l1 = r1);
l1 | l2
----+----
1 | 1
(1 row)
--- case for replicate distribute
alter table table_right set distributed replicated;
explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1);
QUERY PLAN
-------------------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Nested Loop
Join Filter: true
-> Broadcast Motion 3:1 (slice2; segments: 3)
-> Seq Scan on table_left
Filter: (NOT (l1 IS NULL))
-> GroupAggregate
Group Key: table_right.r1
-> Index Scan using table_right_idx on table_right
Index Cond: (r1 = table_left.l1)
Optimizer: GPORCA
(11 rows)
select * from table_left where exists (select 1 from table_right where l1 = r1);
l1 | l2
----+----
1 | 1
(1 row)
explain (costs off) select * from table_left where l1 in (select r1 from table_right);
QUERY PLAN
-------------------------------------------------------------------
Gather Motion 1:1 (slice1; segments: 1)
-> Nested Loop
Join Filter: true
-> Broadcast Motion 3:1 (slice2; segments: 3)
-> Seq Scan on table_left
-> GroupAggregate
Group Key: table_right.r1
-> Index Scan using table_right_idx on table_right
Index Cond: (r1 = table_left.l1)
Optimizer: GPORCA
(10 rows)
select * from table_left where exists (select 1 from table_right where l1 = r1);
l1 | l2
----+----
1 | 1
(1 row)
--- case for partition table with random distribute
drop table table_right;
create table table_right (r1 int, r2 int) distributed randomly partition by range (r1) ( start (0) end (300) every (100));
create index table_right_idx on table_right(r1);
insert into table_right select i, i from generate_series(1, 299) i;
insert into table_right select 1, 1 from generate_series(1, 100) i;
analyze table_right;
explain (costs off) select * from table_left where exists (select 1 from table_right where l1 = r1);
QUERY PLAN
----------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (table_left.l1 = table_right.r1)
-> Seq Scan on table_left
Filter: (NOT (l1 IS NULL))
-> Hash
-> Result
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: table_right.r1
-> Dynamic Seq Scan on table_right
Number of partitions to scan: 3 (out of 3)
Optimizer: GPORCA
(12 rows)
select * from table_left where exists (select 1 from table_right where l1 = r1);
l1 | l2
----+----
1 | 1
(1 row)
explain (costs off) select * from table_left where l1 in (select r1 from table_right);
QUERY PLAN
----------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (table_left.l1 = table_right.r1)
-> Seq Scan on table_left
-> Hash
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: table_right.r1
-> Dynamic Seq Scan on table_right
Number of partitions to scan: 3 (out of 3)
Optimizer: GPORCA
(10 rows)
select * from table_left where exists (select 1 from table_right where l1 = r1);
l1 | l2
----+----
1 | 1
(1 row)
-- clean up
drop table table_left;
drop table table_right;
-- test cross params of initplan
-- https://github.com/greenplum-db/gpdb/issues/16268
create table tmp (a varchar, b varchar, c varchar);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select (SELECT EXISTS
(SELECT
FROM pg_views
WHERE schemaname = a)) from tmp;
exists
--------
(0 rows)
drop table tmp;
-- Test LEAST() and GREATEST() with an embedded subquery
drop table if exists foo;
create table foo (a int, b int) distributed by(a);
insert into foo values (1, 2), (2, 3), (3, 4);
analyze foo;
explain (costs off) select foo.a from foo where foo.a <= LEAST(foo.b, (SELECT 1), NULL);
QUERY PLAN
-------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on foo
Filter: (a <= LEAST(b, (SubPlan 1), NULL::integer))
SubPlan 1
-> Result
Optimizer: GPORCA
(6 rows)
select foo.a from foo where foo.a <= LEAST(foo.b, (SELECT 1), NULL);
a
---
1
(1 row)
explain (costs off) select foo.a from foo where foo.a <= GREATEST(foo.b, (SELECT 1), NULL);
QUERY PLAN
----------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on foo
Filter: (a <= GREATEST(b, (SubPlan 1), NULL::integer))
SubPlan 1
-> Result
Optimizer: GPORCA
(6 rows)
select foo.a from foo where foo.a <= GREATEST(foo.b, (SELECT 1), NULL);
a
---
2
3
1
(3 rows)
explain (costs off) select least((select 5), greatest(b, NULL, (select 1)), a) from foo;
QUERY PLAN
------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop Left Join
Join Filter: true
-> Nested Loop Left Join
Join Filter: true
-> Seq Scan on foo
-> Materialize
-> Result
-> Materialize
-> Result
Optimizer: GPORCA
(11 rows)
select least((select 5), greatest(b, NULL, (select 1)), a) from foo;
least
-------
2
3
1
(3 rows)
drop table foo;
-- Test subquery within ScalarArrayRef or ScalarArrayRefIndexList
drop table if exists bar;
create table bar (a int[], b int[][]) distributed by(a);
insert into bar values (ARRAY[1, 2, 3], ARRAY[[1, 2, 3], [4, 5, 6]]);
analyze bar;
explain (costs off) select (select a from bar)[1] from bar;
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop Left Join
Join Filter: true
-> Seq Scan on bar bar_1
-> Assert
Assert Cond: ((row_number() OVER (?)) = 1)
-> Materialize
-> Broadcast Motion 1:3 (slice2)
-> WindowAgg
-> Gather Motion 3:1 (slice3; segments: 3)
-> Seq Scan on bar
Optimizer: GPORCA
(12 rows)
select (select a from bar)[1] from bar;
a
---
1
(1 row)
explain (costs off) select (select a from bar)[(select 1)] from bar;
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop Left Join
Join Filter: true
-> Nested Loop Left Join
Join Filter: true
-> Seq Scan on bar bar_1
-> Materialize
-> Result
-> Assert
Assert Cond: ((row_number() OVER (?)) = 1)
-> Materialize
-> Broadcast Motion 1:3 (slice2)
-> WindowAgg
-> Gather Motion 3:1 (slice3; segments: 3)
-> Seq Scan on bar
Optimizer: GPORCA
(16 rows)
select (select a from bar)[(select 1)] from bar;
a
---
1
(1 row)
explain (costs off) select (select b from bar)[1][1:3] from bar;
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop Left Join
Join Filter: true
-> Seq Scan on bar bar_1
-> Assert
Assert Cond: ((row_number() OVER (?)) = 1)
-> Materialize
-> Broadcast Motion 1:3 (slice2)
-> WindowAgg
-> Gather Motion 3:1 (slice3; segments: 3)
-> Seq Scan on bar
Optimizer: GPORCA
(12 rows)
select (select b from bar)[1][1:3] from bar;
b
-----------
{{1,2,3}}
(1 row)
explain (costs off) select (select b from bar)[(select 1)][1:3] from bar;
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop Left Join
Join Filter: true
-> Nested Loop Left Join
Join Filter: true
-> Seq Scan on bar bar_1
-> Materialize
-> Result
-> Assert
Assert Cond: ((row_number() OVER (?)) = 1)
-> Materialize
-> Broadcast Motion 1:3 (slice2)
-> WindowAgg
-> Gather Motion 3:1 (slice3; segments: 3)
-> Seq Scan on bar
Optimizer: GPORCA
(16 rows)
select (select b from bar)[(select 1)][1:3] from bar;
b
-----------
{{1,2,3}}
(1 row)
drop table bar;
create table outer_foo(a int primary key, b int);
create table inner_bar(a int, b int) distributed randomly;
insert into outer_foo values (generate_series(1,20), generate_series(11,30));
insert into inner_bar values (generate_series(1,20), generate_series(25,44));
set optimizer to off;
explain (costs off) select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q order by 1;
QUERY PLAN
------------------------------------------------------------------------
Nested Loop
-> Gather Motion 3:1 (slice1; segments: 3)
Merge Key: t1.a
-> Index Scan using outer_foo_pkey on outer_foo t1
-> Materialize
-> HashAggregate
Group Key: t2.a
-> Result
Filter: (t1.b = t2.b)
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on inner_bar t2
Optimizer: Postgres-based planner
(13 rows)
explain (costs off) select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q;
QUERY PLAN
------------------------------------------------------------------------
Nested Loop
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on outer_foo t1
-> Materialize
-> HashAggregate
Group Key: t2.a
-> Result
Filter: (t1.b = t2.b)
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on inner_bar t2
Optimizer: Postgres-based planner
(12 rows)
select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q order by 1;
a
----
15
16
17
18
19
20
(6 rows)
create table t(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
explain (costs off) with cte(x) as (select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q order by 1)
select * from t where a > (select count(1) from cte where x > t.a + random());
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on t
Filter: (a > (SubPlan 1))
SubPlan 1
-> Aggregate
-> Result
Filter: ((t1.a)::double precision > ((t.a)::double precision + random()))
-> Nested Loop
-> Materialize
-> Sort
Sort Key: t1.a
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Index Scan using outer_foo_pkey on outer_foo t1
-> Materialize
-> HashAggregate
Group Key: t2.a
-> Result
Filter: (t1.b = t2.b)
-> Materialize
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Seq Scan on inner_bar t2
Optimizer: Postgres-based planner
(22 rows)
with cte(x) as (select t1.a from outer_foo t1, LATERAL(SELECT distinct t2.a from inner_bar t2 where t1.b=t2.b) q order by 1)
select * from t where a > (select count(1) from cte where x > t.a + random());
a | b
---+---
(0 rows)
reset optimizer;
drop table outer_foo;
drop table inner_bar;
drop table t;