blob: 84b381dcc8d69ea133a85ef6da167f9721efeb51 [file] [log] [blame]
create schema qp_subquery;
set search_path to qp_subquery;
set optimizer_trace_fallback to on;
begin;
CREATE TABLE SUBSELECT_TBL1 (f1 integer, f2 integer, f3 float);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO SUBSELECT_TBL1 VALUES (1, 2, 3);
INSERT INTO SUBSELECT_TBL1 VALUES (2, 3, 4);
INSERT INTO SUBSELECT_TBL1 VALUES (3, 4, 5);
INSERT INTO SUBSELECT_TBL1 VALUES (1, 1, 1);
INSERT INTO SUBSELECT_TBL1 VALUES (2, 2, 2);
INSERT INTO SUBSELECT_TBL1 VALUES (3, 3, 3);
INSERT INTO SUBSELECT_TBL1 VALUES (6, 7, 8);
INSERT INTO SUBSELECT_TBL1 VALUES (8, 9, NULL);
commit;
SELECT '' AS eight, * FROM SUBSELECT_TBL1 ORDER BY 2,3,4;
eight | f1 | f2 | f3
-------+----+----+----
| 1 | 1 | 1
| 1 | 2 | 3
| 2 | 2 | 2
| 2 | 3 | 4
| 3 | 3 | 3
| 3 | 4 | 5
| 6 | 7 | 8
| 8 | 9 |
(8 rows)
SELECT '' AS two, f1 AS "Constant Select" FROM SUBSELECT_TBL1
WHERE f1 IN (SELECT 1) ORDER BY 2;
two | Constant Select
-----+-----------------
| 1
| 1
(2 rows)
-- order 2
SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL1
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL1) ORDER BY 2;
six | Uncorrelated Field
-----+--------------------
| 1
| 1
| 2
| 2
| 3
| 3
(6 rows)
-- order 2
SELECT '' AS six, f1 AS "Uncorrelated Field" FROM SUBSELECT_TBL1
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL1 WHERE
f2 IN (SELECT f1 FROM SUBSELECT_TBL1)) ORDER BY 2;
six | Uncorrelated Field
-----+--------------------
| 1
| 1
| 2
| 2
| 3
| 3
(6 rows)
-- order 2,3
SELECT '' AS three, f1, f2
FROM SUBSELECT_TBL1
WHERE (f1, f2) NOT IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL1
WHERE f3 IS NOT NULL) ORDER BY 2,3;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
three | f1 | f2
-------+----+----
| 1 | 2
| 6 | 7
| 8 | 9
(3 rows)
SELECT 1 AS one WHERE 1 IN (SELECT 1);
one
-----
1
(1 row)
SELECT 1 AS zero WHERE 1 IN (SELECT 2);
zero
------
(0 rows)
SELECT 1 AS zero WHERE 1 NOT IN (SELECT 1);
zero
------
(0 rows)
SELECT '' AS six, f1 AS "Correlated Field", f2 AS "Second Field"
FROM SUBSELECT_TBL1 upper
WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL1 WHERE f1 = upper.f1);
six | Correlated Field | Second Field
-----+------------------+--------------
| 1 | 2
| 2 | 3
| 1 | 1
| 2 | 2
| 3 | 4
| 3 | 3
(6 rows)
SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL1 upper
WHERE f1 IN
(SELECT f2 FROM SUBSELECT_TBL1 WHERE CAST(upper.f2 AS float) = f3);
six | Correlated Field | Second Field
-----+------------------+--------------
| 2 | 4
| 1 | 1
| 2 | 2
| 3 | 5
| 3 | 3
(5 rows)
SELECT '' AS six, f1 AS "Correlated Field", f3 AS "Second Field"
FROM SUBSELECT_TBL1 upper
WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL1
WHERE f2 = CAST(f3 AS integer));
six | Correlated Field | Second Field
-----+------------------+--------------
| 1 | 3
| 2 | 4
| 3 | 5
| 6 | 8
(4 rows)
SELECT '' AS five, f1 AS "Correlated Field"
FROM SUBSELECT_TBL1
WHERE (f1, f2) IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL1
WHERE f3 IS NOT NULL);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
five | Correlated Field
------+------------------
| 3
| 3
| 2
| 1
| 2
(5 rows)
begin;
create table join_tab1 ( i integer, j integer, t text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO join_tab1 VALUES (1, 4, 'one');
ANALYZE join_tab1;
INSERT INTO join_tab1 VALUES (2, 3, 'two');
INSERT INTO join_tab1 VALUES (3, 2, 'three');
INSERT INTO join_tab1 VALUES (4, 1, 'four');
INSERT INTO join_tab1 VALUES (5, 0, 'five');
INSERT INTO join_tab1 VALUES (6, 6, 'six');
INSERT INTO join_tab1 VALUES (7, 7, 'seven');
INSERT INTO join_tab1 VALUES (8, 8, 'eight');
INSERT INTO join_tab1 VALUES (0, NULL, 'zero');
INSERT INTO join_tab1 VALUES (NULL, NULL, 'null');
INSERT INTO join_tab1 VALUES (NULL, 0, 'zero');
create table join_tab2 ( i integer, k integer);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO join_tab2 VALUES (1, -1);
ANALYZE join_tab2;
INSERT INTO join_tab2 VALUES (2, 2);
INSERT INTO join_tab2 VALUES (3, -3);
INSERT INTO join_tab2 VALUES (2, 4);
INSERT INTO join_tab2 VALUES (5, -5);
INSERT INTO join_tab2 VALUES (5, -5);
INSERT INTO join_tab2 VALUES (0, NULL);
INSERT INTO join_tab2 VALUES (NULL, NULL);
INSERT INTO join_tab2 VALUES (NULL, 0);
commit;
select * from ( SELECT '' AS "col", * FROM join_tab1 AS tx)A;
col | i | j | t
-----+---+---+-------
| 1 | 4 | one
| 2 | 3 | two
| 0 | | zero
| 3 | 2 | three
| 4 | 1 | four
| 5 | 0 | five
| 6 | 6 | six
| 7 | 7 | seven
| | | null
| | 0 | zero
| 8 | 8 | eight
(11 rows)
select * from ( SELECT '' AS "col", * FROM join_tab1 AS tx) AS A;
col | i | j | t
-----+---+---+-------
| 8 | 8 | eight
| 1 | 4 | one
| 2 | 3 | two
| 0 | | zero
| 3 | 2 | three
| 4 | 1 | four
| 5 | 0 | five
| 6 | 6 | six
| 7 | 7 | seven
| | | null
| | 0 | zero
(11 rows)
select * from(SELECT '' AS "col", * FROM join_tab1 AS tx) as A(a,b,c);
a | b | c | t
---+---+---+-------
| 3 | 2 | three
| 4 | 1 | four
| 5 | 0 | five
| 6 | 6 | six
| 7 | 7 | seven
| | | null
| | 0 | zero
| 8 | 8 | eight
| 1 | 4 | one
| 2 | 3 | two
| 0 | | zero
(11 rows)
select * from(SELECT '' AS "col", t1.a, t2.e FROM join_tab1 t1 (a, b, c), join_tab2 t2 (d, e)
WHERE t1.a = t2.d)as A;
col | a | e
-----+---+----
| 3 | -3
| 5 | -5
| 5 | -5
| 1 | -1
| 2 | 2
| 2 | 4
| 0 |
(7 rows)
select * from join_tab1 where exists(select * from join_tab2 where join_tab1.i=join_tab2.i);
i | j | t
---+---+-------
3 | 2 | three
5 | 0 | five
1 | 4 | one
2 | 3 | two
0 | | zero
(5 rows)
select * from join_tab1 where not exists(select * from join_tab2 where join_tab1.i=join_tab2.i) order by i,j;
i | j | t
---+---+-------
4 | 1 | four
6 | 6 | six
7 | 7 | seven
8 | 8 | eight
| 0 | zero
| | null
(6 rows)
select 25 = any ('{1,2,3,4}');
?column?
----------
f
(1 row)
select 25 = any ('{1,2,25}');
?column?
----------
t
(1 row)
select 'abc' = any('{abc,d,e}');
?column?
----------
t
(1 row)
create table subq_abc(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into subq_abc values(1);
insert into subq_abc values(9);
insert into subq_abc values(3);
insert into subq_abc values(6);
SELECT 9 = any (select * from subq_abc);
?column?
----------
t
(1 row)
select null::int >= any ('{}');
?column?
----------
f
(1 row)
select 'abc' = any('{" "}');
?column?
----------
f
(1 row)
select 33.4 = any (array[1,2,3]);
?column?
----------
f
(1 row)
select 40 = all ('{3,4,40,10}');
?column?
----------
f
(1 row)
select 55 >= all ('{1,2,55}');
?column?
----------
t
(1 row)
select 25 = all ('{25,25,25}');
?column?
----------
t
(1 row)
select 'abc' = all('{abc}');
?column?
----------
t
(1 row)
select 'abc' = all('{abc,d,e}');
?column?
----------
f
(1 row)
select 'abc' = all('{"abc"}');
?column?
----------
t
(1 row)
select 'abc' = all('{" "}');
?column?
----------
f
(1 row)
select null::int >= all ('{1,2,33}');
?column?
----------
(1 row)
select null::int >= all ('{}');
?column?
----------
t
(1 row)
select 33.4 > all (array[1,2,3]);
?column?
----------
t
(1 row)
create table emp_list(empid int,name char(20),sal float);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'empid' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into emp_list values(1,'empone',1000);
insert into emp_list values(2,'emptwo',2000);
insert into emp_list values(3,'empthree',3000);
insert into emp_list values(4,'empfour',4000);
insert into emp_list values(5,'empfive',4000);
select name from emp_list where sal=(select max(sal) from emp_list);
name
----------------------
empfour
empfive
(2 rows)
select name from emp_list where sal=(select min(sal) from emp_list);
name
----------------------
empone
(1 row)
select name from emp_list where sal>(select avg(sal) from emp_list);
name
----------------------
empthree
empfour
empfive
(3 rows)
select name from emp_list where sal<(select avg(sal) from emp_list);
name
----------------------
empone
emptwo
(2 rows)
CREATE TABLE subq_test1 (s1 INT, s2 CHAR(5), s3 FLOAT);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 's1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO subq_test1 VALUES (1,'1',1.0);
INSERT INTO subq_test1 VALUES (2,'2',2.0);
INSERT INTO subq_test1 VALUES (3,'3',3.0);
INSERT INTO subq_test1 VALUES (4,'4',4.0);
SELECT sb1,sb2,sb3 FROM (SELECT s1 AS sb1, s2 AS sb2, s3*2 AS sb3 FROM subq_test1) AS sb WHERE sb1 > 1;
sb1 | sb2 | sb3
-----+-------+-----
2 | 2 | 4
3 | 3 | 6
4 | 4 | 8
(3 rows)
select to_char(Avg(sum_col1),'9999999.9999999') from (select sum(s1) as sum_col1 from subq_test1 group by s1) as tab1;
to_char
------------------
2.5000000
(1 row)
select g2,count(*) from (select I, count(*) as g2 from join_tab1 group by I) as vtable group by g2;
g2 | count
----+-------
1 | 9
2 | 1
(2 rows)
begin;
create table join_tab4 ( i integer, j integer, t text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into join_tab4 values (1,7,'sunday');
insert into join_tab4 values (2,6,'monday');
insert into join_tab4 values (3,5,'tueday');
insert into join_tab4 values (4,4,'wedday');
insert into join_tab4 values (5,3,'thuday');
insert into join_tab4 values (6,2,'friday');
insert into join_tab4 values (7,1,'satday');
commit;
select i,j,t from (select * from (select i,j,t from join_tab1)as dtab1
UNION select * from(select i,j,t from join_tab4) as dtab2 )as mtab;
i | j | t
---+---+--------
1 | 7 | sunday
3 | 5 | tueday
4 | 1 | four
6 | 2 | friday
| | null
1 | 4 | one
2 | 6 | monday
5 | 0 | five
5 | 3 | thuday
6 | 6 | six
7 | 7 | seven
8 | 8 | eight
| 0 | zero
0 | | zero
2 | 3 | two
3 | 2 | three
4 | 4 | wedday
7 | 1 | satday
(18 rows)
select * from join_tab1 where i = (select i from join_tab4 where t='satday');
i | j | t
---+---+-------
7 | 7 | seven
(1 row)
select * from join_tab1 where i = (select i from join_tab4);
ERROR: one or more assertions failed
DETAIL: Expected no more than one row to be returned by expression
--
-- Test references to outer query in join quals
--
-- Single var
explain (costs off)
SELECT (SELECT join_tab1.i - join_tab2.i
FROM join_tab1, join_tab2 WHERE join_tab1.i = join_tab2.i and out.f1 > 0
LIMIT 1) as x
FROM subselect_tbl1 out;
QUERY PLAN
--------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on subselect_tbl1 "out"
SubPlan 1
-> Result
-> Limit
-> Result
One-Time Filter: ("out".f1 > 0)
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Hash Join
Hash Cond: (join_tab1.i = join_tab2.i)
-> Seq Scan on join_tab1
-> Hash
-> Seq Scan on join_tab2
Optimizer: Pivotal Optimizer (GPORCA)
(15 rows)
SELECT (SELECT join_tab1.i - join_tab2.i
FROM join_tab1, join_tab2 WHERE join_tab1.i = join_tab2.i and out.f1 > 0
LIMIT 1) as x
FROM subselect_tbl1 out;
x
---
0
0
0
0
0
0
0
0
(8 rows)
-- Two outer vars
explain (costs off)
SELECT (SELECT join_tab1.i - join_tab2.i
FROM join_tab1, join_tab2 WHERE join_tab1.i = join_tab2.i and out1.i = out2.i
LIMIT 1) as x
FROM join_tab1 out1, join_tab2 out2;
QUERY PLAN
-------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop
Join Filter: true
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on join_tab1 out1
-> Seq Scan on join_tab2 out2
SubPlan 1
-> Result
-> Limit
-> Result
One-Time Filter: (out1.i = out2.i)
-> Materialize
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Hash Join
Hash Cond: (join_tab1.i = join_tab2.i)
-> Seq Scan on join_tab1
-> Hash
-> Seq Scan on join_tab2
Optimizer: Pivotal Optimizer (GPORCA)
(19 rows)
SELECT (SELECT join_tab1.i - join_tab2.i
FROM join_tab1, join_tab2 WHERE join_tab1.i = join_tab2.i and out1.i = out2.i
LIMIT 1) as x
FROM join_tab1 out1, join_tab2 out2;
x
---
0
0
0
0
0
0
0
(99 rows)
-- Same, in an outer join
--
-- NOTE: The order that the rows come out from the subquery is not
-- deterministic, so we have to use a dummy coalesce() expression that
-- returns the same result regardless.
--
explain (costs off)
SELECT (SELECT coalesce(join_tab1.i + join_tab2.i, 0) >= 0
FROM join_tab1 LEFT JOIN join_tab2 ON join_tab1.i = join_tab2.i and out.f1 > 0
LIMIT 1) as x
FROM subselect_tbl1 out;
QUERY PLAN
----------------------------------------------------------------------------------------------
Result
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on subselect_tbl1 "out"
SubPlan 1
-> Result
-> Limit
-> Nested Loop Left Join
Join Filter: ((join_tab1.i = join_tab2.i) AND ("out".f1 > 0))
-> Materialize
-> Gather Motion 3:1 (slice3; segments: 3)
-> Seq Scan on join_tab1
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Seq Scan on join_tab2
Optimizer: Pivotal Optimizer (GPORCA)
(15 rows)
SELECT (SELECT coalesce(join_tab1.i + join_tab2.i, 0) >= 0
FROM join_tab1 LEFT JOIN join_tab2 ON join_tab1.i = join_tab2.i and out.f1 > 0
LIMIT 1) as x
FROM subselect_tbl1 out;
x
---
t
t
t
t
t
t
t
t
(8 rows)
explain (costs off)
SELECT (SELECT coalesce(join_tab1.i + join_tab2.i, 0) >= 0
FROM join_tab1 LEFT JOIN join_tab2 ON join_tab1.i = join_tab2.i and out1.i = out2.i
LIMIT 1) as x
FROM join_tab1 out1, join_tab2 out2;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Result
-> Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop
Join Filter: true
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on join_tab1 out1
-> Seq Scan on join_tab2 out2
SubPlan 1
-> Result
-> Limit
-> Nested Loop Left Join
Join Filter: ((join_tab1.i = join_tab2.i) AND (out1.i = out2.i))
-> Materialize
-> Gather Motion 3:1 (slice4; segments: 3)
-> Seq Scan on join_tab1
-> Materialize
-> Gather Motion 3:1 (slice3; segments: 3)
-> Seq Scan on join_tab2
Optimizer: Pivotal Optimizer (GPORCA)
(19 rows)
SELECT (SELECT coalesce(join_tab1.i + join_tab2.i, 0) >= 0
FROM join_tab1 LEFT JOIN join_tab2 ON join_tab1.i = join_tab2.i and out1.i = out2.i
LIMIT 1) as x
FROM join_tab1 out1, join_tab2 out2;
x
---
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
t
(99 rows)
--
-- Testing NOT-IN Subquery
--
create table Tbl8352_t1(a int, b int) distributed by (a);
create table Tbl8352_t2(a int, b int) distributed by (a);
insert into Tbl8352_t1 values(1,null),(null,1),(1,1),(null,null);
insert into Tbl8352_t2 values(1,1);
select * from Tbl8352_t1 where (Tbl8352_t1.a,Tbl8352_t1.b) not in (select Tbl8352_t2.a,Tbl8352_t2.b from Tbl8352_t2);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
(0 rows)
create table Tbl8352_t1a(a int, b int) distributed by (a);
create table Tbl8352_t2a(a int, b int) distributed by (a);
insert into Tbl8352_t1a values(1,2),(3,null),(null,4),(null,null);
insert into Tbl8352_t2a values(1,2);
select * from Tbl8352_t1a where (Tbl8352_t1a.a,Tbl8352_t1a.b) not in (select Tbl8352_t2a.a,Tbl8352_t2a.b from Tbl8352_t2a) order by 1,2;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 |
| 4
(2 rows)
select (1,null::int) not in (select 1,1);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
?column?
----------
(1 row)
select (3,null::int) not in (select 1,1);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
?column?
----------
t
(1 row)
begin;
create table t1(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t2(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t3(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t4(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into t1 values(1,2);
analyze t1;
insert into t1 values(3,4);
insert into t1 values(5,6);
insert into t2 values(1,2);
analyze t2;
insert into t2 values(3,4);
insert into t2 values(7,8);
insert into t3 values(1,2);
insert into t3 values(3,4);
insert into t4 values(1,2);
create table i1(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table i2(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into i1 values(1,2);
analyze i1;
commit;
--
-- not in subquery involving vars from different rels with inner join
--
select t1.a, t2.b from t1, t2 where t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
(1 row)
select t1.a, t2.b from t1 inner join t2 on (t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1)));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
(1 row)
select t1.a, t2.b from t1 inner join t2 on (t1.a=t2.a) where ((t1.a,t2.b) not in (select i1.a,i1.b from i1));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
(1 row)
-- unsupported case
explain select t1.a, t2.b from t1, t2 where t1.a=t2.a or ((t1.a,t2.b) not in (select i1.a,i1.b from i1));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
QUERY PLAN
------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000000002.10 rows=3 width=8)
-> Nested Loop (cost=10000000000.00..10000000002.05 rows=1 width=8)
Join Filter: ((t1.a = t2.a) OR (NOT (hashed SubPlan 1)))
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=4)
-> Seq Scan on t1 (cost=0.00..1.01 rows=1 width=4)
-> Seq Scan on t2 (cost=0.00..1.01 rows=1 width=8)
SubPlan 1
-> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..1.01 rows=1 width=8)
-> Seq Scan on i1 (cost=0.00..1.01 rows=1 width=8)
Optimizer: Postgres query optimizer
(10 rows)
--
-- not in subquery involving vars from different rels with left join.
--
select t1.a, t2.b from t1 left join t2 on (t1.a=t2.a) where ((t1.a,t2.b) not in (select i1.a,i1.b from i1));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 |
(2 rows)
select t1.a, t2.b from t1 left join t2 on (t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1)));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
1 |
3 | 4
5 |
(3 rows)
--
-- not in subquery involving vars from different rels with outer join
--
select t1.a, t2.b from t1 full outer join t2 on (t1.a=t2.a) where ((t1.a,t2.b) not in (select i1.a,i1.b from i1));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 |
| 8
(3 rows)
-- not in subquery with a row var in FULL JOIN condition
select t1.a, t2.b from t1 full outer join t2 on (t1.a=t2.a and ((t1.a,t2.b) not in (select i1.a,i1.b from i1)));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 |
| 8
1 |
| 2
(5 rows)
--
-- more complex case
--
select t1.a,t2.b from t1 left join (t2 inner join t3 on (t3.a not in (select t4.a from t4))) on (t1.a=t2.a);
a | b
---+---
1 | 2
3 | 4
5 |
(3 rows)
begin;
create table Tbl01(a int, b int, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into Tbl01 values(1,2,3);
insert into Tbl01 values(4,5,6);
insert into Tbl01 values(7,8,9);
insert into Tbl01 values(null,11,12);
create table Tbl03(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into Tbl03 values(1),(4);
create or replace function foo(int) returns int as $$
select case when $1 is null then 13::int
else null::int
end;
$$ language sql immutable;
commit;
select Tbl01.*,foo(Tbl01.a) as foo from Tbl01; -- showing foo values
a | b | c | foo
---+----+----+-----
1 | 2 | 3 |
4 | 5 | 6 |
7 | 8 | 9 |
| 11 | 12 | 13
(4 rows)
select Tbl01.* from Tbl01 where foo(Tbl01.a) not in (select a from Tbl03);
a | b | c
---+----+----
| 11 | 12
(1 row)
create table Tbl02 as select Tbl01.*,foo(Tbl01.a) as foo from Tbl01;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select Tbl02.* from Tbl02 where foo not in (select a from Tbl03);
a | b | c | foo
---+----+----+-----
| 11 | 12 | 13
(1 row)
begin;
create table Tbl04(a int, b int);
insert into Tbl04 values(1,2),(3,4),(5,6);
create table Tbl05(a int, b int);
insert into Tbl05 values(1,2);
create table Tbl06(a int, b int);
insert into Tbl06 values(1,2),(3,4);
create table i3(a int not null, b int not null);
insert into i3 values(1,2);
create table Tbl07(a int, b int);
insert into Tbl07 values(1,2),(3,4),(null,null);
create table Tbl08(a int, b int);
insert into Tbl08 values(1,2),(3,4),(null,null);
create table Tbl09(a int, b int);
insert into Tbl09 values(1,2),(5,null),(null,8);
analyze Tbl04;
analyze Tbl05;
analyze Tbl06;
analyze i3;
analyze Tbl07;
analyze Tbl08;
analyze Tbl09;
commit;
--
-- Positive cases: We should be inferring non-nullability of the not-in subquery. This should result in HLASJ.
--
-- non-nullability due to inner join
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05,Tbl06 where Tbl05.a=Tbl06.a and Tbl05.b < 10); -- expected: (3,4),(5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 inner join Tbl08 on (Tbl07.a=Tbl08.a and Tbl07.b=Tbl08.b) inner join i3 on (i3.a=Tbl08.a and i3.b=Tbl08.b)); -- expected:(3,4), (5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
-- non-nullability due to where clause condition
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05 where Tbl05.a < 2 and Tbl05.b < 10); -- expected: (3,4), (5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 left join Tbl08 on (Tbl07.a=Tbl08.a) where Tbl07.a = 1 and Tbl07.b = 2); -- expected: (3,4),(5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
-- not null condition in the where clause
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 full outer join Tbl08 on (Tbl07.a=Tbl08.a) where Tbl07.a is not null and Tbl07.b is not null); -- (5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
5 | 6
(1 row)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 left join Tbl08 on (Tbl07.a=Tbl08.a) where Tbl07.a is not null and Tbl07.b is not null); -- (5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
5 | 6
(1 row)
-- or clauses that should lead to non-nullability
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05 where (Tbl05.a < 2 or Tbl05.a > 100) AND (Tbl05.b < 4 or Tbl05.b > 100)); -- expected: (3,4), (5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
-- base-table constraints
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3); -- expected: (3,4),(5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05,i3 where Tbl05.a = i3.a and Tbl05.b = i3.b);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05,i3 where Tbl05.a < i3.a and Tbl05.b > i3.b);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
1 | 2
(3 rows)
-- non-null constant values
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select 1,2); -- (3,4),(5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in ((1,2));
a | b
---+---
3 | 4
5 | 6
(2 rows)
-- multiple NOT-IN expressions
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl06.a,Tbl06.b from Tbl06) and (Tbl04.a,Tbl04.b) not in (select i3.a, i3.b from i3); -- expected: (5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
5 | 6
(1 row)
explain (costs off)
select Tbl04.* from Tbl04 where not ((Tbl04.a,Tbl04.b) in (select Tbl06.a,Tbl06.b from Tbl06) or (Tbl04.a,Tbl04.b) in (select i3.a, i3.b from i3));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
QUERY PLAN
--------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Nested Loop Left Anti Semi (Not-In) Join
Join Filter: ((tbl04.a = i3.a) AND (tbl04.b = i3.b))
-> Nested Loop Left Anti Semi (Not-In) Join
Join Filter: ((tbl04.a = tbl06.a) AND (tbl04.b = tbl06.b))
-> Seq Scan on tbl04
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on tbl06
-> Materialize
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Seq Scan on i3
Optimizer: Postgres query optimizer
(13 rows)
select Tbl04.* from Tbl04 where not ((Tbl04.a,Tbl04.b) in (select Tbl06.a,Tbl06.b from Tbl06) or (Tbl04.a,Tbl04.b) in (select i3.a, i3.b from i3)); -- expected: (5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
5 | 6
(1 row)
-- single column in the target list should always give a HLASJ
select Tbl04.* from Tbl04 where Tbl04.a NOT IN (select Tbl09.a from Tbl09 where Tbl09.b is null); -- (1,2) (3,4)
a | b
---+---
3 | 4
1 | 2
(2 rows)
select Tbl04.* from Tbl04 where Tbl04.a NOT IN (select i3.a from i3);
a | b
---+---
3 | 4
5 | 6
(2 rows)
select Tbl04.* from Tbl04 where Tbl04.a NOT IN (select Tbl05.a from Tbl05 left join i3 on (Tbl05.a=i3.a));
a | b
---+---
3 | 4
5 | 6
(2 rows)
--
-- Negative tests: we should not be inferring non-nullability in these cases. Therefore, we should see NLASJ.
--
-- No where clause
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05); -- expected: (3,4), (5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
-- INDF in the where clause
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07,Tbl08 where Tbl07.a is not distinct from Tbl08.a and Tbl07.b is not distinct from Tbl08.b); -- no rows
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
(0 rows)
-- null conditions in the where clause
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 left join Tbl08 on (Tbl07.a=Tbl08.a and Tbl07.b=Tbl08.b) where Tbl07.a is null and Tbl07.b is null); -- no rows
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
(0 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07 full outer join Tbl08 on (Tbl07.a=Tbl08.a and Tbl07.b=Tbl08.b) where Tbl07.a is null and Tbl07.b is null); -- no rows
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
(0 rows)
-- OR clauses that should not lead to non-nullability
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl07.a,Tbl07.b from Tbl07,Tbl08 where Tbl07.a is not distinct from Tbl08.a or Tbl07.a=1); -- no rows
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
(0 rows)
-- values list: we don't support it yet. not worth the effort.
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (values(1,2),(3,4)); -- (3,4),(5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
5 | 6
(1 row)
-- functions/ops in the target list of the subquery
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a+2,i3.b+2 from i3); -- expected: (5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
1 | 2
5 | 6
(2 rows)
-- group by does not guarantee removal of nulls.
select Tbl09.a, Tbl09.b from Tbl09;
a | b
---+---
1 | 2
5 |
| 8
(3 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl09.a,Tbl09.b from Tbl09); -- expected: (3,4)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
(1 row)
select Tbl09.a, Tbl09.b from Tbl09 group by Tbl09.a, Tbl09.b;
a | b
---+---
5 |
| 8
1 | 2
(3 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl09.a, Tbl09.b from Tbl09 group by Tbl09.a, Tbl09.b); -- expected: (3,4)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
(1 row)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select sum(i3.b),i3.a from i3 group by i3.a); -- (1,2),(3,4),(5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
1 | 2
3 | 4
5 | 6
(3 rows)
-- infering not-nullability for only one of the columns
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,Tbl05.b from i3,Tbl05 where i3.a=Tbl05.a); -- (3,4),(5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) NOT IN (select i3.a,i3.b from Tbl07 left join i3 on (i3.a=Tbl07.a and i3.b=Tbl07.b) where i3.a > 2);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
1 | 2
(3 rows)
--
-- Unsupported test: These tests are meant to illustrate NOT-IN subqueries we do not support
-- Started supporting since RIO
--
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3 union select Tbl07.a, Tbl07.b from Tbl07); -- nulls in the inner side, should not return any rows
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
(0 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3 union all select Tbl07.a, Tbl07.b from Tbl07); -- nulls in the innder side, should not return any rows
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
(0 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select 1,2 union select 3,4); --(5,6)
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
5 | 6
(1 row)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3) or (Tbl04.a,Tbl04.b) not in (select Tbl07.a, Tbl07.b from Tbl07);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
-- Cases where the planner "should have" determined not-nullabitlity
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select i3.a,i3.b from i3 left join Tbl07 on (i3.a=Tbl07.a and i3.b=Tbl07.b));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
select Tbl04.* from Tbl04 where (Tbl04.a,Tbl04.b) not in (select Tbl05.a,Tbl05.b from Tbl05 where (Tbl05.a IN (select i3.a from i3)) AND (Tbl05.b IN (select i3.b from i3)));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---+---
3 | 4
5 | 6
(2 rows)
-- additional queries
drop table if exists Tbl04;
create table Tbl04(x int, y int);
insert into Tbl04 values(1,2);
insert into Tbl04 values(3,4);
create table Tbl10(x int, y int);
insert into Tbl10 values(1,null);
select * from Tbl04 where (x,y) not in (select x,y from Tbl10);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
x | y
---+---
3 | 4
(1 row)
select * from Tbl04 where (x,y) not in (select 1,y from Tbl10);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
x | y
---+---
3 | 4
(1 row)
select * from tbl10 where y not in (select 1 where false);
x | y
---+---
1 |
(1 row)
alter table Tbl10 alter column x set not null;
select * from Tbl04 where (x,y) not in (select x,y from Tbl10);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
x | y
---+---
3 | 4
(1 row)
begin;
create table TblText1(a text, b text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table TblText2(a text, b text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table TblText3(a text, b text);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into TblText1 values('rob', 'klopp');
insert into TblText1 values('florian','waas');
insert into TblText1 values('oak','barrett');
insert into TblText1 values('tushar','pednekar');
insert into TblText2 select * from TblText1;
insert into TblText3 values('florian','waas');
insert into TblText3 values('oak','barrett');
commit;
SELECT TblText1.a, TblText2.b FROM TblText1 JOIN TblText2 ON TblText1.a = TblText2.a WHERE ((NOT (TblText1.a, TblText2.b) IN (SELECT TblText3.a, TblText3.b FROM TblText3)));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
--------+----------
tushar | pednekar
rob | klopp
(2 rows)
SELECT TblText1.a, TblText2.b FROM TblText1 JOIN TblText2 ON TblText1.a = TblText2.a WHERE (( (TblText1.a, TblText2.b) IN (SELECT TblText3.a, TblText3.b FROM TblText3)));
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-Scalar Subquery
a | b
---------+---------
florian | waas
oak | barrett
(2 rows)
--
-- Delete
--
begin;
create table TabDel1(a int, b int);
insert into TabDel1 values(1,2),(3,4),(5,6);
create table TabDel2 as select * from TabDel1;
create table TabDel3(a int, b int);
insert into TabDel3 values(1,2);
create table TabDel4(a int not null, b int not null);
insert into TabDel4 values(1,2);
commit;
explain delete from TabDel1 where TabDel1.a not in (select a from TabDel3); -- do not support this because we produce NLASJ
QUERY PLAN
---------------------------------------------------------------------------------------------------
Delete on tabdel1 (cost=0.00..862.04 rows=1 width=1)
-> Hash Left Anti Semi (Not-In) Join (cost=0.00..862.00 rows=1 width=18)
Hash Cond: (tabdel1.a = tabdel3.a)
-> Seq Scan on tabdel1 (cost=0.00..431.00 rows=1 width=18)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on tabdel3 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(8 rows)
explain delete from TabDel2 where TabDel2.a not in (select a from TabDel4); -- support this
QUERY PLAN
---------------------------------------------------------------------------------------------------
Delete on tabdel2 (cost=0.00..862.03 rows=1 width=1)
-> Hash Left Anti Semi (Not-In) Join (cost=0.00..862.00 rows=1 width=18)
Hash Cond: (tabdel2.a = tabdel4.a)
-> Seq Scan on tabdel2 (cost=0.00..431.00 rows=1 width=18)
-> Hash (cost=431.00..431.00 rows=3 width=4)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=3 width=4)
-> Seq Scan on tabdel4 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(8 rows)
delete from TabDel2 where TabDel2.a not in (select a from TabDel4);
select * from TabDel2;
a | b
---+---
1 | 2
(1 row)
--
-- Update
--
begin;
create table TblUp1(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into TblUp1 values(1,2),(3,4),(5,6);
create table TblUp2 as select * from TblUp1;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
create table TblUp3(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into TblUp3 values(1,2);
create table TblUp4(a int not null, b int not null);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into TblUp4 values(1,2);
commit;
-- planner does not support updates on distribution keys
update TblUp1 set a=100 where a not in (select a from TblUp3);
select * from TblUp1;
a | b
-----+---
100 | 4
100 | 6
1 | 2
(3 rows)
update TblUp2 set a=100 where a not in (select a from TblUp4);
select * from TblUp2;
a | b
-----+---
100 | 4
1 | 2
100 | 6
(3 rows)
--
-- Check for correct results for subqueries nested inside a scalar expression
--
begin;
create table subselect_tab1 (a int, b text, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table subselect_tab2 (a int, b int, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table subselect_tab3 (a int, b text, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into subselect_tab1 VALUES (100, 'false', 1);
insert into subselect_tab1 VALUES (200, 'true', 2);
insert into subselect_tab2 VALUES (2,2,2);
insert into subselect_tab3 VALUES (200, 'falseg', 1);
commit;
-- scalar subquery in a null test expression
select * from subselect_tab1 where (select b from subselect_tab2) is null;
a | b | c
---+---+---
(0 rows)
-- ANY subquery nested in a scalar comparison expression
select * from subselect_tab1 where b::bool = ( c = any(select c from subselect_tab2));
a | b | c
-----+-------+---
200 | true | 2
100 | false | 1
(2 rows)
-- ALL subquery deeply nested in a scalar expression
select * from subselect_tab3 where b = ( a < all(select c from subselect_tab2) || 'g');
a | b | c
-----+--------+---
200 | falseg | 1
(1 row)
-- EXISTS subquery nested in a boolean expression
select * from subselect_tab1 where b::bool = (exists(select c from subselect_tab2) and not exists (select c from subselect_tab3));
a | b | c
-----+-------+---
100 | false | 1
(1 row)
-- ALL and EXISTS nested in a CASE-WHEN-THEN expression
select * from subselect_tab1 where case when b is not null then (subselect_tab1.c < all(select c from subselect_tab2 where exists (select * from subselect_tab3))) else false end;
a | b | c
-----+-------+---
100 | false | 1
(1 row)
-- EXISTS subquery nested in a scalar comparison expression
select * from subselect_tab1 where b::bool = exists(select c from subselect_tab2);
a | b | c
-----+------+---
200 | true | 2
(1 row)
-- a few more complex combinations..
SELECT * FROM subselect_tab3 WHERE (EXISTS(SELECT c FROM subselect_tab2) AND NOT EXISTS (SELECT c from subselect_tab3)) IN (SELECT b::BOOL from subselect_tab1);
a | b | c
-----+--------+---
200 | falseg | 1
(1 row)
SELECT * FROM subselect_tab3 WHERE (NOT EXISTS(SELECT c FROM subselect_tab2)) IN (SELECT b::boolean from subselect_tab1);
a | b | c
-----+--------+---
200 | falseg | 1
(1 row)
-- Test to verify that planner for subqueries, generates different copy of SubPlans referring to the same initplan
-- and does not Assert on the subplan's being same
create table append_rel(att1 int, att2 int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'att1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table append_rel1(att3 int) INHERITS (append_rel);
NOTICE: table has parent, setting distribution columns to match parent table
create table append_rel2(att4 int) INHERITS(append_rel);
NOTICE: table has parent, setting distribution columns to match parent table
insert into append_rel values(1,10),(2,20),(3,30);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables
analyze append_rel;
explain with test as (select * from (select * from append_rel) p where att1 in (select att1 from append_rel where att2 >= 19) ) select att2 from append_rel where att1 in (select att1 from test where att2 <= 21);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2296.15..4165.55 rows=51935 width=4)
-> Hash Join (cost=2296.15..3473.08 rows=17312 width=4)
Hash Cond: (append_rel.att1 = test.att1)
-> Append (cost=0.00..848.02 rows=51934 width=8)
-> Seq Scan on append_rel append_rel_1 (cost=0.00..1.01 rows=1 width=8)
-> Seq Scan on append_rel1 append_rel_2 (cost=0.00..293.67 rows=25967 width=8)
-> Seq Scan on append_rel2 append_rel_3 (cost=0.00..293.67 rows=25967 width=8)
-> Hash (cost=2283.65..2283.65 rows=1000 width=4)
-> HashAggregate (cost=2280.31..2283.65 rows=1000 width=4)
Group Key: test.att1
-> Subquery Scan on test (cost=1021.14..2063.92 rows=17312 width=4)
-> Hash Semi Join (cost=1021.14..2063.92 rows=17312 width=8)
Hash Cond: (append_rel_4.att1 = append_rel_5.att1)
-> Append (cost=0.00..804.74 rows=17312 width=8)
-> Seq Scan on append_rel append_rel_6 (cost=0.00..1.01 rows=1 width=8)
Filter: (att2 <= 21)
-> Seq Scan on append_rel1 append_rel_7 (cost=0.00..358.58 rows=8656 width=8)
Filter: (att2 <= 21)
-> Seq Scan on append_rel2 append_rel_8 (cost=0.00..358.58 rows=8656 width=8)
Filter: (att2 <= 21)
-> Hash (cost=804.74..804.74 rows=17312 width=4)
-> Append (cost=0.00..804.74 rows=17312 width=4)
-> Seq Scan on append_rel append_rel_9 (cost=0.00..1.01 rows=1 width=4)
Filter: (att2 >= 19)
-> Seq Scan on append_rel1 append_rel_10 (cost=0.00..358.58 rows=8656 width=4)
Filter: (att2 >= 19)
-> Seq Scan on append_rel2 append_rel_11 (cost=0.00..358.58 rows=8656 width=4)
Filter: (att2 >= 19)
Optimizer: Postgres query optimizer
(29 rows)
with test as (select * from (select * from append_rel) p where att1 in (select att1 from append_rel where att2 >= 19) ) select att2 from append_rel where att1 in (select att1 from test where att2 <= 21);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Inherited tables
att2
------
20
(1 row)
-- Check correct results for subqueries in the target list
begin;
drop table if exists temp_a, temp_b, temp_c;
NOTICE: table "temp_a" does not exist, skipping
NOTICE: table "temp_b" does not exist, skipping
NOTICE: table "temp_c" does not exist, skipping
create table temp_a (a int ,b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table temp_b (b int ,c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table temp_c (c int, d int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into temp_a values (1,2), (2,3), (3,4), (4,7), (5,19), (6,13), (7,23), (7,17);
insert into temp_b values (1,2), (2,2), (3,2), (4,2), (5,3), (6,3), (7,3), (8,3), (10,4);
insert into temp_c values (NULL, 2), (2, 2), (4, NULL), (NULL, 3), (1, 3), (8, NULL), (7, 2), (NULL, NULL);
commit;
select sum(case when b in (select b from temp_b where t.a>c) then 1 else 0 end),sum(case when not( b in (select b from temp_b where t.a>c)) then 1 else 0 end) from temp_a t;
sum | sum
-----+-----
2 | 6
(1 row)
select sum(case when b in (select b from temp_b where EXISTS (select sum(d) from temp_c where t.a > d)) then 1 else 0 end),sum(case when not( b in (select b from temp_b where t.a>c)) then 1 else 0 end) from temp_a t;
sum | sum
-----+-----
4 | 6
(1 row)
select sum(case when b in (select b from temp_b where EXISTS (select sum(d) from temp_c where t.a > d or t.a > temp_b.c)) then 1 else 0 end),sum(case when not( b in (select b from temp_b, temp_c where t.a>temp_b.c or t.a > temp_c.d)) then 1 else 0 end) from temp_a t;
sum | sum
-----+-----
4 | 6
(1 row)
-- Check that predicate with set-returning function is not pushed down
create table table_with_array_column (an_array_column double precision[]);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'an_array_column' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into table_with_array_column values (array[1.1, 2.2]);
explain (costs off)
select *
from (
select unnest(t1.an_array_column) unnested_array_column
from table_with_array_column t1, table_with_array_column t2) zz
where unnested_array_column is not null;
QUERY PLAN
---------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Result
Filter: (NOT ((unnest(t1.an_array_column)) IS NULL))
-> ProjectSet
-> Nested Loop
Join Filter: true
-> Seq Scan on table_with_array_column t1
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on table_with_array_column t2
Optimizer: Pivotal Optimizer (GPORCA)
(11 rows)
select *
from (
select unnest(t1.an_array_column) unnested_array_column
from table_with_array_column t1, table_with_array_column t2) zz
where unnested_array_column is not null;
unnested_array_column
-----------------------
1.1
2.2
(2 rows)
-- check that predicate is not pushed through a projected non-correlated subquery
create table subquery_nonpush_through_1(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table subquery_nonpush_through_2(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
explain (costs off)
select *
from(
select (subquery_nonpush_through_1.a in (select a from subquery_nonpush_through_2))::text as xx, subquery_nonpush_through_1.b
from subquery_nonpush_through_1,subquery_nonpush_through_2) t
where xx='dd';
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Result
Filter: ((((hashed SubPlan 1))::text) = 'dd'::text)
-> Nested Loop
Join Filter: true
-> Seq Scan on subquery_nonpush_through_1
-> Materialize
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on subquery_nonpush_through_2
SubPlan 1
-> Result
-> Result
-> Materialize
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Seq Scan on subquery_nonpush_through_2 subquery_nonpush_through_2_1
Optimizer: Pivotal Optimizer (GPORCA)
(19 rows)
select *
from(
select (subquery_nonpush_through_1.a in (select a from subquery_nonpush_through_2))::text as xx, subquery_nonpush_through_1.b
from subquery_nonpush_through_1,subquery_nonpush_through_2) t
where xx='dd';
xx | b
----+---
(0 rows)
-- Ensure we produce a hashed subplan when there are no outer references
CREATE TABLE a1 AS (
SELECT * FROM generate_series(1, 5) AS a1)
WITH data distributed replicated;
CREATE TABLE a2 AS (
SELECT * FROM generate_series(1, 10) AS a1)
WITH data distributed BY (a1);
CREATE TABLE a3 AS (
SELECT a1, row_to_json(a2) AS rj FROM a2)
WITH data distributed BY (a1);
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Whole-row variable
-- explain "verbose" is needed to show that the subplan is hashed
explain (verbose, costs off) select a1,case when a2 in (select a1::text from a1 where a1 is not null) then 'true' else 'false' end as checkcol
from (
select a1,rj->>'a1'::text as a2
from a3
)t;
QUERY PLAN
----------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Output: a3.a1, (CASE WHEN (hashed SubPlan 1) THEN 'true'::text ELSE 'false'::text END)
-> Seq Scan on qp_subquery.a3
Output: a3.a1, CASE WHEN (hashed SubPlan 1) THEN 'true'::text ELSE 'false'::text END
SubPlan 1
-> Result
Output: ((a1.a1)::text)
-> Result
Output: (a1.a1)::text, true
-> Seq Scan on qp_subquery.a1
Output: a1.a1
Filter: (NOT (a1.a1 IS NULL))
Optimizer: GPORCA
(13 rows)
select a1,case when a2 in (select a1::text from a1 where a1 is not null) then 'true' else 'false' end as checkcol
from (
select a1,rj->>'a1'::text as a2
from a3
)t;
a1 | checkcol
----+----------
2 | true
3 | true
4 | true
7 | false
8 | false
1 | true
5 | true
6 | false
9 | false
10 | false
(10 rows)
-- check various [NOT] EXISTS subqueries on materialized views
create table t (a int, b int) distributed by (a);
insert into t values (1, 1), (2, NULL), (NULL, 3);
create materialized view v as select a, b from t distributed randomly;
select * from v where exists (select a from v);
a | b
---+---
2 |
| 3
1 | 1
(3 rows)
select * from v where exists (select a from v limit 0);
a | b
---+---
(0 rows)
select * from v where exists (select a from v where a=2);
a | b
---+---
2 |
| 3
1 | 1
(3 rows)
select * from v where exists (select a from v where a<>2);
a | b
---+---
2 |
| 3
1 | 1
(3 rows)
select * from v where not exists (select a from v);
a | b
---+---
(0 rows)
select * from v where not exists (select a from v limit 0);
a | b
---+---
2 |
| 3
1 | 1
(3 rows)
select * from v where not exists (select a from v where a=2);
a | b
---+---
(0 rows)
select * from v where not exists (select a from v where a<>2);
a | b
---+---
(0 rows)
select * from v where exists (select b from v);
a | b
---+---
2 |
| 3
1 | 1
(3 rows)
select * from v where exists (select b from v limit 0);
a | b
---+---
(0 rows)
select * from v where exists (select b from v where b=2);
a | b
---+---
(0 rows)
select * from v where exists (select b from v where b<>2);
a | b
---+---
2 |
| 3
1 | 1
(3 rows)
select * from v where not exists (select b from v);
a | b
---+---
(0 rows)
select * from v where not exists (select b from v limit 0);
a | b
---+---
2 |
| 3
1 | 1
(3 rows)
select * from v where not exists (select b from v where b=2);
a | b
---+---
2 |
| 3
1 | 1
(3 rows)
select * from v where not exists (select b from v where b<>2);
a | b
---+---
(0 rows)
-- Check that a query having pattern of Select-Project-NaryJoin,
-- also containing a Select predicate condition with the same pattern nested in a subquery runs
CREATE TABLE tab1(a TEXT, b TEXT) DISTRIBUTED RANDOMLY;
INSERT INTO tab1 SELECT i,i FROM GENERATE_SERIES(1,3)i;
SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1
WHERE EXISTS
(SELECT 1 FROM
(SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2
WHERE t2.param = t1.param);
param
-------
3
1
2
(3 rows)
EXPLAIN (COSTS OFF) SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1
WHERE EXISTS
(SELECT 1 FROM
(SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2
WHERE t2.param = t1.param);
QUERY PLAN
---------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Semi Join
Hash Cond: (btrim(p1.b) = btrim(p1_1.b))
-> Hash Join
Hash Cond: (p1.a = p2.a)
-> Seq Scan on tab1 p1
Filter: (NOT (btrim(b) IS NULL))
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Seq Scan on tab1 p2
-> Hash
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Hash Join
Hash Cond: (p1_1.a = p2_1.a)
-> Seq Scan on tab1 p1_1
-> Hash
-> Broadcast Motion 3:3 (slice4; segments: 3)
-> Seq Scan on tab1 p2_1
Optimizer: GPORCA
(19 rows)
-- Check that a query having pattern of Select-Project-NaryJoin,
-- also containing a Select predicate condition with the same pattern nested in a subquery runs when subplan is enforced
SET optimizer_enforce_subplans TO on;
SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1
WHERE EXISTS
(SELECT 1 FROM
(SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2
WHERE t2.param = t1.param);
param
-------
2
1
3
(3 rows)
EXPLAIN (COSTS OFF) SELECT * FROM (SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t1
WHERE EXISTS
(SELECT 1 FROM
(SELECT BTRIM(p1.b) AS param FROM tab1 p1 JOIN tab1 p2 USING(a)) t2
WHERE t2.param = t1.param);
QUERY PLAN
-----------------------------------------------------------------------------------------
Hash Join
Hash Cond: (p1.a = p2_1.a)
-> Result
Filter: (SubPlan 1)
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on tab1 p1
Filter: (NOT (btrim(b) IS NULL))
SubPlan 1
-> Result
Filter: (btrim(p1_1.b) = btrim(p1.b))
-> Materialize
-> Gather Motion 3:1 (slice2; segments: 3)
-> Hash Join
Hash Cond: (p1_1.a = p2.a)
-> Seq Scan on tab1 p1_1
-> Hash
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Seq Scan on tab1 p2
-> Hash
-> Gather Motion 3:1 (slice4; segments: 3)
-> Seq Scan on tab1 p2_1
Optimizer: GPORCA
(22 rows)
reset optimizer_enforce_subplans;
set client_min_messages='warning';
drop schema qp_subquery cascade;
reset optimizer_trace_fallback;