blob: 5b1688fe00ad774a822e017f637ec613ff3b9f60 [file] [log] [blame]
--
-- NOTIN
-- Test NOTIN clauses
--
create schema notin;
set search_path=notin;
--
-- generate a bunch of tables
--
create table t1 (
c1 integer
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t2 (
c2 integer
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c2' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t3 (
c3 integer
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c3' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t4 (
c4 integer
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c4' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t1n (
c1n integer
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1n' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table g1 (
a integer,
b integer,
c integer
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table l1 (
w integer,
x integer,
y integer,
z integer
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'w' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
--
-- stick in some values
--
insert into t1 values (generate_series (1,10));
insert into t2 values (generate_series (1,5));
insert into t3 values (1), (2), (3);
insert into t4 values (1), (2);
insert into t1n values (1), (2), (3), (null), (5), (6), (7);
insert into g1 values
(1,1,1),
(1,1,2),
(1,2,2),
(2,2,2),
(2,2,3),
(2,3,3),
(3,3,3),
(3,3,3),
(3,3,4),
(3,4,4),
(4,4,4);
insert into l1 values (generate_series (1,10), generate_series (1,10), generate_series (1,10), generate_series (1,10));
analyze t1;
analyze t2;
analyze t3;
analyze t4;
analyze t1n;
analyze g1;
analyze l1;
--
-- queries
--
--
--q1
--
explain select c1 from t1 where c1 not in
(select c2 from t2);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.44..5.63 rows=4 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=2.44..5.63 rows=2 width=4)
Hash Cond: t1.c1 = t2.c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Hash (cost=2.25..2.25 rows=5 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.25 rows=5 width=4)
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
Optimizer status: Postgres query optimizer
(8 rows)
select c1 from t1 where c1 not in
(select c2 from t2);
c1
----
6
8
10
7
9
(5 rows)
--
--q2
--
explain select c1 from t1 where c1 not in
(select c2 from t2 where c2 > 2 and c2 not in
(select c3 from t3));
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.21..3.31 rows=3 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=2.21..3.26 rows=1 width=4)
Hash Cond: (t1.c1 = t2.c2)
-> Seq Scan on t1 (cost=0.00..1.03 rows=3 width=4)
-> Hash (cost=2.17..2.17 rows=3 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=1.09..2.17 rows=3 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=1.09..2.12 rows=1 width=4)
Hash Cond: (t2.c2 = t3.c3)
-> Seq Scan on t2 (cost=0.00..1.02 rows=1 width=4)
Filter: (c2 > 2)
-> Hash (cost=1.05..1.05 rows=3 width=4)
-> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..1.05 rows=3 width=4)
-> Seq Scan on t3 (cost=0.00..1.01 rows=1 width=4)
Optimizer: Postgres query optimizer
(14 rows)
select c1 from t1 where c1 not in
(select c2 from t2 where c2 > 2 and c2 not in
(select c3 from t3));
c1
----
1
3
7
9
2
6
8
10
(8 rows)
--
--q3
--
explain select c1 from t1 where c1 not in
(select c2 from t2 where c2 not in
(select c3 from t3 where c3 not in
(select c4 from t4)));
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=5.90..9.08 rows=4 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=5.90..9.08 rows=2 width=4)
Hash Cond: t1.c1 = t2.c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Hash (cost=5.77..5.77 rows=4 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=3.52..5.77 rows=4 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=3.52..5.64 rows=2 width=4)
Hash Cond: t2.c2 = t3.c3
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
-> Hash (cost=3.39..3.39 rows=4 width=4)
-> Broadcast Motion 3:3 (slice3; segments: 3) (cost=1.18..3.39 rows=4 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=1.18..3.26 rows=2 width=4)
Hash Cond: t3.c3 = t4.c4
-> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4)
-> Hash (cost=1.10..1.10 rows=2 width=4)
-> Broadcast Motion 3:3 (slice4; segments: 3) (cost=0.00..1.10 rows=2 width=4)
-> Seq Scan on t4 (cost=0.00..1.02 rows=1 width=4)
Optimizer status: Postgres query optimizer
(18 rows)
select c1 from t1 where c1 not in
(select c2 from t2 where c2 not in
(select c3 from t3 where c3 not in
(select c4 from t4)));
c1
----
6
8
10
3
7
9
(6 rows)
--
--q4
--
explain select c1 from t1,
(select c2 from t2 where c2 not in
(select c3 from t3)) foo
where c1 = foo.c2;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=1.14..2.24 rows=3 width=4)
-> Hash Join (cost=1.14..2.19 rows=1 width=4)
Hash Cond: (t1.c1 = t2.c2)
-> Seq Scan on t1 (cost=0.00..1.03 rows=3 width=4)
-> Hash (cost=1.12..1.12 rows=1 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=0.09..1.12 rows=1 width=4)
Hash Cond: (t2.c2 = t3.c3)
-> Seq Scan on t2 (cost=0.00..1.02 rows=2 width=4)
-> Hash (cost=0.05..0.05 rows=3 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..0.05 rows=3 width=4)
-> Seq Scan on t3 (cost=0.00..0.01 rows=1 width=4)
Optimizer: Postgres query optimizer
(12 rows)
select c1 from t1,
(select c2 from t2 where c2 not in
(select c3 from t3)) foo
where c1 = foo.c2;
c1
----
5
4
(2 rows)
--
--q5
--
explain select c1 from t1,
(select c2 from t2 where c2 not in
(select c3 from t3) and c2 > 4) foo
where c1 = foo.c2;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=5.34..8.58 rows=4 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=5.34..8.58 rows=2 width=4)
Hash Cond: (t2.c2 = t3.c3)
-> Hash Join (cost=3.08..6.26 rows=2 width=8)
Hash Cond: (t1.c1 = t2.c2)
-> Seq Scan on t1 (cost=0.00..3.12 rows=3 width=4)
Filter: (c1 > 4)
-> Hash (cost=3.06..3.06 rows=1 width=4)
-> Seq Scan on t2 (cost=0.00..3.06 rows=1 width=4)
Filter: (c2 > 4)
-> Hash (cost=2.15..2.15 rows=3 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.15 rows=3 width=4)
-> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4)
Optimizer: Postgres query optimizer
(14 rows)
select c1 from t1,
(select c2 from t2 where c2 not in
(select c3 from t3) and c2 > 4) foo
where c1 = foo.c2;
c1
----
5
(1 row)
--
--q6
--
explain select c1 from t1 where c1 not in
(select c2 from t2) and c1 > 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.44..5.66 rows=4 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=2.44..5.66 rows=2 width=4)
Hash Cond: t1.c1 = t2.c2
-> Seq Scan on t1 (cost=0.00..3.12 rows=4 width=4)
Filter: c1 > 1
-> Hash (cost=2.25..2.25 rows=5 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.25 rows=5 width=4)
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
Optimizer status: Postgres query optimizer
(9 rows)
select c1 from t1 where c1 not in
(select c2 from t2) and c1 > 1;
c1
----
7
9
6
8
10
(5 rows)
--
--q7
--
explain select c1 from t1 where c1 > 6 and c1 not in
(select c2 from t2) and c1 < 10;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.44..5.65 rows=4 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=2.44..5.65 rows=2 width=4)
Hash Cond: t1.c1 = t2.c2
-> Seq Scan on t1 (cost=0.00..3.15 rows=2 width=4)
Filter: c1 > 6 AND c1 < 10
-> Hash (cost=2.25..2.25 rows=5 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.25 rows=5 width=4)
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
Optimizer status: Postgres query optimizer
(9 rows)
select c1 from t1 where c1 > 6 and c1 not in
(select c2 from t2) and c1 < 10;
c1
----
7
9
8
(3 rows)
--
--q8 introduce join
--
explain select c1 from t1,t2 where c1 not in
(select c3 from t3) and c1 = c2;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.12..3.25 rows=4 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=2.12..3.20 rows=1 width=4)
Hash Cond: (t1.c1 = t3.c3)
-> Hash Join (cost=1.04..2.10 rows=2 width=4)
Hash Cond: (t1.c1 = t2.c2)
-> Seq Scan on t1 (cost=0.00..1.03 rows=3 width=4)
-> Hash (cost=1.02..1.02 rows=2 width=4)
-> Seq Scan on t2 (cost=0.00..1.02 rows=2 width=4)
-> Hash (cost=1.05..1.05 rows=3 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.05 rows=3 width=4)
-> Seq Scan on t3 (cost=0.00..1.01 rows=1 width=4)
Optimizer: Postgres query optimizer
(12 rows)
select c1 from t1,t2 where c1 not in
(select c3 from t3) and c1 = c2;
c1
----
4
5
(2 rows)
--
--q9
--
select c1 from t1 where c1 not in
(select c2 from t2 where c2 > 2 and c2 < 5);
c1
----
1
5
7
9
2
6
8
10
(8 rows)
--
--q10
--
select count(c1) from t1 where c1 not in
(select sum(c2) from t2);
count
-------
10
(1 row)
--
--q11
--
select c1 from t1 where c1 not in
(select count(*) from t1);
c1
----
1
3
5
7
9
2
4
6
8
(9 rows)
--
--q12
--
select a,b from g1 where (a,b) not in
(select a,b from g1);
a | b
---+---
(0 rows)
--
--q13
--
explain select x,y from l1 where (x,y) not in
(select distinct y, sum(x) from l1 group by y having y < 4 order by y) order by 1,2;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice3; segments: 3) (cost=10000000008.02..10000000008.03 rows=4 width=8)
Merge Key: l1.x, l1.y
-> Sort (cost=10000000002.35..10000000002.36 rows=1 width=8)
Sort Key: l1.x, l1.y
-> Nested Loop Left Anti Semi (Not-In) Join (cost=10000000003.25..10000000007.99 rows=2 width=8)
Join Filter: ((l1.x = "NotIn_SUBQUERY".y) AND (l1.y = "NotIn_SUBQUERY".sum))
-> Seq Scan on l1 (cost=0.00..3.10 rows=4 width=8)
-> Materialize (cost=3.25..3.47 rows=3 width=12)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=3.25..3.43 rows=3 width=12)
-> Subquery Scan on "NotIn_SUBQUERY" (cost=3.25..3.31 rows=1 width=12)
-> HashAggregate (cost=3.25..3.28 rows=1 width=16)
Group Key: l1_1.y
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=3.14..3.20 rows=1 width=12)
Hash Key: l1_1.y
-> Seq Scan on l1 l1_1 (cost=0.00..3.12 rows=2 width=8)
Filter: (y < 4)
Optimizer: Postgres query optimizer
(17 rows)
select x,y from l1 where (x,y) not in
(select distinct y, sum(x) from l1 group by y having y < 4 order by y) order by 1,2;
x | y
----+----
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(7 rows)
--
--q14
--
explain select * from g1 where (a,b,c) not in
(select x,y,z from l1);
QUERY PLAN
---------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=3.53..14.71 rows=11 width=12)
-> Nested Loop Left Anti Semi (Not-In) Join (cost=3.53..14.71 rows=4 width=12)
Join Filter: g1.a = l1.x AND g1.b = l1.y AND g1.c = l1.z
-> Seq Scan on g1 (cost=0.00..2.11 rows=4 width=12)
-> Materialize (cost=3.53..3.83 rows=10 width=12)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..3.50 rows=10 width=12)
-> Seq Scan on l1 (cost=0.00..3.10 rows=4 width=12)
Optimizer status: Postgres query optimizer
(8 rows)
select * from g1 where (a,b,c) not in
(select x,y,z from l1);
a | b | c
---+---+---
1 | 1 | 2
1 | 2 | 2
3 | 3 | 4
3 | 4 | 4
2 | 2 | 3
2 | 3 | 3
(6 rows)
--
--q15
--
explain select c1 from t1, t2 where c1 not in
(select c3 from t3 where c3 = c1) and c1 = c2;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.11..15.49 rows=4 width=4)
-> Hash Join (cost=2.11..15.49 rows=2 width=4)
Hash Cond: t1.c1 = t2.c2
-> Seq Scan on t1 (cost=0.00..13.32 rows=2 width=4)
Filter: (subplan)
SubPlan 1
-> Result (cost=2.04..2.05 rows=1 width=4)
Filter: t3.c3 = $0
-> Materialize (cost=2.04..2.05 rows=1 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.04 rows=1 width=4)
-> Seq Scan on t3 (cost=0.00..2.04 rows=1 width=4)
-> Hash (cost=2.05..2.05 rows=2 width=4)
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
Settings: optimizer=off
Optimizer status: Postgres query optimizer
(15 rows)
select c1 from t1, t2 where c1 not in
(select c3 from t3 where c3 = c1) and c1 = c2;
c1
----
5
4
(2 rows)
--
--q17
-- null test
--
select c1 from t1 where c1 not in
(select c1n from t1n);
c1
----
(0 rows)
--
--q18
-- null test
--
select c1 from t1 where c1 not in
(select c2 from t2 where c2 not in
(select c3 from t3 where c3 not in
(select c1n from t1n)));
c1
----
7
9
6
8
10
(5 rows)
--
--q19
--
select c1 from t1 join t2 on c1 = c2 where c1 not in
(select c3 from t3);
c1
----
5
4
(2 rows)
--
--q20
--
explain select c1 from t1 where c1 not in
(select sum(c2) as s from t2 where c2 > 2 group by c2 having c2 > 3);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=1.13..2.34 rows=10 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=1.13..2.20 rows=3 width=4)
Hash Cond: (t1.c1 = "NotIn_SUBQUERY".s)
-> Seq Scan on t1 (cost=0.00..1.03 rows=3 width=4)
-> Hash (cost=1.09..1.09 rows=3 width=8)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=1.03..1.09 rows=3 width=8)
-> Subquery Scan on "NotIn_SUBQUERY" (cost=1.03..1.05 rows=1 width=8)
-> HashAggregate (cost=1.03..1.04 rows=1 width=16)
Group Key: t2.c2
-> Seq Scan on t2 (cost=0.00..1.02 rows=1 width=4)
Filter: ((c2 > 2) AND (c2 > 3))
Optimizer: Postgres query optimizer
(12 rows)
select c1 from t1 where c1 not in
(select sum(c2) as s from t2 where c2 > 2 group by c2 having c2 > 3);
c1
----
1
3
7
9
2
6
8
10
(8 rows)
--
--q21
-- multiple not in in where clause
--
select c1 from t1 where c1 not in
(select c2 from t2) and c1 not in
(select c3 from t3);
c1
----
7
9
6
8
10
(5 rows)
--
--q22
-- coexist with joins
--
select c1 from t1,t3,t2 where c1 not in
(select c4 from t4) and c1 = c3 and c1 = c2;
c1
----
3
(1 row)
--
--q23
-- union in subselect
--
select c1 from t1 where c1 not in
(select c2 from t2 union select c3 from t3);
c1
----
6
8
10
7
9
(5 rows)
--
--q24
--
select c1 from t1 where c1 not in
(select c2 from t2 union all select c3 from t3);
c1
----
7
9
6
8
10
(5 rows)
--
--q25
--
select c1 from t1 where c1 not in
(select (case when c1n is null then 1 else c1n end) as c1n from t1n);
c1
----
9
4
8
10
(4 rows)
--
--q26
--
explain select (case when c1%2 = 0
then (select sum(c2) from t2 where c2 not in (select c3 from t3))
else (select sum(c3) from t3 where c3 not in (select c4 from t4)) end) as foo from t1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=1.33..2.51 rows=10 width=8)
InitPlan 1 (returns $0) (slice2)
-> Aggregate (cost=1.17..1.18 rows=1 width=8)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=0.09..1.17 rows=3 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=0.09..1.12 rows=1 width=4)
Hash Cond: (t2.c2 = t3.c3)
-> Seq Scan on t2 (cost=0.00..1.02 rows=2 width=4)
-> Hash (cost=0.05..0.05 rows=3 width=4)
-> Broadcast Motion 3:3 (slice4; segments: 3) (cost=0.00..0.05 rows=3 width=4)
-> Seq Scan on t3 (cost=0.00..0.01 rows=1 width=4)
InitPlan 2 (returns $1) (slice5)
-> Aggregate (cost=0.14..0.15 rows=1 width=8)
-> Gather Motion 3:1 (slice6; segments: 3) (cost=0.06..0.13 rows=3 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=0.06..0.08 rows=1 width=4)
Hash Cond: (t3_1.c3 = t4.c4)
-> Seq Scan on t3 t3_1 (cost=0.00..0.01 rows=1 width=4)
-> Hash (cost=0.03..0.03 rows=2 width=4)
-> Broadcast Motion 3:3 (slice7; segments: 3) (cost=0.00..0.03 rows=2 width=4)
-> Seq Scan on t4 (cost=0.00..0.01 rows=1 width=4)
-> Seq Scan on t1 (cost=1.33..2.38 rows=3 width=8)
Optimizer: Postgres query optimizer
(21 rows)
select (case when c1%2 = 0
then (select sum(c2) from t2 where c2 not in (select c3 from t3))
else (select sum(c3) from t3 where c3 not in (select c4 from t4)) end) as foo from t1;
foo
-----
9
9
9
9
9
3
3
3
3
3
(10 rows)
--
--q27
--
explain select c1 from t1 where not c1 >= some (select c2 from t2);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.27..8.74 rows=4 width=4)
-> Nested Loop Left Anti Semi (Not-In) Join (cost=2.27..8.74 rows=2 width=4)
Join Filter: t1.c1 >= t2.c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Materialize (cost=2.27..2.42 rows=5 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.25 rows=5 width=4)
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
Optimizer status: Postgres query optimizer
(8 rows)
select c1 from t1 where not c1 >= some (select c2 from t2);
c1
----
(0 rows)
--
--q28
--
explain select c2 from t2 where not c2 < all (select c2 from t2);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000000007.10 rows=4 width=4)
-> Nested Loop Semi Join (cost=10000000000.00..10000000007.10 rows=2 width=4)
Join Filter: t2.c2 >= t2_1.c2
-> Seq Scan on t2 (cost=0.00..3.05 rows=2 width=4)
-> Materialize (cost=0.00..3.33 rows=5 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..3.25 rows=5 width=4)
-> Seq Scan on t2 t2_1 (cost=0.00..3.05 rows=2 width=4)
Optimizer: Postgres query optimizer
(8 rows)
select c2 from t2 where not c2 < all (select c2 from t2);
c2
----
2
4
1
3
5
(5 rows)
--
--q29
--
explain select c3 from t3 where not c3 <> any (select c4 from t4);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=1.11..3.54 rows=4 width=4)
-> Nested Loop Left Anti Semi (Not-In) Join (cost=1.11..3.54 rows=2 width=4)
Join Filter: t3.c3 <> t4.c4
-> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4)
-> Materialize (cost=1.11..1.17 rows=2 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.10 rows=2 width=4)
-> Seq Scan on t4 (cost=0.00..1.02 rows=1 width=4)
Optimizer status: Postgres query optimizer
(8 rows)
select c3 from t3 where not c3 <> any (select c4 from t4);
c3
----
(0 rows)
--
--q31
--
explain select c1 from t1 where c1 not in (select c2 from t2 order by c2 limit 3) order by c1;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=5.65..5.66 rows=4 width=4)
Merge Key: t1.c1
-> Sort (cost=5.65..5.66 rows=2 width=4)
Sort Key: t1.c1
-> Hash Left Anti Semi (Not-In) Join (cost=2.44..5.62 rows=2 width=4)
Hash Cond: t1.c1 = "NotIn_SUBQUERY".c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Hash (cost=2.33..2.33 rows=3 width=4)
-> Broadcast Motion 1:3 (slice2; segments: 1) (cost=2.11..2.33 rows=9 width=4)
-> Subquery Scan on "NotIn_SUBQUERY" (cost=2.11..2.21 rows=3 width=4)
-> Limit (cost=2.11..2.18 rows=3 width=4)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=2.11..2.18 rows=3 width=4)
Merge Key: t2.c2
-> Limit (cost=2.11..2.12 rows=1 width=4)
-> Sort (cost=2.11..2.12 rows=2 width=4)
Sort Key: t2.c2
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
Settings: optimizer=off
Optimizer status: Postgres query optimizer
(19 rows)
select c1 from t1 where c1 not in (select c2 from t2 order by c2 limit 3) order by c1;
c1
----
4
5
6
7
8
9
10
(7 rows)
--quantified/correlated subqueries
--
--q32
--
explain select c1 from t1 where c1 =all (select c2 from t2 where c2 > -1 and c2 <= 1);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.12..5.89 rows=9 width=4)
-> Nested Loop Left Anti Semi (Not-In) Join (cost=2.12..5.89 rows=3 width=4)
Join Filter: t1.c1 <> t2.c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Materialize (cost=2.12..2.15 rows=1 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.12 rows=1 width=4)
-> Seq Scan on t2 (cost=0.00..2.08 rows=1 width=4)
Filter: c2 > (-1) AND c2 <= 1
Optimizer status: Postgres query optimizer
(9 rows)
select c1 from t1 where c1 =all (select c2 from t2 where c2 > -1 and c2 <= 1);
c1
----
1
(1 row)
--
--q33
--
explain select c1 from t1 where c1 <>all (select c2 from t2);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.44..5.63 rows=4 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=2.44..5.63 rows=2 width=4)
Hash Cond: t1.c1 = t2.c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Hash (cost=2.25..2.25 rows=5 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.25 rows=5 width=4)
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
Optimizer status: Postgres query optimizer
(8 rows)
select c1 from t1 where c1 <>all (select c2 from t2);
c1
----
7
9
6
8
10
(5 rows)
--
--q34
--
explain select c1 from t1 where c1 <=all (select c2 from t2 where c2 not in (select c1n from t1n));
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=4.88..10.23 rows=4 width=4)
-> Nested Loop Left Anti Semi (Not-In) Join (cost=4.88..10.23 rows=2 width=4)
Join Filter: t1.c1 > t2.c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Materialize (cost=4.88..4.98 rows=4 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=2.61..4.87 rows=4 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=2.61..4.74 rows=2 width=4)
Hash Cond: t2.c2 = t1n.c1n
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
-> Hash (cost=2.35..2.35 rows=7 width=4)
-> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..2.35 rows=7 width=4)
-> Seq Scan on t1n (cost=0.00..2.07 rows=3 width=4)
Optimizer status: Postgres query optimizer
(13 rows)
select c1 from t1 where c1 <=all (select c2 from t2 where c2 not in (select c1n from t1n));
c1
----
2
4
6
8
10
1
3
5
7
9
(10 rows)
--
--q35
--
explain select c1 from t1 where not c1 =all (select c2 from t2 where not c2 >all (select c3 from t3));
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=20000000004.22..20000000004.39 rows=10 width=4)
-> HashAggregate (cost=20000000004.22..20000000004.25 rows=3 width=4)
Group Key: (RowIdExpr)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20000000000.00..20000000004.21 rows=3 width=4)
Hash Key: (RowIdExpr)
-> Nested Loop (cost=20000000000.00..20000000004.15 rows=3 width=4)
Join Filter: (t1.c1 <> t2.c2)
-> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..1.17 rows=10 width=4)
-> Seq Scan on t1 (cost=0.00..1.03 rows=3 width=4)
-> Materialize (cost=10000000000.00..10000000002.82 rows=1 width=4)
-> Nested Loop Semi Join (cost=10000000000.00..10000000002.81 rows=1 width=4)
Join Filter: (t2.c2 <= t3.c3)
-> Seq Scan on t2 (cost=0.00..1.02 rows=2 width=4)
-> Materialize (cost=0.00..1.06 rows=3 width=4)
-> Broadcast Motion 3:3 (slice4; segments: 3) (cost=0.00..1.05 rows=3 width=4)
-> Seq Scan on t3 (cost=0.00..1.01 rows=1 width=4)
Optimizer: Postgres query optimizer
(17 rows)
select c1 from t1 where not c1 =all (select c2 from t2 where not c2 >all (select c3 from t3));
c1
----
1
3
5
7
9
2
4
6
8
10
(10 rows)
--
--q36
--
explain select c1 from t1 where not c1 <>all (select c1n from t1n where c1n <all (select c3 from t3 where c3 = c1n));
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=9.27..12.44 rows=4 width=4)
-> Hash Semi Join (cost=9.27..12.44 rows=2 width=4)
Hash Cond: t1.c1 = t1n.c1n
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Hash (cost=9.23..9.23 rows=2 width=4)
-> Seq Scan on t1n (cost=0.00..9.23 rows=2 width=4)
Filter: (subplan)
SubPlan 1
-> Result (cost=2.04..2.05 rows=1 width=4)
Filter: t3.c3 = $0
-> Materialize (cost=2.04..2.05 rows=1 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.04 rows=1 width=4)
-> Seq Scan on t3 (cost=0.00..2.04 rows=1 width=4)
Settings: optimizer=off
Optimizer status: Postgres query optimizer
(15 rows)
select c1 from t1 where not c1 <>all (select c1n from t1n where c1n <all (select c3 from t3 where c3 = c1n));
c1
----
5
7
6
(3 rows)
--
--q37
--
explain select c1 from t1 where not c1 >=all (select c2 from t2 where c2 = c1);
QUERY PLAN
----------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.11..5.30 rows=4 width=4)
-> Hash Semi Join (cost=2.11..5.30 rows=2 width=4)
Hash Cond: t1.c1 = t2.c2
Join Filter: t1.c1 < t2.c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Hash (cost=2.05..2.05 rows=2 width=4)
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
Settings: optimizer=off
Optimizer status: Postgres query optimizer
(9 rows)
select c1 from t1 where not c1 >=all (select c2 from t2 where c2 = c1);
c1
----
(0 rows)
--
--q38
--
explain select c1 from t1 where not exists (select c2 from t2 where c2 = c1);
QUERY PLAN
----------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.11..5.28 rows=4 width=4)
-> Hash Anti Join (cost=2.11..5.28 rows=2 width=4)
Hash Cond: t1.c1 = t2.c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Hash (cost=2.05..2.05 rows=2 width=4)
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
Settings: optimizer=off
Optimizer status: Postgres query optimizer
(8 rows)
select c1 from t1 where not exists (select c2 from t2 where c2 = c1);
c1
----
7
9
6
8
10
(5 rows)
--
--q39
--
explain select c1 from t1 where not exists (select c2 from t2 where c2 not in (select c3 from t3) and c2 = c1);
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=4.42..7.59 rows=4 width=4)
-> Hash Anti Join (cost=4.42..7.59 rows=2 width=4)
Hash Cond: t1.c1 = t2.c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Hash (cost=4.38..4.38 rows=2 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=2.26..4.38 rows=2 width=4)
Hash Cond: t2.c2 = t3.c3
-> Seq Scan on t2 (cost=0.00..2.05 rows=2 width=4)
-> Hash (cost=2.15..2.15 rows=3 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.15 rows=3 width=4)
-> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4)
Settings: optimizer=off
Optimizer status: Postgres query optimizer
(15 rows)
select c1 from t1 where not exists (select c2 from t2 where c2 not in (select c3 from t3) and c2 = c1);
c1
----
1
3
7
9
2
6
8
10
(8 rows)
--
--q40
-- GPDB_90_MERGE_FIXME: We should be able to push down join filter on param $0 to a result node on top of LASJ (Not in)
--
explain select c1 from t1 where not exists (select c2 from t2 where exists (select c3 from t3) and c2 <>all (select c3 from t3) and c2 = c1);
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=6.10..9.34 rows=4 width=4)
InitPlan 1 (returns $0) (slice3)
-> Gather Motion 3:1 (slice4; segments: 3) (cost=0.00..2.09 rows=3 width=0)
-> Seq Scan on t3 t3_1 (cost=0.00..2.03 rows=1 width=0)
-> Hash Anti Join (cost=6.10..9.27 rows=2 width=4)
Hash Cond: (t1.c1 = t2.c2)
Join Filter: $0
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Hash (cost=5.36..5.36 rows=2 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=2.26..5.36 rows=2 width=4)
Hash Cond: (t2.c2 = t3.c3)
-> Seq Scan on t2 (cost=0.00..3.05 rows=2 width=4)
-> Hash (cost=2.15..2.15 rows=3 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.15 rows=3 width=4)
-> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4)
Optimizer: Postgres query optimizer
(16 rows)
select c1 from t1 where not exists (select c2 from t2 where exists (select c3 from t3) and c2 <>all (select c3 from t3) and c2 = c1);
c1
----
2
6
8
10
1
3
7
9
(8 rows)
--
--q41
--
select c1 from t1 where c1 not in (select c2 from t2) or c1 = 49;
c1
----
7
9
6
8
10
(5 rows)
--
--q42
--
select c1 from t1 where not not not c1 in (select c2 from t2);
c1
----
7
9
6
8
10
(5 rows)
--
--q43
--
explain select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 is not null;
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=1.10..2.20 rows=3 width=4)
-> Hash Left Anti Semi (Not-In) Join (cost=1.10..2.15 rows=1 width=4)
Hash Cond: (t1.c1 = t2.c2)
-> Seq Scan on t1 (cost=0.00..1.03 rows=3 width=4)
Filter: (c1 IS NOT NULL)
-> Hash (cost=1.02..1.02 rows=1 width=4)
-> Seq Scan on t2 (cost=0.00..1.02 rows=1 width=4)
Filter: (c2 > 4)
Optimizer: Postgres query optimizer
(9 rows)
select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 is not null;
c1
----
2
4
6
8
10
1
3
7
9
(9 rows)
--
--q44
--
select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 > 2;
c1
----
4
6
8
10
3
7
9
(7 rows)
-- Test if the equality operator is implemented by a SQL function
--
--q45
--
create domain absint as int4;
create function iszero(absint) returns bool as $$ begin return $1::int4 = 0; end; $$ language plpgsql immutable strict;
create or replace function abseq (absint, absint) returns bool as $$ select iszero(abs($1) - abs($2)); $$ language sql immutable strict;
create operator = (PROCEDURE = abseq, leftarg=absint, rightarg=absint);
explain select c1 from t1 where c1::absint not in
(select c1n::absint from t1n);
QUERY PLAN
--------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000000062.89 rows=4 width=4)
-> Nested Loop Left Anti Semi (Not-In) Join (cost=10000000000.00..10000000062.89 rows=2 width=4)
Join Filter: iszero(((abs(((t1.c1)::absint)::integer) - abs(((t1n.c1n)::absint)::integer)))::absint)
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Materialize (cost=0.00..3.45 rows=7 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..3.35 rows=7 width=4)
-> Seq Scan on t1n (cost=0.00..3.07 rows=3 width=4)
Planning time: 0.865 ms
Optimizer: Postgres query optimizer
(9 rows)
select c1 from t1 where c1::absint not in
(select c1n::absint from t1n);
c1
----
(0 rows)
-- Test the null not in an empty set
-- null not in an unempty set, always returns false
-- null not in an empty set, always returns true
--
-- q46
--
create table table_source (c1 varchar(100),c2 varchar(100),c3 varchar(100),c4 varchar(100));
insert into table_source (c1 ,c2 ,c3 ,c4 ) values ('000181202006010000003158',null,'INC','0000000001') ;
create table table_source2 as select * from table_source distributed by (c2);
create table table_source3 as select * from table_source distributed replicated;
create table table_source4 (c1 varchar(100),c2 varchar(100) not null,c3 varchar(100),c4 varchar(100));
insert into table_source4 (c1 ,c2 ,c3 ,c4 ) values ('000181202006010000003158','a','INC','0000000001') ;
create table table_config (c1 varchar(10) ,c2 varchar(10) ,PRIMARY KEY (c1));
insert into table_config select i, 'test' from generate_series(1, 1000)i;
analyze table_config;
delete from table_config where gp_segment_id = 0;
explain select * from table_source where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=31.00..32.18 rows=10 width=258)
-> Hash Left Anti Semi (Not-In) Join (cost=31.00..32.05 rows=3 width=258)
Hash Cond: ((table_source.c2)::text = (table_config.c1)::text)
-> Seq Scan on table_source (cost=0.00..1.01 rows=1 width=258)
Filter: (((c3)::text = 'INC'::text) AND ((c4)::text = '0000000001'::text))
-> Hash (cost=18.50..18.50 rows=1000 width=3)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..18.50 rows=1000 width=3)
-> Seq Scan on table_config (cost=0.00..5.17 rows=333 width=3)
Filter: ((c2)::text = 'test'::text)
Optimizer: Postgres query optimizer
(10 rows)
select * from table_source where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
c1 | c2 | c3 | c4
----+----+----+----
(0 rows)
explain select * from table_source2 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
QUERY PLAN
-----------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=31.00..32.18 rows=10 width=258)
-> Hash Left Anti Semi (Not-In) Join (cost=31.00..32.05 rows=3 width=258)
Hash Cond: ((table_source2.c2)::text = (table_config.c1)::text)
-> Seq Scan on table_source2 (cost=0.00..1.01 rows=1 width=258)
Filter: (((c3)::text = 'INC'::text) AND ((c4)::text = '0000000001'::text))
-> Hash (cost=18.50..18.50 rows=1000 width=3)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..18.50 rows=1000 width=3)
-> Seq Scan on table_config (cost=0.00..5.17 rows=333 width=3)
Filter: ((c2)::text = 'test'::text)
Optimizer: Postgres query optimizer
(10 rows)
select * from table_source2 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
c1 | c2 | c3 | c4
----+----+----+----
(0 rows)
explain select * from table_source3 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
QUERY PLAN
--------------------------------------------------------------------------------------------
Hash Left Anti Semi (Not-In) Join (cost=32.02..32.12 rows=10 width=258)
Hash Cond: ((table_source3.c2)::text = (table_config.c1)::text)
-> Gather Motion 1:1 (slice1; segments: 1) (cost=1.01..1.01 rows=1 width=258)
-> Seq Scan on table_source3 (cost=0.00..1.01 rows=1 width=258)
Filter: (((c3)::text = 'INC'::text) AND ((c4)::text = '0000000001'::text))
-> Hash (cost=18.50..18.50 rows=1000 width=3)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..18.50 rows=1000 width=3)
-> Seq Scan on table_config (cost=0.00..5.17 rows=333 width=3)
Filter: ((c2)::text = 'test'::text)
Optimizer: Postgres query optimizer
(10 rows)
select * from table_source3 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
c1 | c2 | c3 | c4
----+----+----+----
(0 rows)
explain select * from table_source4 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
QUERY PLAN
------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=9.33..10.54 rows=10 width=42)
-> Hash Left Anti Semi (Not-In) Join (cost=9.33..10.41 rows=3 width=42)
Hash Cond: ((table_source4.c2)::text = (table_config.c1)::text)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..1.03 rows=1 width=42)
Hash Key: table_source4.c2
-> Seq Scan on table_source4 (cost=0.00..1.01 rows=1 width=42)
Filter: (((c3)::text = 'INC'::text) AND ((c4)::text = '0000000001'::text))
-> Hash (cost=5.17..5.17 rows=333 width=3)
-> Seq Scan on table_config (cost=0.00..5.17 rows=333 width=3)
Filter: ((c2)::text = 'test'::text)
Optimizer: Postgres query optimizer
(11 rows)
select * from table_source4 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
c1 | c2 | c3 | c4
--------------------------+----+-----+------------
000181202006010000003158 | a | INC | 0000000001
(1 row)
--
-- Multi Column NOT-IN
-- Please refer to https://github.com/greenplum-db/gpdb/issues/12930
--
create table t1_12930(a int not null, b int not null);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t2_12930(a int not null, b int not null);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
-- non-nullable: t1.a, t1.b, t2.a, t2.b
insert into t1_12930 values (1, 1), (2, 2);
insert into t2_12930 values (1, 1), (2, 3), (3,3);
explain select * from t1_12930 where (a, b) not in (select a, b from t2_12930);
QUERY PLAN
------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=751.50..207138.77 rows=16 width=8)
-> Hash Left Anti Semi (Not-In) Join (cost=751.50..207138.55 rows=5 width=8)
Hash Cond: ((t1_12930.a = t2_12930.a) AND (t1_12930.b = t2_12930.b))
-> Seq Scan on t1_12930 (cost=0.00..321.00 rows=28700 width=8)
-> Hash (cost=321.00..321.00 rows=28700 width=8)
-> Seq Scan on t2_12930 (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(7 rows)
select * from t1_12930 where (a, b) not in (select a, b from t2_12930);
a | b
---+---
2 | 2
(1 row)
explain select * from t1_12930 where (a+1, b+1) not in (select a, b from t2_12930);
QUERY PLAN
----------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=751.50..413635.27 rows=16 width=8)
-> Hash Left Anti Semi (Not-In) Join (cost=751.50..413635.05 rows=5 width=8)
Hash Cond: (((t1_12930.a + 1) = t2_12930.a) AND ((t1_12930.b + 1) = t2_12930.b))
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..895.00 rows=28700 width=8)
Hash Key: (t1_12930.a + 1)
-> Seq Scan on t1_12930 (cost=0.00..321.00 rows=28700 width=8)
-> Hash (cost=321.00..321.00 rows=28700 width=8)
-> Seq Scan on t2_12930 (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(9 rows)
select * from t1_12930 where (a+1, b+1) not in (select a, b from t2_12930);
a | b
---+---
1 | 1
(1 row)
explain select * from t1_12930 where (a,b) <> ALL (select a, b from t2_12930);
QUERY PLAN
------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=751.50..207138.77 rows=16 width=8)
-> Hash Left Anti Semi (Not-In) Join (cost=751.50..207138.55 rows=5 width=8)
Hash Cond: ((t1_12930.a = t2_12930.a) AND (t1_12930.b = t2_12930.b))
-> Seq Scan on t1_12930 (cost=0.00..321.00 rows=28700 width=8)
-> Hash (cost=321.00..321.00 rows=28700 width=8)
-> Seq Scan on t2_12930 (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(7 rows)
select * from t1_12930 where (a,b) <> ALL (select a, b from t2_12930);
a | b
---+---
2 | 2
(1 row)
-- non-nullable: t1.a, t2.a, t2.b
-- nullable: t1.b
truncate t1_12930;
truncate t2_12930;
alter table t2_12930 alter column b set not null;
alter table t1_12930 alter column b drop not null;
insert into t1_12930 values (1, null);
insert into t2_12930 values (1, 1);
explain select * from t1_12930 where (a, b) <>ALL (select a, b from t2_12930);
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10079226671.22 rows=16 width=8)
-> Nested Loop Left Anti Semi (Not-In) Join (cost=10000000000.00..10079226671.00 rows=5 width=8)
Join Filter: ((t1_12930.a = t2_12930.a) AND (t1_12930.b = t2_12930.b))
-> Seq Scan on t1_12930 (cost=0.00..321.00 rows=28700 width=8)
-> Materialize (cost=0.00..1899.50 rows=86100 width=8)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=8)
-> Seq Scan on t2_12930 (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(8 rows)
select * from t1_12930 where (a, b) <>ALL (select a, b from t2_12930);
a | b
---+---
(0 rows)
-- non-nullable: t1.a, t1.b, t2.a
-- nullable: t2.b
truncate t1_12930;
truncate t2_12930;
alter table t2_12930 alter column b drop not null;
insert into t1_12930 values (1, 1);
insert into t2_12930 values (1, null);
explain select * from t1_12930 where (a, b) not in (select a, b from t2_12930);
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10079226671.22 rows=16 width=8)
-> Nested Loop Left Anti Semi (Not-In) Join (cost=10000000000.00..10079226671.00 rows=5 width=8)
Join Filter: ((t1_12930.a = t2_12930.a) AND (t1_12930.b = t2_12930.b))
-> Seq Scan on t1_12930 (cost=0.00..321.00 rows=28700 width=8)
-> Materialize (cost=0.00..1899.50 rows=86100 width=8)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=8)
-> Seq Scan on t2_12930 (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(8 rows)
select * from t1_12930 where (a, b) not in (select a, b from t2_12930);
a | b
---+---
(0 rows)
-- non-nullable: t1.a, t2.a, t2.b
-- nullable: t1.b
truncate t1_12930;
truncate t2_12930;
alter table t2_12930 alter column b set not null;
alter table t1_12930 alter column b drop not null;
insert into t1_12930 values (1, null);
insert into t2_12930 values (1, 1);
explain select * from t1_12930 where (a, b) not in (select a, b from t2_12930);
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10079226671.22 rows=16 width=8)
-> Nested Loop Left Anti Semi (Not-In) Join (cost=10000000000.00..10079226671.00 rows=5 width=8)
Join Filter: ((t1_12930.a = t2_12930.a) AND (t1_12930.b = t2_12930.b))
-> Seq Scan on t1_12930 (cost=0.00..321.00 rows=28700 width=8)
-> Materialize (cost=0.00..1899.50 rows=86100 width=8)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1469.00 rows=86100 width=8)
-> Seq Scan on t2_12930 (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(8 rows)
select * from t1_12930 where (a, b) not in (select a, b from t2_12930);
a | b
---+---
(0 rows)
explain select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is not null;
QUERY PLAN
------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=751.50..206932.71 rows=16 width=8)
-> Hash Left Anti Semi (Not-In) Join (cost=751.50..206932.49 rows=5 width=8)
Hash Cond: ((t1_12930.a = t2_12930.a) AND (t1_12930.b = t2_12930.b))
-> Seq Scan on t1_12930 (cost=0.00..321.00 rows=28671 width=8)
Filter: (b IS NOT NULL)
-> Hash (cost=321.00..321.00 rows=28700 width=8)
-> Seq Scan on t2_12930 (cost=0.00..321.00 rows=28700 width=8)
Optimizer: Postgres query optimizer
(8 rows)
select * from t1_12930 where (a, b) not in (select a, b from t2_12930) and b is not null;
a | b
---+---
(0 rows)
--
-- Test left anti semi (not-in) join
-- With is null expression inside an OR expression.
--
begin;
create table t1_lasj(c1 int) distributed by (c1);
create table t2_lasj_has_null(c1n int) distributed by (c1n);
insert into t1_lasj values (generate_series (1,10));
insert into t2_lasj_has_null values (1), (2), (3), (null), (5), (6), (7);
analyze t1_lasj;
analyze t2_lasj_has_null;
-- null test
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null) and c1 is not null;
c1
----
(0 rows)
-- null test under OR expression
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n > 0) and c1 is not null;
c1
----
(0 rows)
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n > 0 or c1n is null) and c1 is not null;
c1
----
(0 rows)
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n > 1) or (c1n > 0 or c1n is null));
c1
----
(0 rows)
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null and c1n > 1) and c1 is not null;
c1
----
(0 rows)
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or c1n > 1) and c1 is not null;
c1
----
(0 rows)
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null) and c1 is not null;
c1
----
(0 rows)
-- null test under recursive OR expression
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where (c1n != 0 and c1n > 1) or (c1n > 0 or c1n is null)) and c1 is not null;
c1
----
(0 rows)
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where c1n is null or c1n is null or true) and c1 is not null;
c1
----
(0 rows)
select c1 from t1_lasj where c1 not in (select c1n from t2_lasj_has_null where 2 > 1 or c1n is null or c1n is null or true) and c1 is not null;
c1
----
(0 rows)
abort;
reset search_path;
drop schema notin cascade;
NOTICE: drop cascades to 18 other objects
DETAIL: drop cascades to table notin.t1
drop cascades to table notin.t2
drop cascades to table notin.t3
drop cascades to table notin.t4
drop cascades to table notin.t1n
drop cascades to table notin.g1
drop cascades to table notin.l1
drop cascades to type notin.absint
drop cascades to function notin.iszero(notin.absint)
drop cascades to function notin.abseq(notin.absint,notin.absint)
drop cascades to operator notin.=(notin.absint,notin.absint)
drop cascades to table notin.table_source
drop cascades to table notin.table_source2
drop cascades to table notin.table_source3
drop cascades to table notin.table_source4
drop cascades to table notin.table_config
drop cascades to table notin.t1_12930
drop cascades to table notin.t2_12930