blob: fae1fdc7c5dc94095fc25538b9fe576e40a06bf7 [file] [log] [blame]
-- Extra GPDB tests for joins.
-- Ignore "workfile compresssion is not supported by this build" (see
-- 'zlib' test):
--
-- start_matchignore
-- m/ERROR: workfile compresssion is not supported by this build/
-- end_matchignore
--
-- test numeric hash join
--
set enable_hashjoin to on;
set enable_mergejoin to off;
set enable_nestloop to off;
create table nhtest (i numeric(10, 2)) distributed by (i);
insert into nhtest values(100000.22);
insert into nhtest values(300000.19);
explain select * from nhtest a join nhtest b using (i);
select * from nhtest a join nhtest b using (i);
create temp table l(a int);
insert into l values (1), (1), (2);
select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1.a = 2 order by 1,2,3;
--
-- test anti_join/left_anti_semi_join selectivities
--
create table aj_t1(a int, b int, c int) distributed by (a);
create table aj_t2(a int, b int, c int) distributed by (a);
insert into aj_t1 values(1,1,1);
insert into aj_t2 values(1,1,1),(2,2,2);
explain(costs off) select t1.a from aj_t1 t1 where not exists (select 1 from aj_t2 t2 where t1.b = t2.b and t1.c = t2.c);
select t1.a from aj_t1 t1 where not exists (select 1 from aj_t2 t2 where t1.b = t2.b and t1.c = t2.c);
--
-- test hash join
--
create table hjtest (i int, j int) distributed by (i,j);
insert into hjtest values(3, 4);
select count(*) from hjtest a1, hjtest a2 where a2.i = least (a1.i,4) and a2.j = 4;
--
-- Test for correct behavior when there is a Merge Join on top of Materialize
-- on top of a Motion :
-- 1. Use FULL OUTER JOIN to induce a Merge Join
-- 2. Use a large tuple size to induce a Materialize
-- 3. Use gp_dist_random() to induce a Redistribute
--
set enable_hashjoin to off;
set enable_mergejoin to on;
set enable_nestloop to off;
DROP TABLE IF EXISTS alpha;
DROP TABLE IF EXISTS theta;
CREATE TABLE alpha (i int, j int) distributed by (i);
CREATE TABLE theta (i int, j char(10000000)) distributed by (i);
INSERT INTO alpha values (1, 1), (2, 2);
INSERT INTO theta values (1, 'f'), (2, 'g');
SELECT *
FROM gp_dist_random('alpha') FULL OUTER JOIN gp_dist_random('theta')
ON (alpha.i = theta.i)
WHERE (alpha.j IS NULL or theta.j IS NULL);
reset enable_hashjoin;
reset enable_mergejoin;
reset enable_nestloop;
--
-- Predicate propagation over equality conditions
--
drop schema if exists pred;
create schema pred;
set search_path=pred;
create table t1 (x int, y int, z int) distributed by (y);
create table t2 (x int, y int, z int) distributed by (x);
insert into t1 select i, i, i from generate_series(1,100) i;
insert into t2 select * from t1;
analyze t1;
analyze t2;
--
-- infer over equalities
--
explain select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x;
select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x;
--
-- infer over >=
--
explain select * from t1,t2 where t1.x = 100 and t2.x >= t1.x;
select * from t1,t2 where t1.x = 100 and t2.x >= t1.x;
--
-- multiple inferences
--
set optimizer_segments=2;
explain select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x;
reset optimizer_segments;
select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x;
--
-- MPP-18537: hash clause references a constant in outer child target list
--
create table hjn_test (i int, j int) distributed by (i,j);
insert into hjn_test values(3, 4);
create table int4_tbl (f1 int) distributed by (f1);
insert into int4_tbl values(123456), (-2147483647), (0), (-123456), (2147483647);
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4;
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1];
select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1];
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10);
select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
-- Same as the last query, but with a partitioned table (which requires a
-- Result node to do projection of the hash expression, as Append is not
-- projection-capable)
create table part4_tbl (f1 int4) partition by range (f1) (start(-1000000) end (1000000) every (1000000));
insert into part4_tbl values
(-123457), (-123456), (-123455),
(-1), (0), (1),
(123455), (123456), (123457);
select * from part4_tbl a join part4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
--
-- Test case where a Motion hash key is only needed for the redistribution,
-- and not returned in the final result set. There was a bug at one point where
-- tjoin.c1 was used as the hash key in a Motion node, but it was not added
-- to the sub-plans target list, causing a "variable not found in subplan
-- target list" error.
--
create table tjoin1(dk integer, id integer) distributed by (dk);
create table tjoin2(dk integer, id integer, t text) distributed by (dk);
create table tjoin3(dk integer, id integer, t text) distributed by (dk);
insert into tjoin1 values (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3);
insert into tjoin2 values (1, 1, '1-1'), (1, 2, '1-2'), (2, 1, '2-1'), (2, 2, '2-2');
insert into tjoin3 values (1, 1, '1-1'), (2, 1, '2-1');
select tjoin1.id, tjoin2.t, tjoin3.t
from tjoin1
left outer join (tjoin2 left outer join tjoin3 on tjoin2.id=tjoin3.id) on tjoin1.id=tjoin3.id;
set enable_hashjoin to off;
set optimizer_enable_hashjoin = off;
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4;
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1];
select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1];
select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10);
select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1));
reset enable_hashjoin;
reset optimizer_enable_hashjoin;
-- In case of Left Anti Semi Join, if the left rel is empty a dummy join
-- should be created
drop table if exists foo;
drop table if exists bar;
create table foo (a int, b int) distributed randomly;
create table bar (c int, d int) distributed randomly;
insert into foo select generate_series(1,10);
insert into bar select generate_series(1,10);
explain select a from foo where a<1 and a>1 and not exists (select c from bar where c=a);
select a from foo where a<1 and a>1 and not exists (select c from bar where c=a);
-- The merge join executor code doesn't support LASJ_NOTIN joins. Make sure
-- the planner doesn't create an invalid plan with them.
create index index_foo on foo (a);
create index index_bar on bar (c);
set enable_nestloop to off;
set enable_hashjoin to off;
set enable_mergejoin to on;
select * from foo where a not in (select c from bar where c <= 5);
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to off;
create table dept
(
id int,
pid int,
name char(40)
);
insert into dept values(3, 0, 'root');
insert into dept values(4, 3, '2<-1');
insert into dept values(5, 4, '3<-2<-1');
insert into dept values(6, 4, '4<-2<-1');
insert into dept values(7, 3, '5<-1');
insert into dept values(8, 7, '5<-1');
insert into dept select i, i % 6 + 3 from generate_series(9,50) as i;
insert into dept select i, 99 from generate_series(100,15000) as i;
ANALYZE dept;
-- Test rescannable hashjoin with spilling hashtable
set statement_mem='1000kB';
set gp_workfile_compression = off;
WITH RECURSIVE subdept(id, parent_department, name) AS
(
-- non recursive term
SELECT * FROM dept WHERE name = 'root'
UNION ALL
-- recursive term
SELECT d.* FROM dept AS d, subdept AS sd
WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
-- Test rescannable hashjoin with spilling hashtable, with compression
set gp_workfile_compression = on;
WITH RECURSIVE subdept(id, parent_department, name) AS
(
-- non recursive term
SELECT * FROM dept WHERE name = 'root'
UNION ALL
-- recursive term
SELECT d.* FROM dept AS d, subdept AS sd
WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
-- Test rescannable hashjoin with in-memory hashtable
reset statement_mem;
WITH RECURSIVE subdept(id, parent_department, name) AS
(
-- non recursive term
SELECT * FROM dept WHERE name = 'root'
UNION ALL
-- recursive term
SELECT d.* FROM dept AS d, subdept AS sd
WHERE d.pid = sd.id
)
SELECT count(*) FROM subdept;
-- MPP-29458
-- When we join on a clause with two different types. If one table distribute by one type, the query plan
-- will redistribute data on another type. But the has values of two types would not be equal. The data will
-- redistribute to wrong segments.
create table test_timestamp_t1 (id numeric(10,0) ,field_dt date) distributed by (id);
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
-- Test nest loop redistribute keys
set enable_nestloop to on;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
-- Test hash join redistribute keys
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
drop table test_timestamp_t1;
drop table test_timestamp_t2;
-- Test merge join redistribute keys
create table test_timestamp_t1 (id numeric(10,0) ,field_dt date) distributed randomly;
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms;
-- test float type
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
create table test_float1(id int, data float4) DISTRIBUTED BY (data);
create table test_float2(id int, data float8) DISTRIBUTED BY (data);
insert into test_float1 values(1, 10), (2, 20);
insert into test_float2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data;
-- test int type
create table test_int1(id int, data int4) DISTRIBUTED BY (data);
create table test_int2(id int, data int8) DISTRIBUTED BY (data);
insert into test_int1 values(1, 10), (2, 20);
insert into test_int2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data;
-- Test to ensure that for full outer join on varchar columns, planner is successful in finding a sort operator in the catalog
create table input_table(a varchar(30), b varchar(30)) distributed by (a);
set enable_hashjoin = off;
explain (costs off) select X.a from input_table X full join (select a from input_table) Y ON X.a = Y.a;
-- Cleanup
reset enable_hashjoin;
set client_min_messages='warning'; -- silence drop-cascade NOTICEs
drop schema pred cascade;
reset search_path;
-- github issue 5370 cases
drop table if exists t5370;
drop table if exists t5370_2;
create table t5370(id int,name text) distributed by(id);
insert into t5370 select i,i from generate_series(1,1000) i;
create table t5370_2 as select * from t5370 distributed by (id);
analyze t5370_2;
analyze t5370;
explain select * from t5370 a , t5370_2 b where a.name=b.name;
drop table t5370;
drop table t5370_2;
-- github issue 6215 cases
-- When executing the following plan
-- ```
-- Gather Motion 1:1 (slice1; segments: 1)
-- -> Merge Full Join
-- -> Seq Scan on int4_tbl a
-- -> Seq Scan on int4_tbl b
--```
-- Cloudberry will raise an Assert Fail.
-- We force adding a material node for
-- merge full join on true.
drop table if exists t6215;
create table t6215(f1 int4) distributed replicated;
insert into t6215(f1) values (1), (2), (3);
set enable_material = off;
-- The plan still have Material operator
explain (costs off) select * from t6215 a full join t6215 b on true;
select * from t6215 a full join t6215 b on true;
drop table t6215;
--
-- This tripped an assertion while deciding the locus for the joins.
-- The code was failing to handle join between SingleQE and Hash correctly,
-- when there were join order restricitions. (see
-- https://github.com/greenplum-db/gpdb/issues/6643
--
select a.f1, b.f1, t.thousand, t.tenthous from
(select sum(f1) as f1 from int4_tbl i4b) b
left outer join
(select sum(f1)+1 as f1 from int4_tbl i4a) a ON a.f1 = b.f1
left outer join
tenk1 t ON b.f1 = t.thousand and (a.f1+b.f1+999) = t.tenthous;
-- tests to ensure that join reordering of LOJs and inner joins produces the
-- correct join predicates & residual filters
drop table if exists t1, t2, t3;
CREATE TABLE t1 (a int, b int, c int);
CREATE TABLE t2 (a int, b int, c int);
CREATE TABLE t3 (a int, b int, c int);
INSERT INTO t1 SELECT i, i, i FROM generate_series(1, 1000) i;
INSERT INTO t2 SELECT i, i, i FROM generate_series(2, 1000) i; -- start from 2 so that one row from t1 doesn't match
INSERT INTO t3 VALUES (1, 2, 3), (NULL, 2, 2);
ANALYZE t1;
ANALYZE t2;
ANALYZE t3;
-- ensure plan has a filter over left outer join
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c));
select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c));
-- ensure plan has two inner joins with the where clause & join predicates ANDed
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a);
select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a);
-- ensure plan has a filter over left outer join
explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a);
select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a);
-- ensure plan has a filter over left outer join
explain select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a));
select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a));
-- ensure plan has a filter over left outer join
explain select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt
join t3 on tt.t1b = t3.b
join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b
join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b);
select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt
join t3 on tt.t1b = t3.b
join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b
join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b);
-- test different join order enumeration methods
set optimizer_join_order = query;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
set optimizer_join_order = greedy;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
set optimizer_join_order = exhaustive;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
set optimizer_join_order = exhaustive2;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
reset optimizer_join_order;
select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b;
drop table t1, t2, t3;
--
-- Test a bug that nestloop path previously can not generate motion above
-- index path, which sometimes is wrong (this test case is an example).
-- We now depend on parameterized path related variables to judge instead.
-- We conservatively disallow motion when there is parameter requirement
-- for either outer or inner at this moment though there could be room
-- for further improvement (e.g. referring subplan code to do broadcast
-- for base rel if needed, which needs much effort and does not seem to
-- be deserved given we will probably refactor related code for the lateral
-- support in the near future). For the query and guc settings below, Postgres
-- planner can not generate a plan.
set enable_nestloop = 1;
set enable_material = 0;
set enable_seqscan = 0;
set enable_bitmapscan = 0;
explain select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1;
select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1;
reset enable_nestloop;
reset enable_material;
reset enable_seqscan;
reset enable_bitmapscan;
-- Below test cases are for planner's cdbpath_motion_for_join, so we close
-- ORCA temporarily.
set optimizer = off;
-- test outer join for general locus
-- replicated table's locus is SegmentGeneral
create table trep_join_gp (c1 int, c2 int) distributed replicated;
-- hash distributed table's locus is Hash
create table thash_join_gp (c1 int, c2 int) distributed by (c1);
-- randomly distributed table's locus is Strewn
create table trand_join_gp (c1 int, c2 int) distributed randomly;
-- start_ignore
create extension if not exists gp_debug_numsegments;
select gp_debug_set_create_table_default_numsegments(1);
-- end_ignore
-- the following replicated table's numsegments is 1
create table trep1_join_gp (c1 int, c2 int) distributed replicated;
insert into trep_join_gp values (1, 1), (2, 2);
insert into thash_join_gp values (1, 1), (2, 2);
insert into trep1_join_gp values (1, 1), (2, 2);
analyze trep_join_gp;
analyze thash_join_gp;
analyze trep1_join_gp;
analyze trand_join_gp;
-- This test is to check that: general left join segmentGeneral --> segmentGeneral
-- And segmentGeneral join hash does not need motion.
explain select * from generate_series(1, 5) g left join trep_join_gp on g = trep_join_gp.c1 join thash_join_gp on true;
select * from generate_series(1, 5) g left join trep_join_gp on g = trep_join_gp.c1 join thash_join_gp on true;
-- The following 4 tests are to check that general left join partition, we could redistribute the
-- general-locus relation when the filter condition is suitable. If we can redistributed
-- general-locus relation, we should not gather them to singleQE.
explain select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c1;
select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c1;
explain select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c2;
select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c2;
explain select * from generate_series(1, 5) g left join trand_join_gp on g = trand_join_gp.c1;
select * from generate_series(1, 5) g left join trand_join_gp on g = trand_join_gp.c1;
explain select * from generate_series(1, 5) g full join trand_join_gp on g = trand_join_gp.c1;
select * from generate_series(1, 5) g full join trand_join_gp on g = trand_join_gp.c1;
-- The following 3 tests are to check that segmentGeneral left join partition
-- we could redistribute the segment general-locus relation when the filter condition
-- is suitable. If we can redistributed general-locus relation, we should not
-- gather them to singleQE.
explain select * from trep_join_gp left join thash_join_gp using (c1);
select * from trep_join_gp left join thash_join_gp using (c1);
explain select * from trep_join_gp left join trand_join_gp using (c1);
select * from trep_join_gp left join trand_join_gp using (c1);
explain select * from trep1_join_gp join thash_join_gp using (c1);
select * from trep1_join_gp join thash_join_gp using (c1);
drop table trep_join_gp;
drop table thash_join_gp;
drop table trand_join_gp;
drop table trep1_join_gp;
select gp_debug_set_create_table_default_numsegments(3);
reset optimizer;
-- The following cases are to test planner join size estimation
-- so we need optimizer to be off.
-- When a partition table join other table using partition key,
-- planner should use root table's stat info instead of largest
-- partition's.
reset enable_hashjoin;
reset enable_mergejoin;
reset enable_nestloop;
create table t_joinsize_1 (c1 int, c2 int)
distributed by (c1)
partition by range (c2)
( start (0) end (5) every (1),
default partition extra );
create table t_joinsize_2 (c1 int, c2 int)
distributed by (c1);
create table t_joinsize_3 (c int) distributed randomly;
insert into t_joinsize_1 select i, i%5 from generate_series(1, 200)i;
insert into t_joinsize_1 select 1, null from generate_series(1, 1000);
insert into t_joinsize_2 select i,i%5 from generate_series(1, 1000)i;
insert into t_joinsize_3 select * from generate_series(1, 100);
analyze t_joinsize_1;
analyze t_joinsize_2;
analyze t_joinsize_3;
-- the following query should not broadcast the join result of t_joinsize_1, t_joinsize_2.
explain select * from (t_joinsize_1 join t_joinsize_2 on t_joinsize_1.c2 = t_joinsize_2.c2) join t_joinsize_3 on t_joinsize_3.c = t_joinsize_1.c1;
drop table t_joinsize_1;
drop table t_joinsize_2;
drop table t_joinsize_3;
-- test if subquery locus is general, then
-- we should keep it general
create table t_randomly_dist_table(c int) distributed randomly;
-- force_explain
-- the following plan should not contain redistributed motion (for planner)
explain
select * from (
select a from generate_series(1, 10)a
union all
select a from generate_series(1, 10)a
) t_subquery_general
join t_randomly_dist_table on t_subquery_general.a = t_randomly_dist_table.c;
drop table t_randomly_dist_table;
-- test lateral join inner plan contains limit
-- we cannot pass params across motion so we
-- can only generate a plan to gather all the
-- data to singleQE. Here we create a compound
-- data type as params to pass into inner plan.
-- By doing so, if we fail to pass correct params
-- into innerplan, it will throw error because
-- of nullpointer reference. If we only use int
-- type as params, the nullpointer reference error
-- may not happen because we parse null to integer 0.
create type mytype_for_lateral_test as (x int, y int);
create table t1_lateral_limit(a int, b int, c mytype_for_lateral_test);
create table t2_lateral_limit(a int, b int);
insert into t1_lateral_limit values (1, 1, '(1,1)');
insert into t1_lateral_limit values (1, 2, '(2,2)');
insert into t2_lateral_limit values (2, 2);
insert into t2_lateral_limit values (3, 3);
explain select * from t1_lateral_limit as t1 cross join lateral
(select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1)s;
select * from t1_lateral_limit as t1 cross join lateral
(select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1)s;
-- Continue with the above cases, if the lateral subquery contains union all
-- and in some of its appendquerys contain limit, it may also lead to bad plan.
-- The best solution may be to walk the query to and do some static analysis
-- to find out which rel has to be gathered and materialized. But it is complicated
-- to do so and this seems less efficient. I believe in future we should do big
-- refactor to make greenplum support lateral well so now, let's just make sure
-- we will not panic.
explain (costs off) select * from t1_lateral_limit as t1 cross join lateral
((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s;
select * from t1_lateral_limit as t1 cross join lateral
((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s;
-- test lateral subquery contains group by (group-by is another place that
-- may add motions in the subquery's plan).
explain select * from t1_lateral_limit t1 cross join lateral
(select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x;
select * from t1_lateral_limit t1 cross join lateral
(select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x;
-- The following case is from Github Issue
-- https://github.com/greenplum-db/gpdb/issues/8860
-- It is the same issue as the above test suite.
create table t_mylog_issue_8860 (myid int, log_date timestamptz );
insert into t_mylog_issue_8860 values (1,timestamptz '2000-01-02 03:04'),(1,timestamptz '2000-01-02 03:04'-'1 hour'::interval);
insert into t_mylog_issue_8860 values (2,timestamptz '2000-01-02 03:04'),(2,timestamptz '2000-01-02 03:04'-'2 hour'::interval);
explain select ml1.myid, log_date as first_date, ml2.next_date from t_mylog_issue_8860 ml1
inner join lateral
(select myid, log_date as next_date
from t_mylog_issue_8860 where myid = ml1.myid and log_date > ml1.log_date order by log_date asc limit 1) ml2
on true;
select ml1.myid, log_date as first_date, ml2.next_date from t_mylog_issue_8860 ml1
inner join lateral
(select myid, log_date as next_date
from t_mylog_issue_8860 where myid = ml1.myid and log_date > ml1.log_date order by log_date asc limit 1) ml2
on true;
-- test prefetch join qual
-- we do not handle this correct
-- the only case we need to prefetch join qual is:
-- 1. outer plan contains motion
-- 2. the join qual contains subplan that contains motion
reset client_min_messages;
set Test_print_prefetch_joinqual = true;
-- prefetch join qual is only set correct for planner
set optimizer = off;
create table t1_test_pretch_join_qual(a int, b int, c int);
create table t2_test_pretch_join_qual(a int, b int, c int);
-- the following plan contains redistribute motion in both inner and outer plan
-- the join qual is t1.c > t2.c, it contains no motion, should not prefetch
explain (costs off) select * from t1_test_pretch_join_qual t1 join t2_test_pretch_join_qual t2
on t1.b = t2.b and t1.c > t2.c;
create table t3_test_pretch_join_qual(a int, b int, c int);
-- the following plan contains motion in both outer plan and join qual,
-- so we should prefetch join qual
explain (costs off) select * from t1_test_pretch_join_qual t1 join t2_test_pretch_join_qual t2
on t1.b = t2.b and t1.a > any (select sum(b) from t3_test_pretch_join_qual t3 where c > t2.a);
reset Test_print_prefetch_joinqual;
reset optimizer;
-- Github Issue: https://github.com/greenplum-db/gpdb/issues/9733
-- Previously in the function bring_to_outer_query and
-- bring_to_singleQE it depends on the path->param_info field
-- to determine if the path contains outerParams. This is not
-- enought. The following case would SegFault before because
-- the indexpath's orderby clause contains outerParams.
create table gist_tbl_github9733 (b box, p point, c circle);
insert into gist_tbl_github9733
select box(point(0.05*i, 0.05*i), point(0.05*i, 0.05*i)),
point(0.05*i, 0.05*i),
circle(point(0.05*i, 0.05*i), 1.0)
from generate_series(0,10000) as i;
vacuum analyze gist_tbl_github9733;
create index gist_tbl_point_index_github9733 on gist_tbl_github9733 using gist (p);
set enable_seqscan=off;
set enable_bitmapscan=off;
explain (costs off)
select p from
(values (box(point(0,0), point(0.5,0.5))),
(box(point(0.5,0.5), point(0.75,0.75))),
(box(point(0.8,0.8), point(1.0,1.0)))) as v(bb)
cross join lateral
(select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss;
reset enable_seqscan;
explain (costs off)
select p from
(values (box(point(0,0), point(0.5,0.5))),
(box(point(0.5,0.5), point(0.75,0.75))),
(box(point(0.8,0.8), point(1.0,1.0)))) as v(bb)
cross join lateral
(select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss;
select p from
(values (box(point(0,0), point(0.5,0.5))),
(box(point(0.5,0.5), point(0.75,0.75))),
(box(point(0.8,0.8), point(1.0,1.0)))) as v(bb)
cross join lateral
(select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss;
reset enable_bitmapscan;
---
--- Test that GUC enable_hashagg takes effect for SEMI join
---
drop table if exists foo;
drop table if exists bar;
create table foo(a int) distributed by (a);
create table bar(b int) distributed by (b);
insert into foo select i from generate_series(1,10)i;
insert into bar select i from generate_series(1,1000)i;
analyze foo;
analyze bar;
set enable_hashagg to on;
explain (costs off)
select * from foo where exists (select 1 from bar where foo.a = bar.b);
select * from foo where exists (select 1 from bar where foo.a = bar.b);
set enable_hashagg to off;
explain (costs off)
select * from foo where exists (select 1 from bar where foo.a = bar.b);
select * from foo where exists (select 1 from bar where foo.a = bar.b);
reset enable_hashagg;
drop table foo;
drop table bar;
-- Fix github issue 10012
create table fix_param_a (i int, j int);
create table fix_param_b (i int UNIQUE, j int);
create table fix_param_c (i int, j int);
insert into fix_param_a select i, i from generate_series(1,20)i;
insert into fix_param_b select i, i from generate_series(1,2000)i;
insert into fix_param_c select i, i from generate_series(1,2000)i;
analyze fix_param_a;
analyze fix_param_b;
analyze fix_param_c;
explain (costs off)
select * from fix_param_a left join fix_param_b on
fix_param_a.i = fix_param_b.i and fix_param_b.j in
(select j from fix_param_c where fix_param_b.i = fix_param_c.i)
order by 1;
select * from fix_param_a left join fix_param_b on
fix_param_a.i = fix_param_b.i and fix_param_b.j in
(select j from fix_param_c where fix_param_b.i = fix_param_c.i)
order by 1;
-- Test targetlist contains placeholder var
-- When creating a redistributed motion with hash keys,
-- Cloudberry planner will invoke `cdbpullup_findEclassInTargetList`.
-- The following test case contains non-strict function `coalesce`
-- in the subquery at nullable-side of outerjoin and thus will
-- have PlaceHolderVar in targetlist. The case is to test if
-- function `cdbpullup_findEclassInTargetList` handles PlaceHolderVar
-- correct.
-- See github issue: https://github.com/greenplum-db/gpdb/issues/10315
create table t_issue_10315 ( id1 int, id2 int );
insert into t_issue_10315 select i,i from generate_series(1, 2)i;
insert into t_issue_10315 select i,null from generate_series(1, 2)i;
insert into t_issue_10315 select null,i from generate_series(1, 2)i;
select * from
( select coalesce( bq.id1 ) id1, coalesce ( bq.id2 ) id2
from ( select r.id1, r.id2 from t_issue_10315 r group by r.id1, r.id2 ) bq ) t
full join ( select r.id1, r.id2 from t_issue_10315 r group by r.id1, r.id2 ) bq_all
on t.id1 = bq_all.id1 and t.id2 = bq_all.id2
full join ( select r.id1, r.id2 from t_issue_10315 r group by r.id1, r.id2 ) tq_all
on (coalesce(t.id1) = tq_all.id1 and t.id2 = tq_all.id2) ;
drop table t_issue_10315;
--
-- Left Join Pruning --
-- Cases when join will be pruned--
-- Single Unique key in inner relation --
create table fooJoinPruning (a int,b int,c int,constraint idx1 unique(a));
create table barJoinPruning (p int,q int,r int,constraint idx2 unique(p));
-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation or is a constant --
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p=100 where fooJoinPruning.b>300;
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b>300;
-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation and filter contains subquery--
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b>300 and fooJoinPruning.c in (select barJoinPruning.q from barJoinPruning );
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b>300 and fooJoinPruning.c > ANY (select barJoinPruning.q from barJoinPruning );
-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation and filter contains corelated subquery referencing outer relation column--
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b in (select fooJoinPruning.a from barJoinPruning);
drop table fooJoinPruning;
drop table barJoinPruning;
-- MultipleUnique key sets in inner relation --
create table fooJoinPruning (a int, b int, c int,d int,e int,f int,g int,constraint idx1 unique(a,b),constraint idx2 unique(a,c,d));
create table barJoinPruning (p int, q int, r int,s int,t int,u int,v int,constraint idx3 unique(p,q),constraint idx4 unique(p,r,s));
create table t1JoinPruning(m int primary key,n int);
create table t2JoinPruning(x int primary key,y int);
-- Unique key set of inner relation ie 'p,q' is present in the join condition and is equal to a column from outer relation or is a constant --
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p=100 and barJoinPruning.q=200 where fooJoinPruning.e >300 and fooJoinPruning.f<>10;
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and fooJoinPruning.a=barJoinPruning.q where fooJoinPruning.e >300 and fooJoinPruning.f<>10;
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and barJoinPruning.q=100 where fooJoinPruning.e >300 and fooJoinPruning.f<>10;
-- Unique key set of inner relation ie 'p,r,s' is present in the join condition and is equal to a column from outer relation or is a constant --
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and barJoinPruning.r=100 and fooJoinPruning.b=barJoinPruning.s where fooJoinPruning.f<>10;
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.a=barJoinPruning.p and fooJoinPruning.b=barJoinPruning.r and fooJoinPruning.c=barJoinPruning.s and barJoinPruning.s=barJoinPruning.t where fooJoinPruning.b>300;
-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation and filter contains subquery --
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and fooJoinPruning.a=barJoinPruning.q where fooJoinPruning.c in (select barJoinPruning.t from barJoinPruning );
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and fooJoinPruning.a=barJoinPruning.q where fooJoinPruning.c > ANY (select barJoinPruning.t from barJoinPruning );
-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation and filter contains corelated subquery referencing outer relation column --
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.g=barJoinPruning.p and fooJoinPruning.a=barJoinPruning.q where fooJoinPruning.e in (select fooJoinPruning.f from barJoinPruning);
-- Prunable Left join present in subquery --
explain select t1JoinPruning.n from t1JoinPruning where t1JoinPruning.m in (select fooJoinPruning.a from fooJoinPruning left join barJoinPruning on barJoinPruning.p=100 and barJoinPruning.q=200);
drop table fooJoinPruning;
drop table barJoinPruning;
drop table t1JoinPruning;
drop table t2JoinPruning;
create table t1 (a int);
create table t2 (a int primary key, b int);
create table t3 (a int primary key, b int);
-- inner table is join
EXPLAIN select t1.a from t1 left join (t2 join t3 on true) on t2.a=t1.a and t3.a=t1.a;
-- inner table has new left join
EXPLAIN select t1.* from t1 left join (t2 left join t3 on t3.a=t2.b) on t2.a=t1.a;
-- inner table is a derived table
EXPLAIN (COSTS OFF)
select t1.* from t1 left join
(
select t2.b as v2b, count(*) as v2c
from t2 left join t3 on t3.a=t2.b
group by t2.b
) v2
on v2.v2b=t1.a;
drop table t1;
drop table t2;
drop table t3;
--
-- Cases where join will not be pruned
--
-- Single Unique key in inner relation --
create table fooJoinPruning (a int,b int,c int,constraint idx1 unique(a));
create table barJoinPruning (p int,q int,r int,constraint idx2 unique(p));
-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation but filter is on a inner relation --
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p=fooJoinPruning.b where barJoinPruning.q<>10;
-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation but output columns are from inner relation --
explain select barJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p=fooJoinPruning.b where fooJoinPruning.b>1000;
-- Subquery present in join condition
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p in (select fooJoinPruning.b from fooJoinPruning ) where fooJoinPruning.c>100;
-- Unique key of inner relation ie 'p' is present in the join condition and is equal to a column from outer relation and filter contains corelated subquery referencing inner relation column--
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b in (select barJoinPruning.q from fooJoinPruning);
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.c=barJoinPruning.p where fooJoinPruning.b in (select barJoinPruning.q from fooJoinPruning where fooJoinPruning.a=barJoinPruning.r);
drop table fooJoinPruning;
drop table barJoinPruning;
-- Multiple Unique key sets in inner relation --
create table fooJoinPruning (a int, b int, c int,d int,e int,f int,g int,constraint idx1 unique(a,b),constraint idx2 unique(a,c,d));
create table barJoinPruning (p int, q int, r int,s int,t int,u int,v int,constraint idx3 unique(p,q),constraint idx4 unique(p,r,s));
-- No equality operator present in join condition --
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on barJoinPruning.p>100 and barJoinPruning.q>200 where fooJoinPruning.b>300;
-- OR operator is present in join condition --
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.a=barJoinPruning.p and fooJoinPruning.c=barJoinPruning.r or fooJoinPruning.d=barJoinPruning.s;
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.a=barJoinPruning.p or fooJoinPruning.b=barJoinPruning.q where fooJoinPruning.b>300;
-- Not all unique keys of inner relation are equal to a constant or column from outer relation
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.a=barJoinPruning.p and barJoinPruning.r=barJoinPruning.s;
explain select fooJoinPruning.* from fooJoinPruning left join barJoinPruning on fooJoinPruning.a=barJoinPruning.p and fooJoinPruning.b=barJoinPruning.r and barJoinPruning.s=barJoinPruning.t where fooJoinPruning.b>300;
drop table fooJoinPruning;
drop table barJoinPruning;
--
-- Cases where join under union
--
create table foo(a int primary key, b int,c int);
create table bar(a int primary key, b int,c int);
insert into foo values (1,1,10),(2,1,10),(3,2,20),(4,2,30),(5,2,30),(6,NULL,NULL),(7,NULL,3);
insert into bar values (1,1,10),(2,2,20),(3,NULL,NULL),(4,3,NULL),(5,1,10);
analyze foo,bar;
explain (costs off) select foo.a, bar.b from foo left join bar on foo.a = bar.a
union select foo.a, bar.b from foo join bar on foo.a = bar.a;
-------------------------------------
-- CASES WHERE JOIN WILL BE PRUNED --
-------------------------------------
--------------------------------------------------------------------------------
-- join under UNION
-- For the below query the output columns of both the CLogicalLeftOuterJoin
-- are from the outer relation, so we can prune both the joins
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a union
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a union
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
--------------------------------------------------------------------------------
-- join under UNION ALL
-- For the below query the output columns of both the CLogicalLeftOuterJoin
-- are from the outer relation, so we can prune both the joins
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a union all
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a union all
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
--------------------------------------------------------------------------------
-- join under INTERSECT
-- For the below query the output columns of both the CLogicalLeftOuterJoin
-- are from the outer relation, so we can prune both the joins
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
--------------------------------------------------------------------------------
-- join under INTERSECT ALL
-- For the below query the output columns of both the CLogicalLeftOuterJoin
-- are from the outer relation, so we can prune both the joins
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect all
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect all
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
--------------------------------------------------------------------------------
-- join under EXCEPT
-- For the below query the output columns of both the CLogicalLeftOuterJoin
-- are from the outer relation, so we can prune both the joins
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a except
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a except
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
--------------------------------------------------------------------------------
-- join under EXCEPT ALL
-- For the below query the output columns of both the CLogicalLeftOuterJoin
-- are from the outer relation, so we can prune both the joins
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a except all
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a except all
select bar.b,bar.c from bar left join foo on foo.a=bar.a;
------------------------------------------
-- CASES WHERE JOIN WILL NOT BE PRUNED --
------------------------------------------
--------------------------------------------------------------------------------
-- join under UNION
-- For the below query since for the outer CLogicalLeftOuterJoin, all the output
-- columns are from the outer relation, the outer join can be pruned but for the
-- inner CLogicalLeftOuterJoin the output column contains columns from
-- inner relation.So the inner join can't be pruned.
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a union
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a union
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
--------------------------------------------------------------------------------
-- join under UNION ALL
-- For the below query since for the outer CLogicalLeftOuterJoin, all the output
-- columns are from the outer relation, the outer join can be pruned but for the
-- inner CLogicalLeftOuterJoin the output column contains columns from
-- inner relation.So the inner join can't be pruned.
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a union all
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a union all
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
--------------------------------------------------------------------------------
-- join under INTERSECT
-- For the below query since for the outer CLogicalLeftOuterJoin, all the output
-- columns are from the outer relation, the outer join can be pruned but for the
-- inner CLogicalLeftOuterJoin the output column contains columns from
-- inner relation.So the inner join can't be pruned.
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
--------------------------------------------------------------------------------
-- join under INTERSECT ALL
-- For the below query since for the outer CLogicalLeftOuterJoin, all the output
-- columns are from the outer relation, the outer join can be pruned but for the
-- inner CLogicalLeftOuterJoin the output column contains columns from
-- inner relation.So the inner join can't be pruned.
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect all
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a intersect all
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
--------------------------------------------------------------------------------
-- join under EXCEPT
-- For the below query since for the outer CLogicalLeftOuterJoin, all the output
-- columns are from the outer relation, the outer join can be pruned but for the
-- inner CLogicalLeftOuterJoin the output column contains columns from
-- inner relation.So the inner join can't be pruned.
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a except
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a except
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
--------------------------------------------------------------------------------
-- join under EXCEPT ALL
-- For the below query since for the outer CLogicalLeftOuterJoin, all the output
-- columns are from the outer relation, the outer join can be pruned but for the
-- inner CLogicalLeftOuterJoin the output column contains columns from
-- inner relation.So the inner join can't be pruned.
--------------------------------------------------------------------------------
explain (costs off) select foo.b,foo.c from foo left join bar on foo.a=bar.a except all
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
select foo.b,foo.c from foo left join bar on foo.a=bar.a except all
select bar.b,foo.c from bar left join foo on foo.a=bar.a;
drop table foo;
drop table bar;
-----------------------------------------------------------------
-- Test cases on Dynamic Partition Elimination(DPE) for Right Joins
-----------------------------------------------------------------
-- Note1 : DPE for Right join will happen if, all the following satisfy
-- Condition 1: Outer table is partition table
-- Condition 2: The partitioned column is same as distribution column
-- Condition 3: Join condition is on partitioned key of outer table
-- Note2 : To view the effect of DPE, the queries should be run with
-- "Explain Analyze ...". With it, the exact number of partitions scanned
-- will be shows in the plan.
-- Eg: explain analyze select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a;
drop table if exists foo;
drop table if exists bar_PT1;
drop table if exists bar_PT2;
drop table if exists bar_PT3;
drop table if exists bar_List_PT1;
drop table if exists bar_List_PT2;
-- Table creation : Normal table
create table foo (a int , b int) distributed by (a);
insert into foo select i,i from generate_series(1,5)i;
analyze foo;
-- Table creation : First range Partitioned table with same 'Distribution Column' and 'Partitioning key'
create table bar_PT1 (a1_PC int, b1 int) partition by range(a1_PC) (start (1) inclusive end (12) every (2)) distributed by (a1_PC);
insert into bar_PT1 select i,i from generate_series(1,11)i;
analyze bar_PT1;
-- Table creation : Second range Partitioned table with different 'Distribution Column' and 'Partitioning key'
create table bar_PT2 (a2 int, b2_PC int) partition by range(b2_PC) (start (1) inclusive end (12) every (2)) distributed by (a2);
insert into bar_PT2 select i,i from generate_series(1,11)i;
analyze bar_PT2;
-- Table creation : Third range Partitioned table with same 'Distribution Column' and 'Partitioning key'
create table bar_PT3 (a3_PC int, b3 int) partition by range(a3_PC) (start (1) inclusive end (6) every (2))distributed by (a3_PC);
insert into bar_PT3 select i,i from generate_series(1,5)i;
analyze bar_PT3;
-- Table creation : First list Partitioned table with same 'Distribution Column' and 'Partitioning key'
create table bar_List_PT1 (a1_PC int, b1 int) partition by list(a1_PC)
(partition p1 values(1,2), partition p2 values(3,4), partition p3 values(5,6), partition p4 values(7,8), partition p5 values(9,10),
partition p6 values(11,12), partition p7 values(13,14), partition p8 values(15,16), partition p9 values(17,18), partition p10 values(19,20),
partition p11 values(21,22), partition p12 values(23,24), default partition pdefault) distributed by (a1_PC);
insert into bar_List_PT1 select i,i from generate_series(1,24)i;
analyze bar_List_PT1;
-- Table creation : Second list Partitioned table with same 'Distribution Column' and 'Partitioning key'
create table bar_List_PT2 (a2_PC int, b2 int) partition by list(a2_PC)
(partition p1 values(1,2), partition p2 values(3,4), partition p3 values(5,6), partition p4 values(7,8), partition p5 values(9,10),
partition p6 values(11,12), default partition pdefault) distributed by (a2_PC);
insert into bar_List_PT2 select i,i from generate_series(1,12)i;
analyze bar_List_PT2;
-- Case-1 : Distribution colm = Partition Key.
-- FOR RANGE PARTITIONED TABLE
-- Outer table: Partitioned table, Join Condition on Partition key: Yes, Result: DPE - YES
explain (costs off) select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a;
select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a;
-- Outer table: Partitioned table, Join Condition on Partition key: No, Result: DPE - No
explain (costs off) select * from bar_PT1 right join foo on bar_PT1.b1 =foo.a;
select * from bar_PT1 right join foo on bar_PT1.b1 =foo.a;
-- Outer,Inner table: Partitioned table, Join Condition on Partition key: Yes, Result: DPE - Yes
explain (costs off) select * from bar_PT1 right join bar_PT3 on bar_PT1.a1_PC =bar_PT3.a3_PC;
select * from bar_PT1 right join bar_PT3 on bar_PT1.a1_PC =bar_PT3.a3_PC;
-- Outer table: Not a Partitioned table, Join Condition on Partition key: Yes, Result: DPE - No
explain (costs off) select * from foo right join bar_PT1 on foo.a=bar_PT1.a1_PC;
select * from foo right join bar_PT1 on foo.a=bar_PT1.a1_PC;
-- Right join with predicate on the column of non partitioned table in 'where clause'.
-- Result: DPE - Yes,
explain (costs off) select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a where foo.a>2;
select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a where foo.a>2;
--Conjunction in join condition, Result: DPE - Yes
explain (costs off) select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a and bar_PT1.b1 =foo.b;
select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a and bar_PT1.b1 =foo.b;
explain (costs off) select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a and foo.b>2;
select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a and foo.b>2;
-- Multiple Right Joins, DPE- Yes
explain (costs off) select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a right join bar_PT2 on bar_PT1.a1_PC =bar_PT2.b2_PC;
select * from bar_PT1 right join foo on bar_PT1.a1_PC =foo.a right join bar_PT2 on bar_PT1.a1_PC =bar_PT2.b2_PC;
-- FOR LIST PARTITIONED TABLE
-- Outer table: List Partitioned table, Join Condition on Partition key: Yes, Result: DPE - YES
explain (costs off) select * from bar_List_PT1 right join foo on bar_List_PT1.a1_PC =foo.a;
select * from bar_List_PT1 right join foo on bar_List_PT1.a1_PC =foo.a;
-- Outer,Inner table: Partitioned table, Join Condition on Partition key: Yes, Result: DPE - Yes
explain (costs off) select * from bar_List_PT1 right join bar_List_PT2 on bar_List_PT1.a1_PC =bar_List_PT2.a2_PC;
select * from bar_List_PT1 right join bar_List_PT2 on bar_List_PT1.a1_PC =bar_List_PT2.a2_PC;
-- Case-2 : Distribution colm <> Partition Key.
-- Outer table: Partitioned table, Join Condition on Partition key: Yes, Result: DPE - No
explain (costs off) select * from bar_PT2 right join foo on bar_PT2.b2_PC =foo.a;
select * from bar_PT2 right join foo on bar_PT2.b2_PC =foo.a;
-- Outer,Inner table: Partitioned table, Join Condition on Partition key: Yes, Result: DPE - No
explain (costs off) select * from bar_PT2 right join bar_PT1 on bar_PT2.b2_PC =bar_PT1.b1;
select * from bar_PT2 right join bar_PT1 on bar_PT2.b2_PC =bar_PT1.b1;
drop table if exists foo;
drop table if exists bar_PT1;
drop table if exists bar_PT2;
drop table if exists bar_PT3;
drop table if exists bar_List_PT1;
drop table if exists bar_List_PT2;
-- Test that left-anti-semi-join not-in works with netowrk types
CREATE TABLE inverse (cidr inet);
INSERT INTO inverse values ('192.168.100.199');
explain SELECT 1 FROM inverse WHERE NOT (cidr <<= ANY(SELECT * FROM inverse));
SELECT 1 FROM inverse WHERE NOT (cidr <<= ANY(SELECT * FROM inverse));
create table foo_varchar (a varchar(5)) distributed by (a);
create table bar_char (p char(5)) distributed by (p);
create table random_dis_varchar (x varchar(5)) distributed randomly;
create table random_dis_char (y char(5)) distributed randomly;
insert into foo_varchar values ('1 '),('2 '),('3 ');
insert into bar_char values ('1 '),('2 '),('3 ');
insert into random_dis_varchar values ('1 '),('2 '),('3 ');
insert into random_dis_char values ('1 '),('2 '),('3 ');
set optimizer_enable_hashjoin to off;
set enable_hashjoin to off;
set enable_nestloop to on;
-- check motion is added when performing a NL Left Outer Join between relations
-- when the join condition columns belong to different opfamily and both are
-- distribution keys
explain select * from foo_varchar left join bar_char on foo_varchar.a=bar_char.p;
select * from foo_varchar left join bar_char on foo_varchar.a=bar_char.p;
-- There is a plan change (from redistribution to broadcast) because a NULL
-- matching distribution is returned when there is opfamily mismatch between join
-- columns.
explain select * from foo_varchar left join random_dis_char on foo_varchar.a=random_dis_char.y;
select * from foo_varchar left join random_dis_char on foo_varchar.a=random_dis_char.y;
explain select * from bar_char left join random_dis_varchar on bar_char.p=random_dis_varchar.x;
select * from bar_char left join random_dis_varchar on bar_char.p=random_dis_varchar.x;
-- check motion is added when performing a NL Inner Join between relations when
-- the join condition columns belong to different opfamily and both are
-- distribution keys
explain select * from foo_varchar inner join bar_char on foo_varchar.a=bar_char.p;
select * from foo_varchar inner join bar_char on foo_varchar.a=bar_char.p;
-- There is a plan change (from redistribution to broadcast) because a NULL
-- matching distribution is returned when there is opfamily mismatch between join
-- columns.
explain select * from foo_varchar inner join random_dis_char on foo_varchar.a=random_dis_char.y;
select * from foo_varchar inner join random_dis_char on foo_varchar.a=random_dis_char.y;
explain select * from bar_char inner join random_dis_varchar on bar_char.p=random_dis_varchar.x;
select * from bar_char inner join random_dis_varchar on bar_char.p=random_dis_varchar.x;
drop table foo_varchar;
drop table bar_char;
drop table random_dis_varchar;
drop table random_dis_char;
set optimizer_enable_hashjoin to on;
reset enable_hashjoin;
reset enable_nestloop;
-----------------------------------------------------------------
-- Test cases to check if ORCA generates correct result
-- for "Left Semi Join with replicated outer table"
-----------------------------------------------------------------
drop table if exists repli_t1;
drop table if exists dist_t1;
create table repli_t1 (a int) distributed replicated;
insert into repli_t1 values(1);
analyze repli_t1;
create table dist_t1 (a int , b int) distributed by (a);
insert into dist_t1 select i, 1 from generate_series(1, 5) i;
analyze dist_t1;
-- No explicitly defined primary key for replicated table
---------------------------------------------------------
-- Outer - replicated, Inner - distributed table
explain (costs off) select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a >= dist_t1.b);
select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a >= dist_t1.b);
explain (costs off) select * from (select t1.a as aVal1, t2.a as aVal2 from repli_t1 as t1 , repli_t1 as t2 where t1.a = t2.a) as t3
where exists (select 1 from dist_t1 as t4 where t3.aVal1 >= t4.b);
select * from (select t1.a as aVal1, t2.a as aVal2 from repli_t1 as t1 , repli_t1 as t2 where t1.a = t2.a) as t3
where exists (select 1 from dist_t1 as t4 where t3.aVal1 >= t4.b);
create index idx_repl_t1_a ON repli_t1 using btree(a);
analyze repli_t1;
explain (costs off) select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a >= dist_t1.b);
select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a >= dist_t1.b);
drop index idx_repl_t1_a;
-- Outer - distributed, Inner - replicated table
explain (costs off) select * from dist_t1 where exists ( select 1 from repli_t1 where repli_t1.a >= dist_t1.b);
select * from dist_t1 where exists ( select 1 from repli_t1 where repli_t1.a >= dist_t1.b);
-- Both replicated table
explain (costs off) select * from repli_t1 as t1 where exists ( select 1 from repli_t1 as t2 where t1.a >= t2.a);
select * from repli_t1 as t1 where exists ( select 1 from repli_t1 as t2 where t1.a >= t2.a);
-- Outer - Universal, Inner - distributed table
explain (costs off) select * from generate_series(1, 5) univ_t where exists ( select 1 from dist_t1 where univ_t >= dist_t1.b);
select * from generate_series(1, 5) univ_t where exists ( select 1 from dist_t1 where univ_t >= dist_t1.b);
-- Outer - distributed, Inner - universal table
explain (costs off) select * from dist_t1 where exists ( select 1 from generate_series(1, 5) univ_t where univ_t >= dist_t1.b);
select * from dist_t1 where exists ( select 1 from generate_series(1, 5) univ_t where univ_t >= dist_t1.b);
-- Outer - replicated, Inner - universal table
explain (costs off)select * from repli_t1 where exists ( select 1 from generate_series(1, 5) univ_t where univ_t >= repli_t1.a);
select * from repli_t1 where exists ( select 1 from generate_series(1, 5) univ_t where univ_t >= repli_t1.a);
-- Outer - universal, Inner - replicated table
explain (costs off) select * from generate_series(1, 5) univ_t where exists ( select 1 from repli_t1 where univ_t >= repli_t1.a);
select * from generate_series(1, 5) univ_t where exists ( select 1 from repli_t1 where univ_t >= repli_t1.a);
-- Explicitly defined primary key for replicated table
---------------------------------------------------------
drop table if exists repli_t1_pk;
drop table if exists repli_t2_pk;
drop table if exists repli_t3_pk;
drop table if exists repli_t4_pk;
-- Outer - replicated, Inner - distributed table
create table repli_t1_pk (a int, PRIMARY KEY(a)) distributed replicated;
insert into repli_t1_pk values(1);
analyze repli_t1_pk;
create table repli_t2_pk (a int, CONSTRAINT key1_t2 PRIMARY KEY(a) ) distributed replicated;
insert into repli_t2_pk values(1);
analyze repli_t2_pk;
create table repli_t3_pk (a int,b int, c int, d int, CONSTRAINT key1_t3 UNIQUE (c,d)) distributed replicated;
insert into repli_t3_pk values(1,2,3,4);
analyze repli_t3_pk;
create table repli_t4_pk (a int,b int, c int, d int, CONSTRAINT key1_t4 PRIMARY KEY(a) , CONSTRAINT key2_t4 UNIQUE (c,d)) distributed replicated;
insert into repli_t4_pk values(1,2,3,4);
analyze repli_t4_pk;
explain (costs off) select * from repli_t1_pk where exists ( select 1 from dist_t1 where repli_t1_pk.a >= dist_t1.b);
select * from repli_t1_pk where exists ( select 1 from dist_t1 where repli_t1_pk.a >= dist_t1.b);
create index idx_repli_t1_pk_a ON repli_t1_pk using btree(a);
analyze repli_t1_pk;
explain (costs off) select * from repli_t1_pk where exists ( select 1 from dist_t1 where repli_t1_pk.a >= dist_t1.b);
select * from repli_t1_pk where exists ( select 1 from dist_t1 where repli_t1_pk.a >= dist_t1.b);
drop index idx_repli_t1_pk_a;
explain (costs off) select * from repli_t2_pk where exists ( select 1 from dist_t1 where repli_t2_pk.a >= dist_t1.b);
select * from repli_t2_pk where exists ( select 1 from dist_t1 where repli_t2_pk.a >= dist_t1.b);
explain (costs off) select * from repli_t3_pk where exists ( select 1 from dist_t1 where repli_t3_pk.a >= dist_t1.b);
select * from repli_t3_pk where exists ( select 1 from dist_t1 where repli_t3_pk.a >= dist_t1.b);
explain (costs off) select * from repli_t4_pk where exists ( select 1 from dist_t1 where repli_t4_pk.a >= dist_t1.b);
select * from repli_t4_pk where exists ( select 1 from dist_t1 where repli_t4_pk.a >= dist_t1.b);
drop table if exists repli_t1;
drop table if exists dist_t1;
drop table if exists repli_t1_pk;
drop table if exists repli_t2_pk;
drop table if exists repli_t3_pk;
drop table if exists repli_t4_pk;