| -- Extra GPDB tests for joins. |
| |
| -- Ignore "workfile compresssion is not supported by this build" (see |
| -- 'zlib' test): |
| -- |
| -- start_matchignore |
| -- m/ERROR: workfile compresssion is not supported by this build/ |
| -- end_matchignore |
| |
| -- |
| -- test numeric hash join |
| -- |
| |
| set enable_hashjoin to on; |
| set enable_mergejoin to off; |
| set enable_nestloop to off; |
| create table nhtest (i numeric(10, 2)); |
| insert into nhtest values(100000.22); |
| insert into nhtest values(300000.19); |
| explain select * from nhtest a join nhtest b using (i); |
| select * from nhtest a join nhtest b using (i); |
| |
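-- Self-join with duplicate join keys, where the left join's condition also
-- includes a constant qual on the outer side (l1.a = 2), so only the a = 2
-- rows find a match in l3.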
| create temp table l(a int); |
| insert into l values (1), (1), (2); |
| select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1.a = 2 order by 1,2,3; |
| |
| -- |
| -- test hash join |
| -- |
| |
| create table hjtest (i int, j int); |
| insert into hjtest values(3, 4); |
| |
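-- The least() call below makes the hash clause an expression rather than a
-- plain column reference.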
| select count(*) from hjtest a1, hjtest a2 where a2.i = least (a1.i,4) and a2.j = 4; |
| |
| -- |
| -- Test for correct behavior when there is a Merge Join on top of Materialize |
-- on top of a Motion:
| -- 1. Use FULL OUTER JOIN to induce a Merge Join |
| -- 2. Use a large tuple size to induce a Materialize |
| -- 3. Use gp_dist_random() to induce a Redistribute |
| -- |
| |
| set enable_hashjoin to off; |
| set enable_mergejoin to on; |
| set enable_nestloop to off; |
| |
| DROP TABLE IF EXISTS alpha; |
| DROP TABLE IF EXISTS theta; |
| |
| CREATE TABLE alpha (i int, j int); |
| CREATE TABLE theta (i int, j char(10000000)); |
| |
| INSERT INTO alpha values (1, 1), (2, 2); |
| INSERT INTO theta values (1, 'f'), (2, 'g'); |
| |
| SELECT * |
| FROM gp_dist_random('alpha') FULL OUTER JOIN gp_dist_random('theta') |
| ON (alpha.i = theta.i) |
| WHERE (alpha.j IS NULL or theta.j IS NULL); |
| |
| reset enable_hashjoin; |
| reset enable_mergejoin; |
| reset enable_nestloop; |
| |
| -- |
| -- Predicate propagation over equality conditions |
| -- |
| |
| drop schema if exists pred; |
| create schema pred; |
| set search_path=pred; |
| |
| create table t1 (x int, y int, z int); |
| create table t2 (x int, y int, z int); |
| insert into t1 select i, i, i from generate_series(1,100) i; |
| insert into t2 select * from t1; |
| |
| analyze t1; |
| analyze t2; |
| |
| -- |
| -- infer over equalities |
| -- |
| explain select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x; |
| select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x; |
| |
| -- |
| -- infer over >= |
| -- |
| explain select * from t1,t2 where t1.x = 100 and t2.x >= t1.x; |
| select * from t1,t2 where t1.x = 100 and t2.x >= t1.x; |
| |
| -- |
| -- multiple inferences |
| -- |
| |
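-- optimizer_segments pins the number of segments ORCA plans for, presumably
-- so that the EXPLAIN output below does not depend on the actual cluster size.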
| set optimizer_segments=2; |
| explain select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x; |
| reset optimizer_segments; |
| select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x; |
| |
| -- |
| -- MPP-18537: hash clause references a constant in outer child target list |
| -- |
| |
| create table hjn_test (i int, j int); |
| insert into hjn_test values(3, 4); |
| create table int4_tbl (f1 int); |
| insert into int4_tbl values(123456), (-2147483647), (0), (-123456), (2147483647); |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4; |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1]; |
| select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1]; |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10); |
| select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1)); |
| |
| -- Same as the last query, but with a partitioned table (which requires a |
| -- Result node to do projection of the hash expression, as Append is not |
| -- projection-capable) |
| create table part4_tbl (f1 int4) partition by range (f1) (start(-1000000) end (1000000) every (1000000)); |
| insert into part4_tbl values |
| (-123457), (-123456), (-123455), |
| (-1), (0), (1), |
| (123455), (123456), (123457); |
| select * from part4_tbl a join part4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1)); |
| |
| -- |
| -- Test case where a Motion hash key is only needed for the redistribution, |
| -- and not returned in the final result set. There was a bug at one point where |
| -- tjoin.c1 was used as the hash key in a Motion node, but it was not added |
-- to the sub-plan's target list, causing a "variable not found in subplan
| -- target list" error. |
| -- |
| create table tjoin1(dk integer, id integer); |
| create table tjoin2(dk integer, id integer, t text); |
| create table tjoin3(dk integer, id integer, t text); |
| |
| insert into tjoin1 values (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3); |
| insert into tjoin2 values (1, 1, '1-1'), (1, 2, '1-2'), (2, 1, '2-1'), (2, 2, '2-2'); |
| insert into tjoin3 values (1, 1, '1-1'), (2, 1, '2-1'); |
| |
| select tjoin1.id, tjoin2.t, tjoin3.t |
| from tjoin1 |
| left outer join (tjoin2 left outer join tjoin3 on tjoin2.id=tjoin3.id) on tjoin1.id=tjoin3.id; |
| |
| |
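-- Repeat the same queries with hash joins disabled in both the planner
-- (enable_hashjoin) and ORCA (optimizer_enable_hashjoin).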
| set enable_hashjoin to off; |
| set optimizer_enable_hashjoin = off; |
| |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4; |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1]; |
| select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1]; |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10); |
| select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1)); |
| |
| reset enable_hashjoin; |
| reset optimizer_enable_hashjoin; |
| |
-- In the case of a Left Anti Semi Join, if the left rel is empty, a dummy join
-- should be created
| drop table if exists foo; |
| drop table if exists bar; |
| create table foo (a int, b int); |
| create table bar (c int, d int); |
| insert into foo select generate_series(1,10); |
| insert into bar select generate_series(1,10); |
| |
| explain select a from foo where a<1 and a>1 and not exists (select c from bar where c=a); |
| select a from foo where a<1 and a>1 and not exists (select c from bar where c=a); |
| |
| -- The merge join executor code doesn't support LASJ_NOTIN joins. Make sure |
| -- the planner doesn't create an invalid plan with them. |
| create index index_foo on foo (a); |
| create index index_bar on bar (c); |
| |
| set enable_nestloop to off; |
| set enable_hashjoin to off; |
| set enable_mergejoin to on; |
| |
| select * from foo where a not in (select c from bar where c <= 5); |
| |
| set enable_nestloop to off; |
| set enable_hashjoin to on; |
| set enable_mergejoin to off; |
| |
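-- Set up a small dept hierarchy for the recursive-CTE hash join tests below;
-- the generate_series inserts presumably add enough rows for the hash table
-- to spill under the low statement_mem set later.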
| create table dept |
| ( |
| id int, |
| pid int, |
| name char(40) |
| ); |
| |
| insert into dept values(3, 0, 'root'); |
| insert into dept values(4, 3, '2<-1'); |
| insert into dept values(5, 4, '3<-2<-1'); |
| insert into dept values(6, 4, '4<-2<-1'); |
| insert into dept values(7, 3, '5<-1'); |
| insert into dept values(8, 7, '5<-1'); |
| insert into dept select i, i % 6 + 3 from generate_series(9,50) as i; |
| insert into dept select i, 99 from generate_series(100,15000) as i; |
| |
| ANALYZE dept; |
| |
| -- Test rescannable hashjoin with spilling hashtable |
| set statement_mem='1000kB'; |
| set gp_workfile_compression = off; |
| WITH RECURSIVE subdept(id, parent_department, name) AS |
| ( |
| -- non recursive term |
| SELECT * FROM dept WHERE name = 'root' |
| UNION ALL |
| -- recursive term |
| SELECT d.* FROM dept AS d, subdept AS sd |
| WHERE d.pid = sd.id |
| ) |
| SELECT count(*) FROM subdept; |
| |
| -- Test rescannable hashjoin with spilling hashtable, with compression |
| set gp_workfile_compression = on; |
| WITH RECURSIVE subdept(id, parent_department, name) AS |
| ( |
| -- non recursive term |
| SELECT * FROM dept WHERE name = 'root' |
| UNION ALL |
| -- recursive term |
| SELECT d.* FROM dept AS d, subdept AS sd |
| WHERE d.pid = sd.id |
| ) |
| SELECT count(*) FROM subdept; |
| |
| -- Test rescannable hashjoin with in-memory hashtable |
| reset statement_mem; |
| WITH RECURSIVE subdept(id, parent_department, name) AS |
| ( |
| -- non recursive term |
| SELECT * FROM dept WHERE name = 'root' |
| UNION ALL |
| -- recursive term |
| SELECT d.* FROM dept AS d, subdept AS sd |
| WHERE d.pid = sd.id |
| ) |
| SELECT count(*) FROM subdept; |
| |
| |
| -- MPP-29458 |
-- When we join on a clause involving two different types and one table is
-- distributed by one of those types, the query plan may redistribute data on
-- the other type. But the hash values of the two types would not be equal, so
-- the data would be redistributed to the wrong segments.
| create table test_timestamp_t1 (id numeric(10,0) ,field_dt date); |
| create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone); |
| |
| insert into test_timestamp_t1 values(10 ,'2018-1-10'); |
| insert into test_timestamp_t1 values(11 ,'2018-1-11'); |
| insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp); |
| insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp); |
| |
| -- Test nest loop redistribute keys |
| set enable_nestloop to on; |
| set enable_hashjoin to on; |
| set enable_mergejoin to on; |
| select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms; |
| |
| -- Test hash join redistribute keys |
| set enable_nestloop to off; |
| set enable_hashjoin to on; |
| set enable_mergejoin to on; |
| select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms; |
| |
| drop table test_timestamp_t1; |
| drop table test_timestamp_t2; |
| |
| -- Test merge join redistribute keys |
| create table test_timestamp_t1 (id numeric(10,0) ,field_dt date); |
| |
| create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone); |
| |
| insert into test_timestamp_t1 values(10 ,'2018-1-10'); |
| insert into test_timestamp_t1 values(11 ,'2018-1-11'); |
| insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp); |
| insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp); |
| |
| select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms; |
| |
| -- test float type |
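-- (float4 vs float8 join keys; the redistribution must hash both sides
-- consistently despite the differing types)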
| set enable_nestloop to off; |
| set enable_hashjoin to on; |
| set enable_mergejoin to on; |
| create table test_float1(id int, data float4); |
| create table test_float2(id int, data float8); |
| insert into test_float1 values(1, 10), (2, 20); |
| insert into test_float2 values(3, 10), (4, 20); |
| select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data; |
| |
| -- test int type |
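-- (int4 vs int8 join keys, the same cross-type redistribution concern as
-- above)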
| create table test_int1(id int, data int4); |
| create table test_int2(id int, data int8); |
| insert into test_int1 values(1, 10), (2, 20); |
| insert into test_int2 values(3, 10), (4, 20); |
| select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data; |
| |
-- Test to ensure that for a full outer join on varchar columns, the planner successfully finds a sort operator in the catalog
| create table input_table(a varchar(30), b varchar(30)); |
| set enable_hashjoin = off; |
| explain (costs off) select X.a from input_table X full join (select a from input_table) Y ON X.a = Y.a; |
| |
| -- Cleanup |
| reset enable_hashjoin; |
| set client_min_messages='warning'; -- silence drop-cascade NOTICEs |
| drop schema pred cascade; |
| reset search_path; |
| |
| -- github issue 5370 cases |
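-- The join below is on the text column 'name', which is presumably not the
-- distribution key, so the plan is expected to need a redistribution.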
| drop table if exists t5370; |
| drop table if exists t5370_2; |
| create table t5370(id int,name text); |
| insert into t5370 select i,i from generate_series(1,1000) i; |
| create table t5370_2 as select * from t5370; |
| analyze t5370_2; |
| analyze t5370; |
| explain select * from t5370 a , t5370_2 b where a.name=b.name; |
| |
| drop table t5370; |
| drop table t5370_2; |
| |
| -- github issue 6215 cases |
| -- When executing the following plan |
| -- ``` |
| -- Gather Motion 1:1 (slice1; segments: 1) |
| -- -> Merge Full Join |
| -- -> Seq Scan on int4_tbl a |
| -- -> Seq Scan on int4_tbl b |
-- ```
-- Cloudberry used to raise an assertion failure.
-- We now force a Material node to be added for a
-- merge full join on true.
| drop table if exists t6215; |
| create table t6215(f1 int4); |
| insert into t6215(f1) values (1), (2), (3); |
| |
| set enable_material = off; |
-- The plan should still have a Material operator
| explain (costs off) select * from t6215 a full join t6215 b on true; |
| select * from t6215 a full join t6215 b on true; |
| |
| drop table t6215; |
| |
| |
| -- |
| -- This tripped an assertion while deciding the locus for the joins. |
-- The code was failing to handle a join between SingleQE and Hash correctly
-- when there were join order restrictions. (See
-- https://github.com/greenplum-db/gpdb/issues/6643)
--
| select a.f1, b.f1, t.thousand, t.tenthous from |
| (select sum(f1) as f1 from int4_tbl i4b) b |
| left outer join |
| (select sum(f1)+1 as f1 from int4_tbl i4a) a ON a.f1 = b.f1 |
| left outer join |
| tenk1 t ON b.f1 = t.thousand and (a.f1+b.f1+999) = t.tenthous; |
| |
| |
| -- tests to ensure that join reordering of LOJs and inner joins produces the |
| -- correct join predicates & residual filters |
| drop table if exists t1, t2, t3; |
| CREATE TABLE t1 (a int, b int, c int); |
| CREATE TABLE t2 (a int, b int, c int); |
| CREATE TABLE t3 (a int, b int, c int); |
| INSERT INTO t1 SELECT i, i, i FROM generate_series(1, 1000) i; |
| INSERT INTO t2 SELECT i, i, i FROM generate_series(2, 1000) i; -- start from 2 so that one row from t1 doesn't match |
| INSERT INTO t3 VALUES (1, 2, 3), (NULL, 2, 2); |
| ANALYZE t1; |
| ANALYZE t2; |
| ANALYZE t3; |
| |
| -- ensure plan has a filter over left outer join |
| explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c)); |
| select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c)); |
| |
| -- ensure plan has two inner joins with the where clause & join predicates ANDed |
| explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a); |
| select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a); |
| |
| -- ensure plan has a filter over left outer join |
| explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a); |
| select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a); |
| |
| -- ensure plan has a filter over left outer join |
| explain select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a)); |
| select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a)); |
| |
| -- ensure plan has a filter over left outer join |
| explain select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt |
| join t3 on tt.t1b = t3.b |
| join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b |
| join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b); |
| |
| select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt |
| join t3 on tt.t1b = t3.b |
| join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b |
| join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b); |
| |
| -- test different join order enumeration methods |
| set optimizer_join_order = query; |
| select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; |
| set optimizer_join_order = greedy; |
| select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; |
| set optimizer_join_order = exhaustive; |
| select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; |
| set optimizer_join_order = exhaustive2; |
| select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; |
| reset optimizer_join_order; |
| select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; |
| |
| drop table t1, t2, t3; |
| |
| -- |
-- Test a bug where a nestloop path previously could not generate a Motion above
-- an index path, which is sometimes wrong (this test case is an example).
-- We now rely on parameterized-path-related variables to decide instead.
-- For now we conservatively disallow a Motion when either the outer or the
-- inner side requires parameters, although there is room for further
-- improvement (e.g. following the subplan code to broadcast a base rel if
-- needed, which would take considerable effort and does not seem worthwhile
-- given that the related code will probably be refactored for lateral support
-- in the near future). For the query and GUC settings below, the Postgres
-- planner cannot generate a plan.
| set enable_nestloop = 1; |
| set enable_material = 0; |
| set enable_seqscan = 0; |
| set enable_bitmapscan = 0; |
| explain select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1; |
| select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1; |
| reset enable_nestloop; |
| reset enable_material; |
| reset enable_seqscan; |
| reset enable_bitmapscan; |
| |
-- The test cases below exercise the planner's cdbpath_motion_for_join, so we
-- turn ORCA off temporarily.
| set optimizer = off; |
| -- test outer join for general locus |
| -- replicated table's locus is SegmentGeneral |
create table trep_join_gp (c1 int, c2 int) distributed replicated;
| -- hash distributed table's locus is Hash |
create table thash_join_gp (c1 int, c2 int) distributed by (c1);
| -- randomly distributed table's locus is Strewn |
create table trand_join_gp (c1 int, c2 int) distributed randomly;
| -- start_ignore |
| create extension if not exists gp_debug_numsegments; |
| select gp_debug_set_create_table_default_numsegments(1); |
| -- end_ignore |
| -- the following replicated table's numsegments is 1 |
create table trep1_join_gp (c1 int, c2 int) distributed replicated;
| |
| insert into trep_join_gp values (1, 1), (2, 2); |
| insert into thash_join_gp values (1, 1), (2, 2); |
| insert into trep1_join_gp values (1, 1), (2, 2); |
| |
| analyze trep_join_gp; |
| analyze thash_join_gp; |
| analyze trep1_join_gp; |
| analyze trand_join_gp; |
| |
-- This test checks that General left join SegmentGeneral --> SegmentGeneral,
-- and that SegmentGeneral join Hash does not need a Motion.
| explain select * from generate_series(1, 5) g left join trep_join_gp on g = trep_join_gp.c1 join thash_join_gp on true; |
| select * from generate_series(1, 5) g left join trep_join_gp on g = trep_join_gp.c1 join thash_join_gp on true; |
| |
-- The following 4 tests check that for General left join Partitioned, we can
-- redistribute the general-locus relation when the join condition allows it.
-- If we can redistribute the general-locus relation, we should not gather it
-- to a SingleQE.
| explain select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c1; |
| select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c1; |
| |
| explain select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c2; |
| select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c2; |
| |
| explain select * from generate_series(1, 5) g left join trand_join_gp on g = trand_join_gp.c1; |
| select * from generate_series(1, 5) g left join trand_join_gp on g = trand_join_gp.c1; |
| |
| explain select * from generate_series(1, 5) g full join trand_join_gp on g = trand_join_gp.c1; |
| select * from generate_series(1, 5) g full join trand_join_gp on g = trand_join_gp.c1; |
| |
-- The following 3 tests check that for SegmentGeneral left join Partitioned,
-- we can redistribute the segmentgeneral-locus relation when the join
-- condition allows it. If we can redistribute it, we should not gather it to
-- a SingleQE.
| explain select * from trep_join_gp left join thash_join_gp using (c1); |
| select * from trep_join_gp left join thash_join_gp using (c1); |
| |
| explain select * from trep_join_gp left join trand_join_gp using (c1); |
| select * from trep_join_gp left join trand_join_gp using (c1); |
| |
| explain select * from trep1_join_gp join thash_join_gp using (c1); |
| select * from trep1_join_gp join thash_join_gp using (c1); |
| |
| drop table trep_join_gp; |
| drop table thash_join_gp; |
| drop table trand_join_gp; |
| drop table trep1_join_gp; |
| |
| -- select gp_debug_set_create_table_default_numsegments(3); |
| |
| reset optimizer; |
| |
-- The following cases test the planner's join size estimation,
-- so we need the optimizer (ORCA) to be off.
-- When a partitioned table is joined with another table on its partition key,
-- the planner should use the root table's stats instead of the largest
-- partition's.
| reset enable_hashjoin; |
| reset enable_mergejoin; |
| reset enable_nestloop; |
| |
| create table t_joinsize_1 (c1 int, c2 int) |
| partition by range (c2) |
| ( start (0) end (5) every (1), |
| default partition extra ); |
| |
| create table t_joinsize_2 (c1 int, c2 int); |
| |
| create table t_joinsize_3 (c int); |
| |
| insert into t_joinsize_1 select i, i%5 from generate_series(1, 200)i; |
| insert into t_joinsize_1 select 1, null from generate_series(1, 1000); |
| |
| insert into t_joinsize_2 select i,i%5 from generate_series(1, 1000)i; |
| insert into t_joinsize_3 select * from generate_series(1, 100); |
| |
| analyze t_joinsize_1; |
| analyze t_joinsize_2; |
| analyze t_joinsize_3; |
| |
-- the following query should not broadcast the join result of t_joinsize_1 and t_joinsize_2.
| explain select * from (t_joinsize_1 join t_joinsize_2 on t_joinsize_1.c2 = t_joinsize_2.c2) join t_joinsize_3 on t_joinsize_3.c = t_joinsize_1.c1; |
| |
| drop table t_joinsize_1; |
| drop table t_joinsize_2; |
| drop table t_joinsize_3; |
| |
-- Test that if a subquery's locus is General,
-- we keep it General.
| create table t_randomly_dist_table(c int); |
| |
| -- force_explain |
-- the following plan should not contain a Redistribute Motion (for the planner)
| explain |
| select * from ( |
| select a from generate_series(1, 10)a |
| union all |
| select a from generate_series(1, 10)a |
| ) t_subquery_general |
| join t_randomly_dist_table on t_subquery_general.a = t_randomly_dist_table.c; |
| |
| drop table t_randomly_dist_table; |
| |
| |
-- Test a lateral join whose inner plan contains a LIMIT.
-- We cannot pass params across a Motion, so we can only
-- generate a plan that gathers all the data to a SingleQE.
-- Here we create a composite data type to pass as a param
-- into the inner plan. That way, if we fail to pass the
-- correct params into the inner plan, it will throw an error
-- because of a null-pointer dereference. If we only used an
-- int param, the null-pointer error might not surface
-- because a null would be parsed as integer 0.
| |
| create type mytype_for_lateral_test as (x int, y int); |
| create table t1_lateral_limit(a int, b int, c mytype_for_lateral_test); |
| create table t2_lateral_limit(a int, b int); |
| insert into t1_lateral_limit values (1, 1, '(1,1)'); |
| insert into t1_lateral_limit values (1, 2, '(2,2)'); |
| insert into t2_lateral_limit values (2, 2); |
| insert into t2_lateral_limit values (3, 3); |
| |
| explain select * from t1_lateral_limit as t1 cross join lateral |
| (select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1)s; |
| |
| select * from t1_lateral_limit as t1 cross join lateral |
| (select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1)s; |
| |
-- Continuing with the above cases: if the lateral subquery contains UNION ALL
-- and some of its append queries contain a LIMIT, that may also lead to a bad
-- plan. The best solution may be to walk the query and do some static analysis
-- to find out which rels have to be gathered and materialized, but that is
-- complicated and seems less efficient. In the future we should do a bigger
-- refactor to make Greenplum support LATERAL well; for now, just make sure
-- we do not panic.
| explain (costs off) select * from t1_lateral_limit as t1 cross join lateral |
| ((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s; |
| |
| select * from t1_lateral_limit as t1 cross join lateral |
| ((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s; |
| |
-- Test a lateral subquery that contains GROUP BY (grouping is another place
-- that may add motions to the subquery's plan).
| explain select * from t1_lateral_limit t1 cross join lateral |
| (select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x; |
| |
| select * from t1_lateral_limit t1 cross join lateral |
| (select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x; |
| |
| -- The following case is from Github Issue |
| -- https://github.com/greenplum-db/gpdb/issues/8860 |
-- It is the same issue as in the above test cases.
| create table t_mylog_issue_8860 (myid int, log_date timestamptz ); |
| insert into t_mylog_issue_8860 values (1,timestamptz '2000-01-02 03:04'),(1,timestamptz '2000-01-02 03:04'-'1 hour'::interval); |
| insert into t_mylog_issue_8860 values (2,timestamptz '2000-01-02 03:04'),(2,timestamptz '2000-01-02 03:04'-'2 hour'::interval); |
| |
| explain select ml1.myid, log_date as first_date, ml2.next_date from t_mylog_issue_8860 ml1 |
| inner join lateral |
| (select myid, log_date as next_date |
| from t_mylog_issue_8860 where myid = ml1.myid and log_date > ml1.log_date order by log_date asc limit 1) ml2 |
| on true; |
| |
| select ml1.myid, log_date as first_date, ml2.next_date from t_mylog_issue_8860 ml1 |
| inner join lateral |
| (select myid, log_date as next_date |
| from t_mylog_issue_8860 where myid = ml1.myid and log_date > ml1.log_date order by log_date asc limit 1) ml2 |
| on true; |
| |
-- Test prefetching of the join qual.
-- We do not handle this correctly in all cases.
-- The only case where we need to prefetch the join qual is when:
-- 1. the outer plan contains a motion, and
-- 2. the join qual contains a subplan that contains a motion
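-- (client_min_messages was lowered earlier to silence drop notices; reset it
-- here, presumably so that any output from the debugging GUC below is visible)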
| reset client_min_messages; |
| set Test_print_prefetch_joinqual = true; |
-- prefetch join qual is only set correctly for the planner
| set optimizer = off; |
| |
| create table t1_test_pretch_join_qual(a int, b int, c int); |
| create table t2_test_pretch_join_qual(a int, b int, c int); |
| |
-- The following plan contains a redistribute motion in both the inner and the
-- outer plan. The join qual t1.c > t2.c contains no motion, so we should not
-- prefetch.
| explain (costs off) select * from t1_test_pretch_join_qual t1 join t2_test_pretch_join_qual t2 |
| on t1.b = t2.b and t1.c > t2.c; |
| |
| create table t3_test_pretch_join_qual(a int, b int, c int); |
| |
-- The following plan contains a motion in both the outer plan and the join
-- qual, so we should prefetch the join qual
| explain (costs off) select * from t1_test_pretch_join_qual t1 join t2_test_pretch_join_qual t2 |
| on t1.b = t2.b and t1.a > any (select sum(b) from t3_test_pretch_join_qual t3 where c > t2.a); |
| |
| reset Test_print_prefetch_joinqual; |
| reset optimizer; |
| |
| -- Github Issue: https://github.com/greenplum-db/gpdb/issues/9733 |
-- Previously the functions bring_to_outer_query and
-- bring_to_singleQE depended on the path->param_info field
-- to determine whether the path contains outerParams. That is not
-- enough. The following case used to segfault because
-- the index path's order-by clause contains outerParams.
| create table gist_tbl_github9733 (b box, p point, c circle); |
| insert into gist_tbl_github9733 |
| select box(point(0.05*i, 0.05*i), point(0.05*i, 0.05*i)), |
| point(0.05*i, 0.05*i), |
| circle(point(0.05*i, 0.05*i), 1.0) |
| from generate_series(0,10000) as i; |
| vacuum analyze gist_tbl_github9733; |
| create index gist_tbl_point_index_github9733 on gist_tbl_github9733 using gist (p); |
| set enable_seqscan=off; |
| set enable_bitmapscan=off; |
| explain (costs off) |
| select p from |
| (values (box(point(0,0), point(0.5,0.5))), |
| (box(point(0.5,0.5), point(0.75,0.75))), |
| (box(point(0.8,0.8), point(1.0,1.0)))) as v(bb) |
| cross join lateral |
| (select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss; |
| |
| reset enable_seqscan; |
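-- Re-plan and re-run with seqscan allowed again; the lateral ORDER BY
-- p <-> bb[0] clause still carries outer params.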
| explain (costs off) |
| select p from |
| (values (box(point(0,0), point(0.5,0.5))), |
| (box(point(0.5,0.5), point(0.75,0.75))), |
| (box(point(0.8,0.8), point(1.0,1.0)))) as v(bb) |
| cross join lateral |
| (select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss; |
| |
| select p from |
| (values (box(point(0,0), point(0.5,0.5))), |
| (box(point(0.5,0.5), point(0.75,0.75))), |
| (box(point(0.8,0.8), point(1.0,1.0)))) as v(bb) |
| cross join lateral |
| (select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss; |
| |
| reset enable_bitmapscan; |
| |
| --- |
| --- Test that GUC enable_hashagg takes effect for SEMI join |
| --- |
| drop table if exists foo; |
| drop table if exists bar; |
| |
| create table foo(a int); |
| create table bar(b int); |
| |
| insert into foo select i from generate_series(1,10)i; |
| insert into bar select i from generate_series(1,1000)i; |
| |
| analyze foo; |
| analyze bar; |
| |
| set enable_hashagg to on; |
| explain (costs off) |
| select * from foo where exists (select 1 from bar where foo.a = bar.b); |
| select * from foo where exists (select 1 from bar where foo.a = bar.b); |
| |
| set enable_hashagg to off; |
| explain (costs off) |
| select * from foo where exists (select 1 from bar where foo.a = bar.b); |
| select * from foo where exists (select 1 from bar where foo.a = bar.b); |
| |
| reset enable_hashagg; |
| drop table foo; |
| drop table bar; |
| |
| -- Fix github issue 10012 |
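-- The left join's ON clause below contains an IN sublink that is correlated
-- with the nullable-side rel fix_param_b.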
| create table fix_param_a (i int, j int); |
| create table fix_param_b (i int UNIQUE, j int); |
| create table fix_param_c (i int, j int); |
| |
| insert into fix_param_a select i, i from generate_series(1,20)i; |
| insert into fix_param_b select i, i from generate_series(1,2000)i; |
| insert into fix_param_c select i, i from generate_series(1,2000)i; |
| |
| analyze fix_param_a; |
| analyze fix_param_b; |
| analyze fix_param_c; |
| |
| explain (costs off) |
| select * from fix_param_a left join fix_param_b on |
| fix_param_a.i = fix_param_b.i and fix_param_b.j in |
| (select j from fix_param_c where fix_param_b.i = fix_param_c.i) |
| order by 1; |
| select * from fix_param_a left join fix_param_b on |
| fix_param_a.i = fix_param_b.i and fix_param_b.j in |
| (select j from fix_param_c where fix_param_b.i = fix_param_c.i) |
| order by 1; |
| |
-- Test a targetlist that contains a PlaceHolderVar.
-- When creating a redistribute motion with hash keys,
-- the Cloudberry planner invokes `cdbpullup_findEclassInTargetList`.
-- The following test case uses the non-strict function `coalesce`
-- in a subquery on the nullable side of an outer join and thus
-- has a PlaceHolderVar in its targetlist. The case tests whether
-- `cdbpullup_findEclassInTargetList` handles PlaceHolderVar
-- correctly.
-- See github issue: https://github.com/greenplum-db/gpdb/issues/10315
| create table t_issue_10315 ( id1 int, id2 int ); |
| |
| insert into t_issue_10315 select i,i from generate_series(1, 2)i; |
| insert into t_issue_10315 select i,null from generate_series(1, 2)i; |
| insert into t_issue_10315 select null,i from generate_series(1, 2)i; |
| |
| select * from |
| ( select coalesce( bq.id1 ) id1, coalesce ( bq.id2 ) id2 |
| from ( select r.id1, r.id2 from t_issue_10315 r group by r.id1, r.id2 ) bq ) t |
| full join ( select r.id1, r.id2 from t_issue_10315 r group by r.id1, r.id2 ) bq_all |
| on t.id1 = bq_all.id1 and t.id2 = bq_all.id2 |
| full join ( select r.id1, r.id2 from t_issue_10315 r group by r.id1, r.id2 ) tq_all |
| on (coalesce(t.id1) = tq_all.id1 and t.id2 = tq_all.id2) ; |
| |
| drop table t_issue_10315; |