| -- Extra GPDB tests for joins. |
| |
| -- Ignore "workfile compresssion is not supported by this build" (see |
| -- 'zlib' test): |
| -- |
| -- start_matchignore |
| -- m/ERROR: workfile compresssion is not supported by this build/ |
| -- end_matchignore |
| |
| -- |
| -- test numeric hash join |
| -- |
| |
| set enable_hashjoin to on; |
| set enable_mergejoin to off; |
| set enable_nestloop to off; |
| create table nhtest (i numeric(10, 2)); |
| insert into nhtest values(100000.22); |
| insert into nhtest values(300000.19); |
| explain select * from nhtest a join nhtest b using (i); |
| select * from nhtest a join nhtest b using (i); |
| |
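-- Self-join with duplicate join keys, where the left join's condition also
-- includes a constant qual on the outer side (l1.a = 2), so only the a = 2
-- rows find a match in l3.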
| create temp table l(a int); |
| insert into l values (1), (1), (2); |
| select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1.a = 2 order by 1,2,3; |
| |
| -- |
| -- test hash join |
| -- |
| |
| create table hjtest (i int, j int); |
| insert into hjtest values(3, 4); |
| |
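-- The least() call below makes the hash clause an expression rather than a
-- plain column reference.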
| select count(*) from hjtest a1, hjtest a2 where a2.i = least (a1.i,4) and a2.j = 4; |
| |
| -- |
| -- Test for correct behavior when there is a Merge Join on top of Materialize |
-- on top of a Motion:
| -- 1. Use FULL OUTER JOIN to induce a Merge Join |
| -- 2. Use a large tuple size to induce a Materialize |
| -- 3. Use gp_dist_random() to induce a Redistribute |
| -- |
| |
| set enable_hashjoin to off; |
| set enable_mergejoin to on; |
| set enable_nestloop to off; |
| |
| DROP TABLE IF EXISTS alpha; |
| DROP TABLE IF EXISTS theta; |
| |
| CREATE TABLE alpha (i int, j int); |
| CREATE TABLE theta (i int, j char(10000000)); |
| |
| INSERT INTO alpha values (1, 1), (2, 2); |
| INSERT INTO theta values (1, 'f'), (2, 'g'); |
| |
| SELECT * |
| FROM gp_dist_random('alpha') FULL OUTER JOIN gp_dist_random('theta') |
| ON (alpha.i = theta.i) |
| WHERE (alpha.j IS NULL or theta.j IS NULL); |
| |
| reset enable_hashjoin; |
| reset enable_mergejoin; |
| reset enable_nestloop; |
| |
| -- |
| -- Predicate propagation over equality conditions |
| -- |
| |
| drop schema if exists pred; |
| create schema pred; |
| set search_path=pred; |
| |
| create table t1 (x int, y int, z int); |
| create table t2 (x int, y int, z int); |
| insert into t1 select i, i, i from generate_series(1,100) i; |
| insert into t2 select * from t1; |
| |
| analyze t1; |
| analyze t2; |
| |
| -- |
| -- infer over equalities |
| -- |
| explain select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x; |
| select count(*) from t1,t2 where t1.x = 100 and t1.x = t2.x; |
| |
| -- |
| -- infer over >= |
| -- |
| explain select * from t1,t2 where t1.x = 100 and t2.x >= t1.x; |
| select * from t1,t2 where t1.x = 100 and t2.x >= t1.x; |
| |
| -- |
| -- multiple inferences |
| -- |
| |
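-- optimizer_segments pins the number of segments ORCA plans for, presumably
-- so that the EXPLAIN output below does not depend on the actual cluster size.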
| set optimizer_segments=2; |
| explain select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x; |
| reset optimizer_segments; |
| select * from t1,t2 where t1.x = 100 and t1.x = t2.y and t1.x <= t2.x; |
| |
| -- |
| -- MPP-18537: hash clause references a constant in outer child target list |
| -- |
| |
| create table hjn_test (i int, j int); |
| insert into hjn_test values(3, 4); |
| create table int4_tbl (f1 int); |
| insert into int4_tbl values(123456), (-2147483647), (0), (-123456), (2147483647); |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4; |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1]; |
| select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1]; |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10); |
| select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1)); |
| |
| -- Same as the last query, but with a partitioned table (which requires a |
| -- Result node to do projection of the hash expression, as Append is not |
| -- projection-capable) |
| create table part4_tbl (f1 int4) partition by range (f1) (start(-1000000) end (1000000) every (1000000)); |
| insert into part4_tbl values |
| (-123457), (-123456), (-123455), |
| (-1), (0), (1), |
| (123455), (123456), (123457); |
| select * from part4_tbl a join part4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1)); |
| |
| -- |
| -- Test case where a Motion hash key is only needed for the redistribution, |
| -- and not returned in the final result set. There was a bug at one point where |
| -- tjoin.c1 was used as the hash key in a Motion node, but it was not added |
-- to the sub-plan's target list, causing a "variable not found in subplan
| -- target list" error. |
| -- |
| create table tjoin1(dk integer, id integer); |
| create table tjoin2(dk integer, id integer, t text); |
| create table tjoin3(dk integer, id integer, t text); |
| |
| insert into tjoin1 values (1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3); |
| insert into tjoin2 values (1, 1, '1-1'), (1, 2, '1-2'), (2, 1, '2-1'), (2, 2, '2-2'); |
| insert into tjoin3 values (1, 1, '1-1'), (2, 1, '2-1'); |
| |
| select tjoin1.id, tjoin2.t, tjoin3.t |
| from tjoin1 |
| left outer join (tjoin2 left outer join tjoin3 on tjoin2.id=tjoin3.id) on tjoin1.id=tjoin3.id; |
| |
| |
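-- Repeat the same queries with hash joins disabled in both the planner
-- (enable_hashjoin) and ORCA (optimizer_enable_hashjoin).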
| set enable_hashjoin to off; |
| set optimizer_enable_hashjoin = off; |
| |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,4) and hjn_test.j = 4; |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar,(array[4])[1]) and hjn_test.j = (array[4])[1]; |
| select count(*) from hjn_test, (select 3 as bar) foo where least (foo.bar,(array[4])[1]) = hjn_test.i and hjn_test.j = (array[4])[1]; |
| select count(*) from hjn_test, (select 3 as bar) foo where hjn_test.i = least (foo.bar, least(4,10)) and hjn_test.j = least(4,10); |
| select * from int4_tbl a join int4_tbl b on (a.f1 = (select f1 from int4_tbl c where c.f1=b.f1)); |
| |
| reset enable_hashjoin; |
| reset optimizer_enable_hashjoin; |
| |
-- In the case of a Left Anti Semi Join, if the left rel is empty, a dummy join
-- should be created
| drop table if exists foo; |
| drop table if exists bar; |
| create table foo (a int, b int); |
| create table bar (c int, d int); |
| insert into foo select generate_series(1,10); |
| insert into bar select generate_series(1,10); |
| |
| explain select a from foo where a<1 and a>1 and not exists (select c from bar where c=a); |
| select a from foo where a<1 and a>1 and not exists (select c from bar where c=a); |
| |
| -- The merge join executor code doesn't support LASJ_NOTIN joins. Make sure |
| -- the planner doesn't create an invalid plan with them. |
| create index index_foo on foo (a); |
| create index index_bar on bar (c); |
| |
| set enable_nestloop to off; |
| set enable_hashjoin to off; |
| set enable_mergejoin to on; |
| |
| select * from foo where a not in (select c from bar where c <= 5); |
| |
| set enable_nestloop to off; |
| set enable_hashjoin to on; |
| set enable_mergejoin to off; |
| |
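-- Set up a small dept hierarchy for the recursive-CTE hash join tests below;
-- the generate_series inserts presumably add enough rows for the hash table
-- to spill under the low statement_mem set later.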
| create table dept |
| ( |
| id int, |
| pid int, |
| name char(40) |
| ); |
| |
| insert into dept values(3, 0, 'root'); |
| insert into dept values(4, 3, '2<-1'); |
| insert into dept values(5, 4, '3<-2<-1'); |
| insert into dept values(6, 4, '4<-2<-1'); |
| insert into dept values(7, 3, '5<-1'); |
| insert into dept values(8, 7, '5<-1'); |
| insert into dept select i, i % 6 + 3 from generate_series(9,50) as i; |
| insert into dept select i, 99 from generate_series(100,15000) as i; |
| |
| ANALYZE dept; |
| |
| -- Test rescannable hashjoin with spilling hashtable |
| set statement_mem='1000kB'; |
| set gp_workfile_compression = off; |
| WITH RECURSIVE subdept(id, parent_department, name) AS |
| ( |
| -- non recursive term |
| SELECT * FROM dept WHERE name = 'root' |
| UNION ALL |
| -- recursive term |
| SELECT d.* FROM dept AS d, subdept AS sd |
| WHERE d.pid = sd.id |
| ) |
| SELECT count(*) FROM subdept; |
| |
| -- Test rescannable hashjoin with spilling hashtable, with compression |
| set gp_workfile_compression = on; |
| WITH RECURSIVE subdept(id, parent_department, name) AS |
| ( |
| -- non recursive term |
| SELECT * FROM dept WHERE name = 'root' |
| UNION ALL |
| -- recursive term |
| SELECT d.* FROM dept AS d, subdept AS sd |
| WHERE d.pid = sd.id |
| ) |
| SELECT count(*) FROM subdept; |
| |
| -- Test rescannable hashjoin with in-memory hashtable |
| reset statement_mem; |
| WITH RECURSIVE subdept(id, parent_department, name) AS |
| ( |
| -- non recursive term |
| SELECT * FROM dept WHERE name = 'root' |
| UNION ALL |
| -- recursive term |
| SELECT d.* FROM dept AS d, subdept AS sd |
| WHERE d.pid = sd.id |
| ) |
| SELECT count(*) FROM subdept; |
| |
| |
| -- MPP-29458 |
-- When we join on a clause involving two different types and one table is
-- distributed by one of those types, the query plan may redistribute data on
-- the other type. But the hash values of the two types would not be equal, so
-- the data would be redistributed to the wrong segments.
| create table test_timestamp_t1 (id numeric(10,0) ,field_dt date); |
| create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone); |
| |
| insert into test_timestamp_t1 values(10 ,'2018-1-10'); |
| insert into test_timestamp_t1 values(11 ,'2018-1-11'); |
| insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp); |
| insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp); |
| |
| -- Test nest loop redistribute keys |
| set enable_nestloop to on; |
| set enable_hashjoin to on; |
| set enable_mergejoin to on; |
| select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms; |
| |
| -- Test hash join redistribute keys |
| set enable_nestloop to off; |
| set enable_hashjoin to on; |
| set enable_mergejoin to on; |
| select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms; |
| |
| drop table test_timestamp_t1; |
| drop table test_timestamp_t2; |
| |
| -- Test merge join redistribute keys |
| create table test_timestamp_t1 (id numeric(10,0) ,field_dt date); |
| |
| create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone); |
| |
| insert into test_timestamp_t1 values(10 ,'2018-1-10'); |
| insert into test_timestamp_t1 values(11 ,'2018-1-11'); |
| insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp); |
| insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp); |
| |
| select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms; |
| |
| -- test float type |
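-- (float4 vs float8 join keys; the redistribution must hash both sides
-- consistently despite the differing types)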
| set enable_nestloop to off; |
| set enable_hashjoin to on; |
| set enable_mergejoin to on; |
| create table test_float1(id int, data float4); |
| create table test_float2(id int, data float8); |
| insert into test_float1 values(1, 10), (2, 20); |
| insert into test_float2 values(3, 10), (4, 20); |
| select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data; |
| |
| -- test int type |
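-- (int4 vs int8 join keys, the same cross-type redistribution concern as
-- above)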
| create table test_int1(id int, data int4); |
| create table test_int2(id int, data int8); |
| insert into test_int1 values(1, 10), (2, 20); |
| insert into test_int2 values(3, 10), (4, 20); |
| select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data; |
| |
-- Test to ensure that for a full outer join on varchar columns, the planner successfully finds a sort operator in the catalog
| create table input_table(a varchar(30), b varchar(30)); |
| set enable_hashjoin = off; |
| explain (costs off) select X.a from input_table X full join (select a from input_table) Y ON X.a = Y.a; |
| |
| -- Cleanup |
| reset enable_hashjoin; |
| set client_min_messages='warning'; -- silence drop-cascade NOTICEs |
| drop schema pred cascade; |
| reset search_path; |
| |
| -- github issue 5370 cases |
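-- The join below is on the text column 'name', which is presumably not the
-- distribution key, so the plan is expected to need a redistribution.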
| drop table if exists t5370; |
| drop table if exists t5370_2; |
| create table t5370(id int,name text); |
| insert into t5370 select i,i from generate_series(1,1000) i; |
| create table t5370_2 as select * from t5370; |
| analyze t5370_2; |
| analyze t5370; |
| explain select * from t5370 a , t5370_2 b where a.name=b.name; |
| |
| drop table t5370; |
| drop table t5370_2; |
| |
| -- github issue 6215 cases |
| -- When executing the following plan |
| -- ``` |
| -- Gather Motion 1:1 (slice1; segments: 1) |
| -- -> Merge Full Join |
| -- -> Seq Scan on int4_tbl a |
| -- -> Seq Scan on int4_tbl b |
-- ```
-- Cloudberry used to raise an assertion failure.
-- We now force a Material node to be added for a
-- merge full join on true.
| drop table if exists t6215; |
| create table t6215(f1 int4); |
| insert into t6215(f1) values (1), (2), (3); |
| |
| set enable_material = off; |
-- The plan should still have a Material operator
| explain (costs off) select * from t6215 a full join t6215 b on true; |
| select * from t6215 a full join t6215 b on true; |
| |
| drop table t6215; |
| |
| |
| -- |
| -- This tripped an assertion while deciding the locus for the joins. |
-- The code was failing to handle a join between SingleQE and Hash correctly
-- when there were join order restrictions. (See
-- https://github.com/greenplum-db/gpdb/issues/6643)
--
| select a.f1, b.f1, t.thousand, t.tenthous from |
| (select sum(f1) as f1 from int4_tbl i4b) b |
| left outer join |
| (select sum(f1)+1 as f1 from int4_tbl i4a) a ON a.f1 = b.f1 |
| left outer join |
| tenk1 t ON b.f1 = t.thousand and (a.f1+b.f1+999) = t.tenthous; |
| |
| |
| -- tests to ensure that join reordering of LOJs and inner joins produces the |
| -- correct join predicates & residual filters |
| drop table if exists t1, t2, t3; |
| CREATE TABLE t1 (a int, b int, c int); |
| CREATE TABLE t2 (a int, b int, c int); |
| CREATE TABLE t3 (a int, b int, c int); |
| INSERT INTO t1 SELECT i, i, i FROM generate_series(1, 1000) i; |
| INSERT INTO t2 SELECT i, i, i FROM generate_series(2, 1000) i; -- start from 2 so that one row from t1 doesn't match |
| INSERT INTO t3 VALUES (1, 2, 3), (NULL, 2, 2); |
| ANALYZE t1; |
| ANALYZE t2; |
| ANALYZE t3; |
| |
| -- ensure plan has a filter over left outer join |
| explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c)); |
| select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a IS NULL OR (t1.c = t3.c)); |
| |
| -- ensure plan has two inner joins with the where clause & join predicates ANDed |
| explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a); |
| select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a = t3.a); |
| |
| -- ensure plan has a filter over left outer join |
| explain (costs off) select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a); |
| select * from t1 left join t2 on (t1.a = t2.a) join t3 on (t1.b = t3.b) where (t2.a is distinct from t3.a); |
| |
| -- ensure plan has a filter over left outer join |
| explain select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a)); |
| select * from t3 join (select t1.a t1a, t1.b t1b, t1.c t1c, t2.a t2a, t2.b t2b, t2.c t2c from t1 left join t2 on (t1.a = t2.a)) t on (t1a = t3.a) WHERE (t2a IS NULL OR (t1c = t3.a)); |
| |
| -- ensure plan has a filter over left outer join |
| explain select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt |
| join t3 on tt.t1b = t3.b |
| join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b |
| join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b); |
| |
| select * from (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt |
| join t3 on tt.t1b = t3.b |
| join (select t1.a t1a, t1.b t1b, t2.a t2a, t2.b t2b from t1 left join t2 on t1.a = t2.a) tt1 on tt1.t1b = t3.b |
| join t3 t3_1 on tt1.t1b = t3_1.b and (tt1.t2a is NULL OR tt1.t1b = t3.b); |
| |
| -- test different join order enumeration methods |
| set optimizer_join_order = query; |
| select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; |
| set optimizer_join_order = greedy; |
| select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; |
| set optimizer_join_order = exhaustive; |
| select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; |
| set optimizer_join_order = exhaustive2; |
| select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; |
| reset optimizer_join_order; |
| select * from t1 join t2 on t1.a = t2.a join t3 on t1.b = t3.b; |
| |
| drop table t1, t2, t3; |
| |
| -- |
-- Test a bug where a nestloop path previously could not generate a Motion above
-- an index path, which is sometimes wrong (this test case is an example).
-- We now rely on parameterized-path-related variables to decide instead.
-- For now we conservatively disallow a Motion when either the outer or the
-- inner side requires parameters, although there is room for further
-- improvement (e.g. following the subplan code to broadcast a base rel if
-- needed, which would take considerable effort and does not seem worthwhile
-- given that the related code will probably be refactored for lateral support
-- in the near future). For the query and GUC settings below, the Postgres
-- planner cannot generate a plan.
| set enable_nestloop = 1; |
| set enable_material = 0; |
| set enable_seqscan = 0; |
| set enable_bitmapscan = 0; |
| explain select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1; |
| select tenk1.unique2 >= 0 from tenk1 left join tenk2 on true limit 1; |
| reset enable_nestloop; |
| reset enable_material; |
| reset enable_seqscan; |
| reset enable_bitmapscan; |
| |
-- The test cases below exercise the planner's cdbpath_motion_for_join, so we
-- turn ORCA off temporarily.
| set optimizer = off; |
| -- test outer join for general locus |
| -- replicated table's locus is SegmentGeneral |
create table trep_join_gp (c1 int, c2 int) distributed replicated;
| -- hash distributed table's locus is Hash |
create table thash_join_gp (c1 int, c2 int) distributed by (c1);
| -- randomly distributed table's locus is Strewn |
create table trand_join_gp (c1 int, c2 int) distributed randomly;
| -- start_ignore |
| create extension if not exists gp_debug_numsegments; |
| select gp_debug_set_create_table_default_numsegments(1); |
| -- end_ignore |
| -- the following replicated table's numsegments is 1 |
create table trep1_join_gp (c1 int, c2 int) distributed replicated;
| |
| insert into trep_join_gp values (1, 1), (2, 2); |
| insert into thash_join_gp values (1, 1), (2, 2); |
| insert into trep1_join_gp values (1, 1), (2, 2); |
| |
| analyze trep_join_gp; |
| analyze thash_join_gp; |
| analyze trep1_join_gp; |
| analyze trand_join_gp; |
| |
-- This test checks that General left join SegmentGeneral --> SegmentGeneral,
-- and that SegmentGeneral join Hash does not need a Motion.
| explain select * from generate_series(1, 5) g left join trep_join_gp on g = trep_join_gp.c1 join thash_join_gp on true; |
| select * from generate_series(1, 5) g left join trep_join_gp on g = trep_join_gp.c1 join thash_join_gp on true; |
| |
-- The following 4 tests check that for General left join Partitioned, we can
-- redistribute the general-locus relation when the join condition allows it.
-- If we can redistribute the general-locus relation, we should not gather it
-- to a SingleQE.
| explain select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c1; |
| select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c1; |
| |
| explain select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c2; |
| select * from generate_series(1, 5) g left join thash_join_gp on g = thash_join_gp.c2; |
| |
| explain select * from generate_series(1, 5) g left join trand_join_gp on g = trand_join_gp.c1; |
| select * from generate_series(1, 5) g left join trand_join_gp on g = trand_join_gp.c1; |
| |
| explain select * from generate_series(1, 5) g full join trand_join_gp on g = trand_join_gp.c1; |
| select * from generate_series(1, 5) g full join trand_join_gp on g = trand_join_gp.c1; |
| |
-- The following 3 tests check that for SegmentGeneral left join Partitioned,
-- we can redistribute the segmentgeneral-locus relation when the join
-- condition allows it. If we can redistribute it, we should not gather it to
-- a SingleQE.
| explain select * from trep_join_gp left join thash_join_gp using (c1); |
| select * from trep_join_gp left join thash_join_gp using (c1); |
| |
| explain select * from trep_join_gp left join trand_join_gp using (c1); |
| select * from trep_join_gp left join trand_join_gp using (c1); |
| |
| explain select * from trep1_join_gp join thash_join_gp using (c1); |
| select * from trep1_join_gp join thash_join_gp using (c1); |
| |
| drop table trep_join_gp; |
| drop table thash_join_gp; |
| drop table trand_join_gp; |
| drop table trep1_join_gp; |
| |
| -- select gp_debug_set_create_table_default_numsegments(3); |
| |
| reset optimizer; |
| |
-- The following cases test the planner's join size estimation,
-- so we need the optimizer (ORCA) to be off.
-- When a partitioned table is joined with another table on its partition key,
-- the planner should use the root table's stats instead of the largest
-- partition's.
| reset enable_hashjoin; |
| reset enable_mergejoin; |
| reset enable_nestloop; |
| |
| create table t_joinsize_1 (c1 int, c2 int) |
| partition by range (c2) |
| ( start (0) end (5) every (1), |
| default partition extra ); |
| |
| create table t_joinsize_2 (c1 int, c2 int); |
| |
| create table t_joinsize_3 (c int); |
| |
| insert into t_joinsize_1 select i, i%5 from generate_series(1, 200)i; |
| insert into t_joinsize_1 select 1, null from generate_series(1, 1000); |
| |
| insert into t_joinsize_2 select i,i%5 from generate_series(1, 1000)i; |
| insert into t_joinsize_3 select * from generate_series(1, 100); |
| |
| analyze t_joinsize_1; |
| analyze t_joinsize_2; |
| analyze t_joinsize_3; |
| |
-- the following query should not broadcast the join result of t_joinsize_1 and t_joinsize_2.
| explain select * from (t_joinsize_1 join t_joinsize_2 on t_joinsize_1.c2 = t_joinsize_2.c2) join t_joinsize_3 on t_joinsize_3.c = t_joinsize_1.c1; |
| |
| drop table t_joinsize_1; |
| drop table t_joinsize_2; |
| drop table t_joinsize_3; |
| |
-- Test that if a subquery's locus is General,
-- we keep it General.
| create table t_randomly_dist_table(c int); |
| |
| -- force_explain |
-- the following plan should not contain a Redistribute Motion (for the planner)
| explain |
| select * from ( |
| select a from generate_series(1, 10)a |
| union all |
| select a from generate_series(1, 10)a |
| ) t_subquery_general |
| join t_randomly_dist_table on t_subquery_general.a = t_randomly_dist_table.c; |
| |
| drop table t_randomly_dist_table; |
| |
| |
-- Test a lateral join whose inner plan contains a LIMIT.
-- We cannot pass params across a Motion, so we can only
-- generate a plan that gathers all the data to a SingleQE.
-- Here we create a composite data type to pass as a param
-- into the inner plan. That way, if we fail to pass the
-- correct params into the inner plan, it will throw an error
-- because of a null-pointer dereference. If we only used an
-- int param, the null-pointer error might not surface
-- because a null would be parsed as integer 0.
| |
| create type mytype_for_lateral_test as (x int, y int); |
| create table t1_lateral_limit(a int, b int, c mytype_for_lateral_test); |
| create table t2_lateral_limit(a int, b int); |
| insert into t1_lateral_limit values (1, 1, '(1,1)'); |
| insert into t1_lateral_limit values (1, 2, '(2,2)'); |
| insert into t2_lateral_limit values (2, 2); |
| insert into t2_lateral_limit values (3, 3); |
| |
| explain select * from t1_lateral_limit as t1 cross join lateral |
| (select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1)s; |
| |
| select * from t1_lateral_limit as t1 cross join lateral |
| (select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1)s; |
| |
-- Continuing with the above cases: if the lateral subquery contains UNION ALL
-- and some of its append queries contain a LIMIT, that may also lead to a bad
-- plan. The best solution may be to walk the query and do some static analysis
-- to find out which rels have to be gathered and materialized, but that is
-- complicated and seems less efficient. In the future we should do a bigger
-- refactor to make Greenplum support LATERAL well; for now, just make sure
-- we do not panic.
| explain (costs off) select * from t1_lateral_limit as t1 cross join lateral |
| ((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s; |
| |
| select * from t1_lateral_limit as t1 cross join lateral |
| ((select ((c).x+t2.b) as n from t2_lateral_limit as t2 order by n limit 1) union all select 1)s; |
| |
-- Test a lateral subquery that contains GROUP BY (grouping is another place
-- that may add motions to the subquery's plan).
| explain select * from t1_lateral_limit t1 cross join lateral |
| (select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x; |
| |
| select * from t1_lateral_limit t1 cross join lateral |
| (select (c).x+t2.a, sum(t2.a+t2.b) from t2_lateral_limit t2 group by (c).x+t2.a)x; |
| |
| -- The following case is from Github Issue |
| -- https://github.com/greenplum-db/gpdb/issues/8860 |
-- It is the same issue as in the above test cases.
| create table t_mylog_issue_8860 (myid int, log_date timestamptz ); |
| insert into t_mylog_issue_8860 values (1,timestamptz '2000-01-02 03:04'),(1,timestamptz '2000-01-02 03:04'-'1 hour'::interval); |
| insert into t_mylog_issue_8860 values (2,timestamptz '2000-01-02 03:04'),(2,timestamptz '2000-01-02 03:04'-'2 hour'::interval); |
| |
| explain select ml1.myid, log_date as first_date, ml2.next_date from t_mylog_issue_8860 ml1 |
| inner join lateral |
| (select myid, log_date as next_date |
| from t_mylog_issue_8860 where myid = ml1.myid and log_date > ml1.log_date order by log_date asc limit 1) ml2 |
| on true; |
| |
| select ml1.myid, log_date as first_date, ml2.next_date from t_mylog_issue_8860 ml1 |
| inner join lateral |
| (select myid, log_date as next_date |
| from t_mylog_issue_8860 where myid = ml1.myid and log_date > ml1.log_date order by log_date asc limit 1) ml2 |
| on true; |
| |
-- Test prefetching of the join qual.
-- We do not handle this correctly in all cases.
-- The only case where we need to prefetch the join qual is when:
-- 1. the outer plan contains a motion, and
-- 2. the join qual contains a subplan that contains a motion
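-- (client_min_messages was lowered earlier to silence drop notices; reset it
-- here, presumably so that any output from the debugging GUC below is visible)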
| reset client_min_messages; |
| set Test_print_prefetch_joinqual = true; |
-- prefetch join qual is only set correctly for the planner
| set optimizer = off; |
| |
| create table t1_test_pretch_join_qual(a int, b int, c int); |
| create table t2_test_pretch_join_qual(a int, b int, c int); |
| |
-- The following plan contains a redistribute motion in both the inner and the
-- outer plan. The join qual t1.c > t2.c contains no motion, so we should not
-- prefetch.
| explain (costs off) select * from t1_test_pretch_join_qual t1 join t2_test_pretch_join_qual t2 |
| on t1.b = t2.b and t1.c > t2.c; |
| |
| create table t3_test_pretch_join_qual(a int, b int, c int); |
| |
-- The following plan contains a motion in both the outer plan and the join
-- qual, so we should prefetch the join qual
| explain (costs off) select * from t1_test_pretch_join_qual t1 join t2_test_pretch_join_qual t2 |
| on t1.b = t2.b and t1.a > any (select sum(b) from t3_test_pretch_join_qual t3 where c > t2.a); |
| |
| reset Test_print_prefetch_joinqual; |
| reset optimizer; |
| |
| -- Github Issue: https://github.com/greenplum-db/gpdb/issues/9733 |
-- Previously the functions bring_to_outer_query and
-- bring_to_singleQE depended on the path->param_info field
-- to determine whether the path contains outerParams. That is not
-- enough. The following case used to segfault because
-- the index path's order-by clause contains outerParams.
| create table gist_tbl_github9733 (b box, p point, c circle); |
| insert into gist_tbl_github9733 |
| select box(point(0.05*i, 0.05*i), point(0.05*i, 0.05*i)), |
| point(0.05*i, 0.05*i), |
| circle(point(0.05*i, 0.05*i), 1.0) |
| from generate_series(0,10000) as i; |
| vacuum analyze gist_tbl_github9733; |
| create index gist_tbl_point_index_github9733 on gist_tbl_github9733 using gist (p); |
| set enable_seqscan=off; |
| set enable_bitmapscan=off; |
| explain (costs off) |
| select p from |
| (values (box(point(0,0), point(0.5,0.5))), |
| (box(point(0.5,0.5), point(0.75,0.75))), |
| (box(point(0.8,0.8), point(1.0,1.0)))) as v(bb) |
| cross join lateral |
| (select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss; |
| |
| reset enable_seqscan; |
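-- Re-plan and re-run with seqscan allowed again; the lateral ORDER BY
-- p <-> bb[0] clause still carries outer params.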
| explain (costs off) |
| select p from |
| (values (box(point(0,0), point(0.5,0.5))), |
| (box(point(0.5,0.5), point(0.75,0.75))), |
| (box(point(0.8,0.8), point(1.0,1.0)))) as v(bb) |
| cross join lateral |
| (select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss; |
| |
| select p from |
| (values (box(point(0,0), point(0.5,0.5))), |
| (box(point(0.5,0.5), point(0.75,0.75))), |
| (box(point(0.8,0.8), point(1.0,1.0)))) as v(bb) |
| cross join lateral |
| (select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss; |
| |
| reset enable_bitmapscan; |
| |
| --- |
| --- Test that GUC enable_hashagg takes effect for SEMI join |
| --- |
| drop table if exists foo; |
| drop table if exists bar; |
| |
| create table foo(a int); |
| create table bar(b int); |
| |
| insert into foo select i from generate_series(1,10)i; |
| insert into bar select i from generate_series(1,1000)i; |
| |
| analyze foo; |
| analyze bar; |
| |
| set enable_hashagg to on; |
| explain (costs off) |
| select * from foo where exists (select 1 from bar where foo.a = bar.b); |
| select * from foo where exists (select 1 from bar where foo.a = bar.b); |
| |
| set enable_hashagg to off; |
| explain (costs off) |
| select * from foo where exists (select 1 from bar where foo.a = bar.b); |
| select * from foo where exists (select 1 from bar where foo.a = bar.b); |
| |
| reset enable_hashagg; |
| drop table foo; |
| drop table bar; |
| |
| -- Fix github issue 10012 |
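-- The left join's ON clause below contains an IN sublink that is correlated
-- with the nullable-side rel fix_param_b.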
| create table fix_param_a (i int, j int); |
| create table fix_param_b (i int UNIQUE, j int); |
| create table fix_param_c (i int, j int); |
| |
| insert into fix_param_a select i, i from generate_series(1,20)i; |
| insert into fix_param_b select i, i from generate_series(1,2000)i; |
| insert into fix_param_c select i, i from generate_series(1,2000)i; |
| |
| analyze fix_param_a; |
| analyze fix_param_b; |
| analyze fix_param_c; |
| |
| explain (costs off) |
| select * from fix_param_a left join fix_param_b on |
| fix_param_a.i = fix_param_b.i and fix_param_b.j in |
| (select j from fix_param_c where fix_param_b.i = fix_param_c.i) |
| order by 1; |
| select * from fix_param_a left join fix_param_b on |
| fix_param_a.i = fix_param_b.i and fix_param_b.j in |
| (select j from fix_param_c where fix_param_b.i = fix_param_c.i) |
| order by 1; |
| |
-- Test a targetlist that contains a PlaceHolderVar.
-- When creating a redistribute motion with hash keys,
-- the Cloudberry planner invokes `cdbpullup_findEclassInTargetList`.
-- The following test case uses the non-strict function `coalesce`
-- in a subquery on the nullable side of an outer join and thus
-- has a PlaceHolderVar in its targetlist. The case tests whether
-- `cdbpullup_findEclassInTargetList` handles PlaceHolderVar
-- correctly.
-- See github issue: https://github.com/greenplum-db/gpdb/issues/10315
| create table t_issue_10315 ( id1 int, id2 int ); |
| |
| insert into t_issue_10315 select i,i from generate_series(1, 2)i; |
| insert into t_issue_10315 select i,null from generate_series(1, 2)i; |
| insert into t_issue_10315 select null,i from generate_series(1, 2)i; |
| |
| select * from |
| ( select coalesce( bq.id1 ) id1, coalesce ( bq.id2 ) id2 |
| from ( select r.id1, r.id2 from t_issue_10315 r group by r.id1, r.id2 ) bq ) t |
| full join ( select r.id1, r.id2 from t_issue_10315 r group by r.id1, r.id2 ) bq_all |
| on t.id1 = bq_all.id1 and t.id2 = bq_all.id2 |
| full join ( select r.id1, r.id2 from t_issue_10315 r group by r.id1, r.id2 ) tq_all |
| on (coalesce(t.id1) = tq_all.id1 and t.id2 = tq_all.id2) ; |
| |
| drop table t_issue_10315; |