src/test/regress/sql/qp_full_join.sql - cloudberry - Git at Google

 -- This test verifies plans involving full joins, which currently have
 -- limited ORCA support. The first half of the tests covers full hash
 -- join. Specifically, we examine if the motions are correctly enforced,
 -- and if the full hash join delivers correct distribution spec. The
 -- second half of the tests (currently don't exist) covers full merge
 -- join. Specifically, we examine if the plan alternative is correctly
 -- generated, given a versatile combination of join conditions.

 -- start_ignore
 drop schema if exists full_join cascade;
 -- end_ignore

 -- greenplum
 create schema full_join;
 set search_path=full_join;
 set optimizer_trace_fallback=on;

 -- distributed
 create table dist (c1 int) distributed by (c1);
 insert into dist select i from generate_series(1,999) i;
 insert into dist select null from generate_series(1,10);
 create table dist2 (c1 int, c2 int) distributed by (c1);
 insert into dist2 select i, i+1 from generate_series(100,1099) i;
 insert into dist2 select null from generate_series(1,10);

 -- randomly distributed
 create table rand (c1 int) distributed randomly;
 insert into rand select i from generate_series(-100,899) i;
 insert into rand select null from generate_series(1,10);

 -- replicated
 create table rep (c1 int) distributed replicated;
 insert into rep select i from generate_series(-200,799) i;
 insert into rep select null from generate_series(1,20);
 create table rep2 (c1 int) distributed replicated;
 insert into rep2 select i from generate_series(-300,699) i;
 insert into rep2 select null from generate_series(1,20);

 -- tainted replicated
 create view tainted_rep as (select * from rep limit 2);
 create view tainted_rep2 as (select * from rep2 limit 3);

 -- universal
 create view uni as (select generate_series(-10,10) c1);
 create view uni2 as (select unnest(string_to_array('-3,-2,-1,0,1,2,3',','))::int c1);

 -- partitioned
 create table part (c1 int, c2 int) partition by list(c2) (
 partition part1 values (1, 2, 3, 4),
 partition part2 values (5, 6, 7),
 partition part3 values (8, 9, 0),
 default partition part4);
 insert into part select i, i%10 from generate_series(-400, 599) i;
 insert into part select null from generate_series(1,20);

 analyze dist;
 analyze dist2;
 analyze rand;
 analyze rep;
 analyze rep2;
 analyze part;

 -- Disable merge join and commutative hash join alternatives, for a clear
 -- view of the distribution request and the derived property given two
 -- relations with different distribution policies. The idea is to maintain
 -- the left child on the left and the right child on the right. This ensures
 -- a clear understanding of how each side responds to the request and what
 -- property we derive based on the left and right children's properties.

 select disable_xform('CXformImplementFullOuterMergeJoin');
 select disable_xform('CXformFullJoinCommutativity');
 select disable_xform('CXformInnerJoinCommutativity');
 select disable_xform('CXformLeftJoin2RightJoin');

 --------------------------------------------
 -- 2-table join: test requested distribution spec
 --------------------------------------------

 -- Full hash join sends <hash, hash> and <singleton, singleton> requests to
 -- its children. A quick way to tell which alternative is selected is by
 -- examining whether the full hash join occurs on the segments or coordinator.
 -- If the join occurs on the segments, it indicates both inputs are made hash
 -- distributed. If the join occurs on the coordinator, it indicates both inputs
 -- are rendered available on the coordinator.
 --
 -- The following tests check the correctness of the distribution requests.
 -- Specifically, we ensure necessary motions aren't missing, and the full
 -- join doesn't output duplicates.

 -- distributed ⟗  random
 -- The right relation is randomly distributed. It is redistributed to be hash
 -- distributed. Both relations are deduplicated to begin with, so there's no
 -- duplication risk.
 explain (costs off, timing off, summary off) select * from dist full join rand on dist.c1 = rand.c1;
 select count(*) from dist full join rand on dist.c1 = rand.c1;

 -- distributed ⟗  universal
 -- The right relation is universal. To avoid duplicates, the left side is
 -- gathered onto the coordinator.
 explain (costs off, timing off, summary off) select * from dist full join uni on dist.c1 = uni.c1;
 select count(*) from dist full join uni on dist.c1 = uni.c1;

 -- universal ⟗  distributed
 -- The left relation is universal. To avoid duplicates, a hash filter
 -- (non-physical motion) is applied to the left side.
 explain (costs off, timing off, summary off) select * from uni full join dist on uni.c1 = dist.c1;
 select count(*) from uni full join dist on uni.c1 = dist.c1;

 -- random ⟗  replicated
 -- The right relation is replicated. To avoid duplicates, both sides are
 -- gathered onto the coordinator.
 explain (costs off, timing off, summary off) select * from rand full join rep on rand.c1 = rep.c1;
 select count(*) from rand full join rep on rand.c1 = rep.c1;

 -- replicated ⟗  random
 -- The left relation is replicated. To avoid duplicates, a hash filter
 -- (non-physical motion) is applied to the left side. The right relation
 -- is randomly distributed. It's redistributed to be hash distributed.
 explain (costs off, timing off, summary off) select * from rep full join rand on rep.c1 = rand.c1;
 select count(*) from rep full join rand on rep.c1 = rand.c1;

 -- replicated ⟗  universal
 -- The left relation is replicated, and the right relation is universal. To
 -- avoid duplicates, the left side is gathered onto the coordinator.
 explain (costs off, timing off, summary off) select * from rep full join uni on rep.c1 = uni.c1;
 select count(*) from rep full join uni on rep.c1 = uni.c1;

 -- universal ⟗  replicated
 -- The left relation is universal, and the right relation is replicated. To
 -- avoid duplicates, the right side is gathered onto the coordinator.
 explain (costs off, timing off, summary off) select * from uni full join rep on uni.c1 = rep.c1;
 select count(*) from uni full join rep on uni.c1 = rep.c1;

 -- tainted-replicated ⟗  tainted-replicated
 -- Both left and right relations are tainted-replicated. To avoid duplicates,
 -- both sides are gathered onto the coordinator.
 explain (costs off, timing off, summary off) select * from tainted_rep full join tainted_rep2 on tainted_rep.c1 = tainted_rep2.c1;

 -- tainted-replicated ⟗  distributed
 -- The left relation is tainted-replicated. To avoid duplicates, the left side
 -- is redistributed from one segment onto all the segments.
 explain (costs off, timing off, summary off) select * from tainted_rep full join dist on tainted_rep.c1 = dist.c1;

 ------------------------------------------
 -- 3-table join: test derived distribution spec
 ------------------------------------------

 -- The 2-table join tests are effective in assessing required distribution
 -- specs. They provide some coverage for testing derived spec. Essentially,
 -- if the derived spec falls under the Non-Singleton category, the join output
 -- will be gathered onto the coordinator, whereas if the derived spec is
 -- already singleton, no gather motion is necessary. However, for a thorough
 -- examination, 3-table join tests can verify if motions other than gather are
 -- properly enforced, especially in scenarios where the join conditions are
 -- null aware.

 -- The following views are named after the likely property (so that the cost
 -- is low) that they deliver when used in a join operation.
 create view vw_dist as (select dist.c1 as c11, dist2.c1 as c12 from dist full join dist2 on dist.c1 = dist2.c1);
 create view vw_uni as (select uni.c1 as c11, uni2.c1 as c12 from uni full join uni2 on uni.c1 = uni2.c1);
 create view vw_sin as (select rep.c1 as c11, uni.c1 as c12 from rep full join uni on rep.c1 = uni.c1);
 create view vw_rep as (select uni.c1 as c11, rep.c1 as c12 from uni full join rep on uni.c1 = rep.c1);
 create view vw_rep2 as (select rep.c1 as c11, rep2.c1 as c12 from rep full join rep2 on rep.c1 = rep2.c1);

 -- Full hash join of two distributed relations most likely delivers the
 -- combined hash properties from its two children for all non-null tuples.
 -- If the full join is placed on the left side of an outer join, its output
 -- doesn't need redistribution if the outer join condition isn't null aware.
 -- This is because outer joins send hash distribution requests to their left
 -- child without request null colocation. A motion is necessary, however, if
 -- the join condition is INDF (is not distinct from).
 --
 -- (distributed ⟗  distributed) ⟕ random
 explain (costs off, timing off, summary off) select * from vw_dist left join rand on vw_dist.c11 = rand.c1;
 explain (costs off, timing off, summary off) select * from vw_dist left join rand on vw_dist.c12 = rand.c1;
 explain (costs off, timing off, summary off) select * from vw_dist left join rand on vw_dist.c11 is not distinct from rand.c1;
 explain (costs off, timing off, summary off) select * from vw_dist left join rand on vw_dist.c12 is not distinct from rand.c1;
 select count(*) from vw_dist left join rand on vw_dist.c11 = rand.c1;
 select count(*) from vw_dist left join rand on vw_dist.c12 = rand.c1;
 select count(*) from vw_dist left join rand on vw_dist.c11 is not distinct from rand.c1;
 select count(*) from vw_dist left join rand on vw_dist.c12 is not distinct from rand.c1;

 -- (distributed ⟗  distributed) ⟗  random
 explain (costs off, timing off, summary off) select * from vw_dist full join rand on vw_dist.c11 = rand.c1;
 explain (costs off, timing off, summary off) select * from vw_dist full join rand on vw_dist.c12 = rand.c1;
 explain (costs off, timing off, summary off) select * from vw_dist full join rand on vw_dist.c11 is not distinct from rand.c1;
 explain (costs off, timing off, summary off) select * from vw_dist full join rand on vw_dist.c12 is not distinct from rand.c1;

 select count(*) from vw_dist full join rand on vw_dist.c11 = rand.c1;
 select count(*) from vw_dist full join rand on vw_dist.c12 = rand.c1;
 select count(*) from vw_dist full join rand on vw_dist.c11 is not distinct from rand.c1;
 select count(*) from vw_dist full join rand on vw_dist.c12 is not distinct from rand.c1;

 -- If the full join is placed on the right side of an outer join, or, is
 -- used in an inner join, its output needs redistribution regardless of
 -- the null awareness of the join condition. This is because outer joins
 -- send hash distribution requests to their right child requesting null
 -- colocation, so does inner joins to both children. Note those motions
 -- aren't necessary, because null colocation is only relevant if the join
 -- condition is null aware. Currently, however, the motion exists because
 -- we request tighter specs than that we need. We should look into relaxing
 -- such requests in future.
 --
 -- random ⟕  (distributed ⟗  distributed)
 explain (costs off, timing off, summary off) select * from rand left join vw_dist on rand.c1 = vw_dist.c11;
 explain (costs off, timing off, summary off) select * from rand left join vw_dist on rand.c1 = vw_dist.c12;
 select count(*) from rand left join vw_dist on rand.c1 = vw_dist.c11;
 select count(*) from rand left join vw_dist on rand.c1 = vw_dist.c12;

 -- random ⟗  (distributed ⟗  distributed)
 explain (costs off, timing off, summary off) select * from rand full join vw_dist on rand.c1 = vw_dist.c11;
 explain (costs off, timing off, summary off) select * from rand full join vw_dist on rand.c1 = vw_dist.c12;
 select count(*) from rand full join vw_dist on rand.c1 = vw_dist.c11;
 select count(*) from rand full join vw_dist on rand.c1 = vw_dist.c12;

 -- (distributed ⟗  distributed) ⋈ random
 explain (costs off, timing off, summary off) select * from vw_dist join rand on vw_dist.c11 = rand.c1;
 explain (costs off, timing off, summary off) select * from vw_dist join rand on vw_dist.c12 = rand.c1;
 select count(*) from vw_dist join rand on vw_dist.c11 = rand.c1;
 select count(*) from vw_dist join rand on vw_dist.c12 = rand.c1;

 -- Full hash join of two universal relations most likely delivers universal
 -- property. In the following tests, the full join is placed on the outer
 -- side of an inner join. To avoid duplicates, a hash filter (non-physical
 -- motion) is applied to that side.
 --
 -- (universal ⟗  universal) ⋈ random
 explain (costs off, timing off, summary off) select * from vw_uni join rand on vw_uni.c11 = rand.c1;
 explain (costs off, timing off, summary off) select * from vw_uni join rand on vw_uni.c12 = rand.c1;
 select count(*) from vw_uni join rand on vw_uni.c11 = rand.c1;
 select count(*) from vw_uni join rand on vw_uni.c12 = rand.c1;

 -- Full hash join of a replicated and a universal relation delivers singleton
 -- property. In the following tests, the full join is placed on the outer side
 -- of an inner join. Its output data is hash or randomly redistributed across
 -- the segments to maximize MPP execution.
 --
 -- (replicated ⟗  universal) ⋈ random
 explain (costs off, timing off, summary off) select * from vw_sin join rand on vw_sin.c11 = rand.c1;
 explain (costs off, timing off, summary off) select * from vw_sin join rand on vw_sin.c12 = rand.c1;
 select count(*) from vw_sin join rand on vw_sin.c11 = rand.c1;
 select count(*) from vw_sin join rand on vw_sin.c12 = rand.c1;

 -- Full hash join of a universal and a replicated relation most likely
 -- delivers replicated property. In the following tests, the full join is
 -- placed on the inner side of an inner join. With the inner side being
 -- replicated, the inner join executes without motion.
 --
 -- random ⋈ (universal ⟗  replicated)
 explain (costs off, timing off, summary off) select * from rand join vw_rep on rand.c1 = vw_rep.c11;
 explain (costs off, timing off, summary off) select * from rand join vw_rep on rand.c1 = vw_rep.c12;
 select count(*) from rand join vw_rep on rand.c1 = vw_rep.c11;
 select count(*) from rand join vw_rep on rand.c1 = vw_rep.c12;

 -- Full hash join of two replicated relations most likely delivers replicated
 -- property. In the following tests, the full join is placed on the inner side
 -- of an inner join. With the inner side being replicated, the inner join
 -- executes without motion.
 --
 -- random ⋈ (replicated ⟗  replicated)
 explain (costs off, timing off, summary off) select * from rand join vw_rep2 on rand.c1 = vw_rep2.c11;
 explain (costs off, timing off, summary off) select * from rand join vw_rep2 on rand.c1 = vw_rep2.c12;
 select count(*) from rand join vw_rep2 on rand.c1 = vw_rep2.c11;
 select count(*) from rand join vw_rep2 on rand.c1 = vw_rep2.c12;

 --------------------------------------------
 -- Self join: test derived distribution spec
 --------------------------------------------

 -- Spec derivation of self full join generally follows that of 2-table
 -- full join. One exception is when a distributed table full joins
 -- itself on the same column. This guarantees match in each non-null tuple,
 -- ensuring nulls stay colocated.

 create view vw_self_same_col as (select o1.c1 as c11, o2.c1 as c12 from dist2 o1 full join (select distinct c1 from dist2) o2 on o1.c1 = o2.c1);
 create view vw_self_diff_col as (select o1.c1 as c11, o1.c2 as c12, o2.c1 as c21, o2.c2 as c22 from dist2 o1 full join dist2 o2 on o1.c1 = o2.c2);
 create view vw_self_expr_col as (select o1.c1 as c11, o2.c1 as c12 from dist2 o1 full join (select distinct c1 from dist2) o2 on o1.c1 = o2.c1+1);

 -- (distributed ⟗  itself on the same column) ⋈ distributed
 -- Self full hash join of a distributed relation on the same column delivers
 -- the combined hash properties from its two children, with null tuples
 -- colocated. Therefore, its output doesn't need to be redistributed, even
 -- when the join condition is null aware.
 explain (costs off, timing off, summary off) select * from vw_self_same_col join dist on vw_self_same_col.c11 is not distinct from dist.c1;
 select count(*) from vw_self_same_col join dist on vw_self_same_col.c11 is not distinct from dist.c1;

 -- (distributed ⟗  itself on different columns) ⋈ distributed
 -- Self full hash join of a distributed relation on different columns, on the,
 -- other hand, doesn't ensure null colocation. Its output does require
 -- redistribution to colocate null tuples if the join condition is null aware.
 explain (costs off, timing off, summary off) select * from vw_self_diff_col join dist on vw_self_diff_col.c11 is not distinct from dist.c1;
 select count(*) from vw_self_diff_col join dist on vw_self_diff_col.c11 is not distinct from dist.c1;

 -- (distributed ⟗  itself on the same column with an expression) ⋈ distributed
 -- Self full hash join of a distributed relation on the same column, with
 -- an expression applied to one side, is essentially a join on different
 -- columns. Its output needs to be redistributed to colocate null tuples if
 -- the join condition is null aware.
 explain (costs off, timing off, summary off) select * from vw_self_expr_col join dist on vw_self_expr_col.c11 is not distinct from dist.c1;
 select count(*) from vw_self_expr_col join dist on vw_self_expr_col.c11 is not distinct from dist.c1;

 --------------------------------
 -- dynamic partition elimination
 --------------------------------

 -- Similar to left join, DPE is disabled for full join. This is because for
 -- each right tuple scanned, we cannot skip scanning partitions from the left
 -- side. In outer join, the left table is (also) an outer table, where all the
 -- tuples are output with or without a match.
 --
 -- partitioned ⟗  replicated
 explain (costs off, timing off, summary off) select * from part full join rep on part.c1 = rep.c1 and part.c2 = rep.c1;
 select count(*) from part full join rep on part.c1 = rep.c1 and part.c2 = rep.c1;

 -- (We don't test static partition elimination here because full join with a
 -- null filtering predicate that can be used for SPE is converted to left join
 -- in expression normalizer. It doesn't enter the logic of full hash join.)

 ------------------------
 -- commutative transform
 ------------------------

 -- distributed ⟗  distributed
 -- Full hash join commutative transform adds plan alternative where left and
 -- right children swap their places. Cost model chooses the more performant
 -- alternative where the smaller relation is placed on the right side, i.e.,
 -- hash side. Here dist is the smaller relation, with a tuple count 1 order
 -- of magnitude lower than that of relation dist2.
 explain (costs off, timing off, summary off) select * from dist full join dist2 on dist.c1 = dist2.c1;
 select enable_xform('CXformFullJoinCommutativity');
 explain (costs off, timing off, summary off) select * from dist full join dist2 on dist.c1 = dist2.c1;
	-- This test verifies plans involving full joins, which currently have
	-- limited ORCA support. The first half of the tests covers full hash
	-- join. Specifically, we examine if the motions are correctly enforced,
	-- and if the full hash join delivers correct distribution spec. The
	-- second half of the tests (currently don't exist) covers full merge
	-- join. Specifically, we examine if the plan alternative is correctly
	-- generated, given a versatile combination of join conditions.

	-- start_ignore
	drop schema if exists full_join cascade;
	-- end_ignore

	-- greenplum
	create schema full_join;
	set search_path=full_join;
	set optimizer_trace_fallback=on;

	-- distributed
	create table dist (c1 int) distributed by (c1);
	insert into dist select i from generate_series(1,999) i;
	insert into dist select null from generate_series(1,10);
	create table dist2 (c1 int, c2 int) distributed by (c1);
	insert into dist2 select i, i+1 from generate_series(100,1099) i;
	insert into dist2 select null from generate_series(1,10);

	-- randomly distributed
	create table rand (c1 int) distributed randomly;
	insert into rand select i from generate_series(-100,899) i;
	insert into rand select null from generate_series(1,10);

	-- replicated
	create table rep (c1 int) distributed replicated;
	insert into rep select i from generate_series(-200,799) i;
	insert into rep select null from generate_series(1,20);
	create table rep2 (c1 int) distributed replicated;
	insert into rep2 select i from generate_series(-300,699) i;
	insert into rep2 select null from generate_series(1,20);

	-- tainted replicated
	create view tainted_rep as (select * from rep limit 2);
	create view tainted_rep2 as (select * from rep2 limit 3);

	-- universal
	create view uni as (select generate_series(-10,10) c1);
	create view uni2 as (select unnest(string_to_array('-3,-2,-1,0,1,2,3',','))::int c1);

	-- partitioned
	create table part (c1 int, c2 int) partition by list(c2) (
	partition part1 values (1, 2, 3, 4),
	partition part2 values (5, 6, 7),
	partition part3 values (8, 9, 0),
	default partition part4);
	insert into part select i, i%10 from generate_series(-400, 599) i;
	insert into part select null from generate_series(1,20);

	analyze dist;
	analyze dist2;
	analyze rand;
	analyze rep;
	analyze rep2;
	analyze part;

	-- Disable merge join and commutative hash join alternatives, for a clear
	-- view of the distribution request and the derived property given two
	-- relations with different distribution policies. The idea is to maintain
	-- the left child on the left and the right child on the right. This ensures
	-- a clear understanding of how each side responds to the request and what
	-- property we derive based on the left and right children's properties.

	select disable_xform('CXformImplementFullOuterMergeJoin');
	select disable_xform('CXformFullJoinCommutativity');
	select disable_xform('CXformInnerJoinCommutativity');
	select disable_xform('CXformLeftJoin2RightJoin');

	--------------------------------------------
	-- 2-table join: test requested distribution spec
	--------------------------------------------

	-- Full hash join sends <hash, hash> and <singleton, singleton> requests to
	-- its children. A quick way to tell which alternative is selected is by
	-- examining whether the full hash join occurs on the segments or coordinator.
	-- If the join occurs on the segments, it indicates both inputs are made hash
	-- distributed. If the join occurs on the coordinator, it indicates both inputs
	-- are rendered available on the coordinator.
	--
	-- The following tests check the correctness of the distribution requests.
	-- Specifically, we ensure necessary motions aren't missing, and the full
	-- join doesn't output duplicates.

	-- distributed ⟗ random
	-- The right relation is randomly distributed. It is redistributed to be hash
	-- distributed. Both relations are deduplicated to begin with, so there's no
	-- duplication risk.
	explain (costs off, timing off, summary off) select * from dist full join rand on dist.c1 = rand.c1;
	select count(*) from dist full join rand on dist.c1 = rand.c1;

	-- distributed ⟗ universal
	-- The right relation is universal. To avoid duplicates, the left side is
	-- gathered onto the coordinator.
	explain (costs off, timing off, summary off) select * from dist full join uni on dist.c1 = uni.c1;
	select count(*) from dist full join uni on dist.c1 = uni.c1;

	-- universal ⟗ distributed
	-- The left relation is universal. To avoid duplicates, a hash filter
	-- (non-physical motion) is applied to the left side.
	explain (costs off, timing off, summary off) select * from uni full join dist on uni.c1 = dist.c1;
	select count(*) from uni full join dist on uni.c1 = dist.c1;

	-- random ⟗ replicated
	-- The right relation is replicated. To avoid duplicates, both sides are
	-- gathered onto the coordinator.
	explain (costs off, timing off, summary off) select * from rand full join rep on rand.c1 = rep.c1;
	select count(*) from rand full join rep on rand.c1 = rep.c1;

	-- replicated ⟗ random
	-- The left relation is replicated. To avoid duplicates, a hash filter
	-- (non-physical motion) is applied to the left side. The right relation
	-- is randomly distributed. It's redistributed to be hash distributed.
	explain (costs off, timing off, summary off) select * from rep full join rand on rep.c1 = rand.c1;
	select count(*) from rep full join rand on rep.c1 = rand.c1;

	-- replicated ⟗ universal
	-- The left relation is replicated, and the right relation is universal. To
	-- avoid duplicates, the left side is gathered onto the coordinator.
	explain (costs off, timing off, summary off) select * from rep full join uni on rep.c1 = uni.c1;
	select count(*) from rep full join uni on rep.c1 = uni.c1;

	-- universal ⟗ replicated
	-- The left relation is universal, and the right relation is replicated. To
	-- avoid duplicates, the right side is gathered onto the coordinator.
	explain (costs off, timing off, summary off) select * from uni full join rep on uni.c1 = rep.c1;
	select count(*) from uni full join rep on uni.c1 = rep.c1;

	-- tainted-replicated ⟗ tainted-replicated
	-- Both left and right relations are tainted-replicated. To avoid duplicates,
	-- both sides are gathered onto the coordinator.
	explain (costs off, timing off, summary off) select * from tainted_rep full join tainted_rep2 on tainted_rep.c1 = tainted_rep2.c1;

	-- tainted-replicated ⟗ distributed
	-- The left relation is tainted-replicated. To avoid duplicates, the left side
	-- is redistributed from one segment onto all the segments.
	explain (costs off, timing off, summary off) select * from tainted_rep full join dist on tainted_rep.c1 = dist.c1;

	------------------------------------------
	-- 3-table join: test derived distribution spec
	------------------------------------------

	-- The 2-table join tests are effective in assessing required distribution
	-- specs. They provide some coverage for testing derived spec. Essentially,
	-- if the derived spec falls under the Non-Singleton category, the join output
	-- will be gathered onto the coordinator, whereas if the derived spec is
	-- already singleton, no gather motion is necessary. However, for a thorough
	-- examination, 3-table join tests can verify if motions other than gather are
	-- properly enforced, especially in scenarios where the join conditions are
	-- null aware.

	-- The following views are named after the likely property (so that the cost
	-- is low) that they deliver when used in a join operation.
	create view vw_dist as (select dist.c1 as c11, dist2.c1 as c12 from dist full join dist2 on dist.c1 = dist2.c1);
	create view vw_uni as (select uni.c1 as c11, uni2.c1 as c12 from uni full join uni2 on uni.c1 = uni2.c1);
	create view vw_sin as (select rep.c1 as c11, uni.c1 as c12 from rep full join uni on rep.c1 = uni.c1);
	create view vw_rep as (select uni.c1 as c11, rep.c1 as c12 from uni full join rep on uni.c1 = rep.c1);
	create view vw_rep2 as (select rep.c1 as c11, rep2.c1 as c12 from rep full join rep2 on rep.c1 = rep2.c1);

	-- Full hash join of two distributed relations most likely delivers the
	-- combined hash properties from its two children for all non-null tuples.
	-- If the full join is placed on the left side of an outer join, its output
	-- doesn't need redistribution if the outer join condition isn't null aware.
	-- This is because outer joins send hash distribution requests to their left
	-- child without request null colocation. A motion is necessary, however, if
	-- the join condition is INDF (is not distinct from).
	--
	-- (distributed ⟗ distributed) ⟕ random
	explain (costs off, timing off, summary off) select * from vw_dist left join rand on vw_dist.c11 = rand.c1;
	explain (costs off, timing off, summary off) select * from vw_dist left join rand on vw_dist.c12 = rand.c1;
	explain (costs off, timing off, summary off) select * from vw_dist left join rand on vw_dist.c11 is not distinct from rand.c1;
	explain (costs off, timing off, summary off) select * from vw_dist left join rand on vw_dist.c12 is not distinct from rand.c1;
	select count(*) from vw_dist left join rand on vw_dist.c11 = rand.c1;
	select count(*) from vw_dist left join rand on vw_dist.c12 = rand.c1;
	select count(*) from vw_dist left join rand on vw_dist.c11 is not distinct from rand.c1;
	select count(*) from vw_dist left join rand on vw_dist.c12 is not distinct from rand.c1;

	-- (distributed ⟗ distributed) ⟗ random
	explain (costs off, timing off, summary off) select * from vw_dist full join rand on vw_dist.c11 = rand.c1;
	explain (costs off, timing off, summary off) select * from vw_dist full join rand on vw_dist.c12 = rand.c1;
	explain (costs off, timing off, summary off) select * from vw_dist full join rand on vw_dist.c11 is not distinct from rand.c1;
	explain (costs off, timing off, summary off) select * from vw_dist full join rand on vw_dist.c12 is not distinct from rand.c1;

	select count(*) from vw_dist full join rand on vw_dist.c11 = rand.c1;
	select count(*) from vw_dist full join rand on vw_dist.c12 = rand.c1;
	select count(*) from vw_dist full join rand on vw_dist.c11 is not distinct from rand.c1;
	select count(*) from vw_dist full join rand on vw_dist.c12 is not distinct from rand.c1;

	-- If the full join is placed on the right side of an outer join, or, is
	-- used in an inner join, its output needs redistribution regardless of
	-- the null awareness of the join condition. This is because outer joins
	-- send hash distribution requests to their right child requesting null
	-- colocation, so does inner joins to both children. Note those motions
	-- aren't necessary, because null colocation is only relevant if the join
	-- condition is null aware. Currently, however, the motion exists because
	-- we request tighter specs than that we need. We should look into relaxing
	-- such requests in future.
	--
	-- random ⟕ (distributed ⟗ distributed)
	explain (costs off, timing off, summary off) select * from rand left join vw_dist on rand.c1 = vw_dist.c11;
	explain (costs off, timing off, summary off) select * from rand left join vw_dist on rand.c1 = vw_dist.c12;
	select count(*) from rand left join vw_dist on rand.c1 = vw_dist.c11;
	select count(*) from rand left join vw_dist on rand.c1 = vw_dist.c12;

	-- random ⟗ (distributed ⟗ distributed)
	explain (costs off, timing off, summary off) select * from rand full join vw_dist on rand.c1 = vw_dist.c11;
	explain (costs off, timing off, summary off) select * from rand full join vw_dist on rand.c1 = vw_dist.c12;
	select count(*) from rand full join vw_dist on rand.c1 = vw_dist.c11;
	select count(*) from rand full join vw_dist on rand.c1 = vw_dist.c12;

	-- (distributed ⟗ distributed) ⋈ random
	explain (costs off, timing off, summary off) select * from vw_dist join rand on vw_dist.c11 = rand.c1;
	explain (costs off, timing off, summary off) select * from vw_dist join rand on vw_dist.c12 = rand.c1;
	select count(*) from vw_dist join rand on vw_dist.c11 = rand.c1;
	select count(*) from vw_dist join rand on vw_dist.c12 = rand.c1;

	-- Full hash join of two universal relations most likely delivers universal
	-- property. In the following tests, the full join is placed on the outer
	-- side of an inner join. To avoid duplicates, a hash filter (non-physical
	-- motion) is applied to that side.
	--
	-- (universal ⟗ universal) ⋈ random
	explain (costs off, timing off, summary off) select * from vw_uni join rand on vw_uni.c11 = rand.c1;
	explain (costs off, timing off, summary off) select * from vw_uni join rand on vw_uni.c12 = rand.c1;
	select count(*) from vw_uni join rand on vw_uni.c11 = rand.c1;
	select count(*) from vw_uni join rand on vw_uni.c12 = rand.c1;

	-- Full hash join of a replicated and a universal relation delivers singleton
	-- property. In the following tests, the full join is placed on the outer side
	-- of an inner join. Its output data is hash or randomly redistributed across
	-- the segments to maximize MPP execution.
	--
	-- (replicated ⟗ universal) ⋈ random
	explain (costs off, timing off, summary off) select * from vw_sin join rand on vw_sin.c11 = rand.c1;
	explain (costs off, timing off, summary off) select * from vw_sin join rand on vw_sin.c12 = rand.c1;
	select count(*) from vw_sin join rand on vw_sin.c11 = rand.c1;
	select count(*) from vw_sin join rand on vw_sin.c12 = rand.c1;

	-- Full hash join of a universal and a replicated relation most likely
	-- delivers replicated property. In the following tests, the full join is
	-- placed on the inner side of an inner join. With the inner side being
	-- replicated, the inner join executes without motion.
	--
	-- random ⋈ (universal ⟗ replicated)
	explain (costs off, timing off, summary off) select * from rand join vw_rep on rand.c1 = vw_rep.c11;
	explain (costs off, timing off, summary off) select * from rand join vw_rep on rand.c1 = vw_rep.c12;
	select count(*) from rand join vw_rep on rand.c1 = vw_rep.c11;
	select count(*) from rand join vw_rep on rand.c1 = vw_rep.c12;

	-- Full hash join of two replicated relations most likely delivers replicated
	-- property. In the following tests, the full join is placed on the inner side
	-- of an inner join. With the inner side being replicated, the inner join
	-- executes without motion.
	--
	-- random ⋈ (replicated ⟗ replicated)
	explain (costs off, timing off, summary off) select * from rand join vw_rep2 on rand.c1 = vw_rep2.c11;
	explain (costs off, timing off, summary off) select * from rand join vw_rep2 on rand.c1 = vw_rep2.c12;
	select count(*) from rand join vw_rep2 on rand.c1 = vw_rep2.c11;
	select count(*) from rand join vw_rep2 on rand.c1 = vw_rep2.c12;

	--------------------------------------------
	-- Self join: test derived distribution spec
	--------------------------------------------

	-- Spec derivation of self full join generally follows that of 2-table
	-- full join. One exception is when a distributed table full joins
	-- itself on the same column. This guarantees match in each non-null tuple,
	-- ensuring nulls stay colocated.

	create view vw_self_same_col as (select o1.c1 as c11, o2.c1 as c12 from dist2 o1 full join (select distinct c1 from dist2) o2 on o1.c1 = o2.c1);
	create view vw_self_diff_col as (select o1.c1 as c11, o1.c2 as c12, o2.c1 as c21, o2.c2 as c22 from dist2 o1 full join dist2 o2 on o1.c1 = o2.c2);
	create view vw_self_expr_col as (select o1.c1 as c11, o2.c1 as c12 from dist2 o1 full join (select distinct c1 from dist2) o2 on o1.c1 = o2.c1+1);

	-- (distributed ⟗ itself on the same column) ⋈ distributed
	-- Self full hash join of a distributed relation on the same column delivers
	-- the combined hash properties from its two children, with null tuples
	-- colocated. Therefore, its output doesn't need to be redistributed, even
	-- when the join condition is null aware.
	explain (costs off, timing off, summary off) select * from vw_self_same_col join dist on vw_self_same_col.c11 is not distinct from dist.c1;
	select count(*) from vw_self_same_col join dist on vw_self_same_col.c11 is not distinct from dist.c1;

	-- (distributed ⟗ itself on different columns) ⋈ distributed
	-- Self full hash join of a distributed relation on different columns, on the,
	-- other hand, doesn't ensure null colocation. Its output does require
	-- redistribution to colocate null tuples if the join condition is null aware.
	explain (costs off, timing off, summary off) select * from vw_self_diff_col join dist on vw_self_diff_col.c11 is not distinct from dist.c1;
	select count(*) from vw_self_diff_col join dist on vw_self_diff_col.c11 is not distinct from dist.c1;

	-- (distributed ⟗ itself on the same column with an expression) ⋈ distributed
	-- Self full hash join of a distributed relation on the same column, with
	-- an expression applied to one side, is essentially a join on different
	-- columns. Its output needs to be redistributed to colocate null tuples if
	-- the join condition is null aware.
	explain (costs off, timing off, summary off) select * from vw_self_expr_col join dist on vw_self_expr_col.c11 is not distinct from dist.c1;
	select count(*) from vw_self_expr_col join dist on vw_self_expr_col.c11 is not distinct from dist.c1;

	--------------------------------
	-- dynamic partition elimination
	--------------------------------

	-- Similar to left join, DPE is disabled for full join. This is because for
	-- each right tuple scanned, we cannot skip scanning partitions from the left
	-- side. In outer join, the left table is (also) an outer table, where all the
	-- tuples are output with or without a match.
	--
	-- partitioned ⟗ replicated
	explain (costs off, timing off, summary off) select * from part full join rep on part.c1 = rep.c1 and part.c2 = rep.c1;
	select count(*) from part full join rep on part.c1 = rep.c1 and part.c2 = rep.c1;

	-- (We don't test static partition elimination here because full join with a
	-- null filtering predicate that can be used for SPE is converted to left join
	-- in expression normalizer. It doesn't enter the logic of full hash join.)

	------------------------
	-- commutative transform
	------------------------

	-- distributed ⟗ distributed
	-- Full hash join commutative transform adds plan alternative where left and
	-- right children swap their places. Cost model chooses the more performant
	-- alternative where the smaller relation is placed on the right side, i.e.,
	-- hash side. Here dist is the smaller relation, with a tuple count 1 order
	-- of magnitude lower than that of relation dist2.
	explain (costs off, timing off, summary off) select * from dist full join dist2 on dist.c1 = dist2.c1;
	select enable_xform('CXformFullJoinCommutativity');
	explain (costs off, timing off, summary off) select * from dist full join dist2 on dist.c1 = dist2.c1;