src/test/regress/sql/gp_distinct_plans.sql - cloudberry - Git at Google

 --
 -- Test all the different plan shapes that the planner can generate for
 -- DISTINCT queries
 --
 create table distinct_test (a int, b int, c int) distributed by (a);
 insert into distinct_test select g / 1000, g / 2000, g from generate_series(1, 10000) g;
 analyze distinct_test;

 --
 -- With the default cost settings, you get hashed plans
 --

 -- If the DISTINCT is a superset of the table's distribution keys, the
 -- duplicates can be eliminated independently in the segments.
 explain select distinct a, b from distinct_test;
 select distinct a, b from distinct_test;

 -- Otherwise, redistribution is needed
 explain select distinct b from distinct_test;
 select distinct b from distinct_test;

 -- The two-stage aggregation can be disabled with GUC
 set gp_enable_preunique = off;
 explain select distinct b from distinct_test;
 reset gp_enable_preunique;

 -- If the input is highly unique already the pre-Unique step is not worthwhile.
 -- (Only print count(*) of the result because it returns so many rows)
 explain select distinct c from distinct_test;
 select count(*) from (
         select distinct c from distinct_test
 offset 0) as x;

 --
 -- Repeat the same tests with sorted Unique plans
 --
 set enable_hashagg=off;
 set optimizer_enable_hashagg=off;

 -- If the DISTINCT is a superset of the table's distribution keys, the
 -- duplicates can be eliminated independently in the segments.
 explain select distinct a, b from distinct_test;
 select distinct a, b from distinct_test;

 -- Otherwise, redistribution is needed
 explain select distinct b from distinct_test;
 select distinct b from distinct_test;

 -- If the input is highly unique already the pre-Unique step is not worthwhile.
 -- (Only print count(*) of the result because it returns so many rows)
 explain select distinct c from distinct_test;
 select count(*) from (
         select distinct c from distinct_test
 offset 0) as x;

 --
 -- Also test paths where the explicit Sort is not needed
 --
 create index on distinct_test (a, b);
 create index on distinct_test (b);
 create index on distinct_test (c);
 set random_page_cost=1;


 -- If the DISTINCT is a superset of the table's distribution keys, the
 -- duplicates can be eliminated independently in the segments.
 explain select distinct a, b from distinct_test;
 select distinct a, b from distinct_test;

 -- Otherwise, redistribution is needed
 explain select distinct b from distinct_test;
 select distinct b from distinct_test;

 -- If the input is highly unique already the pre-Unique step is not worthwhile.
 -- (Only print count(*) of the result because it returns so many rows)
 explain select distinct c from distinct_test;
 select count(*) from (
         select distinct c from distinct_test
 offset 0) as x;
	--
	-- Test all the different plan shapes that the planner can generate for
	-- DISTINCT queries
	--
	create table distinct_test (a int, b int, c int) distributed by (a);
	insert into distinct_test select g / 1000, g / 2000, g from generate_series(1, 10000) g;
	analyze distinct_test;

	--
	-- With the default cost settings, you get hashed plans
	--

	-- If the DISTINCT is a superset of the table's distribution keys, the
	-- duplicates can be eliminated independently in the segments.
	explain select distinct a, b from distinct_test;
	select distinct a, b from distinct_test;

	-- Otherwise, redistribution is needed
	explain select distinct b from distinct_test;
	select distinct b from distinct_test;

	-- The two-stage aggregation can be disabled with GUC
	set gp_enable_preunique = off;
	explain select distinct b from distinct_test;
	reset gp_enable_preunique;

	-- If the input is highly unique already the pre-Unique step is not worthwhile.
	-- (Only print count(*) of the result because it returns so many rows)
	explain select distinct c from distinct_test;
	select count(*) from (
	select distinct c from distinct_test
	offset 0) as x;

	--
	-- Repeat the same tests with sorted Unique plans
	--
	set enable_hashagg=off;
	set optimizer_enable_hashagg=off;

	-- If the DISTINCT is a superset of the table's distribution keys, the
	-- duplicates can be eliminated independently in the segments.
	explain select distinct a, b from distinct_test;
	select distinct a, b from distinct_test;

	-- Otherwise, redistribution is needed
	explain select distinct b from distinct_test;
	select distinct b from distinct_test;

	-- If the input is highly unique already the pre-Unique step is not worthwhile.
	-- (Only print count(*) of the result because it returns so many rows)
	explain select distinct c from distinct_test;
	select count(*) from (
	select distinct c from distinct_test
	offset 0) as x;

	--
	-- Also test paths where the explicit Sort is not needed
	--
	create index on distinct_test (a, b);
	create index on distinct_test (b);
	create index on distinct_test (c);
	set random_page_cost=1;


	-- If the DISTINCT is a superset of the table's distribution keys, the
	-- duplicates can be eliminated independently in the segments.
	explain select distinct a, b from distinct_test;
	select distinct a, b from distinct_test;

	-- Otherwise, redistribution is needed
	explain select distinct b from distinct_test;
	select distinct b from distinct_test;

	-- If the input is highly unique already the pre-Unique step is not worthwhile.
	-- (Only print count(*) of the result because it returns so many rows)
	explain select distinct c from distinct_test;
	select count(*) from (
	select distinct c from distinct_test
	offset 0) as x;