blob: 72bb4b1e16c4edbe79abe2670e87fe69f715b89f [file] [log] [blame]
--
-- Test all the different plan shapes that the planner can generate for
-- DISTINCT queries
--
create table distinct_test (a int, b int, c int) distributed by (a);
insert into distinct_test select g / 1000, g / 2000, g from generate_series(1, 10000) g;
analyze distinct_test;
--
-- With the default cost settings, you get hashed plans
--
-- If the DISTINCT is a superset of the table's distribution keys, the
-- duplicates can be eliminated independently in the segments.
explain select distinct a, b from distinct_test;
select distinct a, b from distinct_test;
-- Otherwise, redistribution is needed
explain select distinct b from distinct_test;
select distinct b from distinct_test;
-- The two-stage aggregation can be disabled with GUC
set gp_enable_preunique = off;
explain select distinct b from distinct_test;
reset gp_enable_preunique;
-- If the input is highly unique already the pre-Unique step is not worthwhile.
-- (Only print count(*) of the result because it returns so many rows)
explain select distinct c from distinct_test;
select count(*) from (
select distinct c from distinct_test
offset 0) as x;
--
-- Repeat the same tests with sorted Unique plans
--
set enable_hashagg=off;
set optimizer_enable_hashagg=off;
-- If the DISTINCT is a superset of the table's distribution keys, the
-- duplicates can be eliminated independently in the segments.
explain select distinct a, b from distinct_test;
select distinct a, b from distinct_test;
-- Otherwise, redistribution is needed
explain select distinct b from distinct_test;
select distinct b from distinct_test;
-- If the input is highly unique already the pre-Unique step is not worthwhile.
-- (Only print count(*) of the result because it returns so many rows)
explain select distinct c from distinct_test;
select count(*) from (
select distinct c from distinct_test
offset 0) as x;
--
-- Also test paths where the explicit Sort is not needed
--
create index on distinct_test (a, b);
create index on distinct_test (b);
create index on distinct_test (c);
set random_page_cost=1;
-- If the DISTINCT is a superset of the table's distribution keys, the
-- duplicates can be eliminated independently in the segments.
explain select distinct a, b from distinct_test;
select distinct a, b from distinct_test;
-- Otherwise, redistribution is needed
explain select distinct b from distinct_test;
select distinct b from distinct_test;
-- If the input is highly unique already the pre-Unique step is not worthwhile.
-- (Only print count(*) of the result because it returns so many rows)
explain select distinct c from distinct_test;
select count(*) from (
select distinct c from distinct_test
offset 0) as x;