src/test/regress/sql/select_distinct.sql - cloudberry - Git at Google

 --
 -- SELECT_DISTINCT
 --

 --
 -- awk '{print $3;}' onek.data | sort -n | uniq
 --
 SELECT DISTINCT two FROM onek ORDER BY 1;

 --
 -- awk '{print $5;}' onek.data | sort -n | uniq
 --
 SELECT DISTINCT ten FROM onek ORDER BY 1;

 --
 -- awk '{print $16;}' onek.data | sort -d | uniq
 --
 SELECT DISTINCT string4 FROM onek ORDER BY 1;

 --
 -- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
 -- sort +0n -1 +1d -2 +2n -3
 --
 SELECT DISTINCT two, string4, ten
    FROM onek
    ORDER BY two using <, string4 using <, ten using <;

 --
 -- awk '{print $2;}' person.data |
 -- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
 -- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
 -- awk 'BEGIN{FS="      ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
 -- sort -n -r | uniq
 --
 SELECT DISTINCT p.age FROM person* p ORDER BY age using >;

 --
 -- Check mentioning same column more than once
 --

 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT count(*) FROM
   (SELECT DISTINCT two, four, two FROM tenk1) ss;

 SELECT count(*) FROM
   (SELECT DISTINCT two, four, two FROM tenk1) ss;

 --
 -- Compare results between plans using sorting and plans using hash
 -- aggregation. Force spilling in both cases by setting work_mem low.
 --

 SET work_mem='64kB';

 -- Produce results with sorting.

 SET enable_hashagg=FALSE;
 SET optimizer_enable_hashagg=FALSE;

 SET jit_above_cost=0;

 EXPLAIN (costs off)
 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;

 CREATE TABLE distinct_group_1 AS
 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;

 SET jit_above_cost TO DEFAULT;

 CREATE TABLE distinct_group_2 AS
 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;

 SET enable_seqscan = 0;

 -- Check to see we get an incremental sort plan
 EXPLAIN (costs off)
 SELECT DISTINCT hundred, two FROM tenk1;

 RESET enable_seqscan;

 SET enable_hashagg=TRUE;
 SET optimizer_enable_hashagg=TRUE;

 -- Produce results with hash aggregation.

 SET enable_sort=FALSE;

 SET jit_above_cost=0;

 EXPLAIN (costs off)
 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;

 CREATE TABLE distinct_hash_1 AS
 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;

 SET jit_above_cost TO DEFAULT;

 CREATE TABLE distinct_hash_2 AS
 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;

 SET enable_sort=TRUE;

 SET work_mem TO DEFAULT;

 -- Compare results

 (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
   UNION ALL
 (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);

 (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
   UNION ALL
 (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);

 DROP TABLE distinct_hash_1;
 DROP TABLE distinct_hash_2;
 DROP TABLE distinct_group_1;
 DROP TABLE distinct_group_2;

 -- Test parallel DISTINCT
 SET parallel_tuple_cost=0;
 SET parallel_setup_cost=0;
 SET min_parallel_table_scan_size=0;
 SET max_parallel_workers_per_gather=2;

 -- Ensure we get a parallel plan
 EXPLAIN (costs off)
 SELECT DISTINCT four FROM tenk1;

 -- Ensure the parallel plan produces the correct results
 SELECT DISTINCT four FROM tenk1;

 CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
   BEGIN
     RETURN a;
   END;
 $$ LANGUAGE plpgsql PARALLEL UNSAFE;

 -- Ensure we don't do parallel distinct with a parallel unsafe function
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT distinct_func(1) FROM tenk1;

 -- make the function parallel safe
 CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
   BEGIN
     RETURN a;
   END;
 $$ LANGUAGE plpgsql PARALLEL SAFE;

 -- Ensure we do parallel distinct now that the function is parallel safe
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT distinct_func(1) FROM tenk1;

 RESET max_parallel_workers_per_gather;
 RESET min_parallel_table_scan_size;
 RESET parallel_setup_cost;
 RESET parallel_tuple_cost;

 --
 -- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
 -- all of the distinct_pathkeys have been marked as redundant
 --

 -- Ensure we get a plan with a Limit 1
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT four FROM tenk1 WHERE four = 0;

 -- Ensure the above gives us the correct result
 SELECT DISTINCT four FROM tenk1 WHERE four = 0;

 -- Ensure we get a plan with a Limit 1
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;

 -- Ensure no rows are returned
 SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;

 -- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;

 -- Ensure we only get 1 row
 SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;

 --
 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 -- very own regression file.
 --

 CREATE TEMP TABLE disttable (f1 integer);
 INSERT INTO DISTTABLE VALUES(1);
 INSERT INTO DISTTABLE VALUES(2);
 INSERT INTO DISTTABLE VALUES(3);
 INSERT INTO DISTTABLE VALUES(NULL);

 -- basic cases
 SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable;
 SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable;
 SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable;
 SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable;

 -- check that optimizer constant-folds it properly
 SELECT 1 IS DISTINCT FROM 2 as "yes";
 SELECT 2 IS DISTINCT FROM 2 as "no";
 SELECT 2 IS DISTINCT FROM null as "yes";
 SELECT null IS DISTINCT FROM null as "no";

 -- negated form
 SELECT 1 IS NOT DISTINCT FROM 2 as "no";
 SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
 SELECT 2 IS NOT DISTINCT FROM null as "no";
 SELECT null IS NOT DISTINCT FROM null as "yes";

 -- join cases
 -- test IS DISTINCT FROM and IS NOT DISTINCT FROM join qual.The postgres planner doesn't support hash join on
 -- IS NOT DISTINCT FROM for now, ORCA supports Hash Join on "IS NOT DISTINCT FROM".

 CREATE TABLE distinct_1(a int);
 CREATE TABLE distinct_2(a int);
 INSERT INTO distinct_1 VALUES(1),(2),(NULL);
 INSERT INTO distinct_2 VALUES(1),(NULL);

 EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a;
 EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
 EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;

 EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
 EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
 EXPLAIN SELECT * FROM distinct_1 right join  distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;

 SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a;
 SELECT * FROM distinct_1 left join  distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
 SELECT * FROM distinct_1 right join  distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;

 SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
 SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
 SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;

 DROP TABLE distinct_1;
 DROP TABLE distinct_2;


 -- gpdb start: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on
 set gp_statistics_pullup_from_child_partition to on;
 CREATE TABLE sales (id int, date date, amt decimal(10,2))
 DISTRIBUTED BY (id);
 insert into sales values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100);
 select distinct * from sales order by 1;
 select distinct sales from sales order by 1;
 CREATE TABLE sales_partition (id int, date date, amt decimal(10,2))
 DISTRIBUTED BY (id)
 PARTITION BY RANGE (date)
 ( START (date '2021-01-01') INCLUSIVE
   END (date '2022-01-01') EXCLUSIVE
   EVERY (INTERVAL '1 month') );
 insert into sales_partition values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100);
 select distinct * from sales_partition order by 1;
 select distinct sales_partition from sales_partition order by 1;
 DROP TABLE sales;
 DROP TABLE sales_partition;

 CREATE TABLE cities (
     name            text,
     population      float,
     altitude        int
 );
 CREATE TABLE capitals (
     state           char(2)
 ) INHERITS (cities);
 select distinct * from cities;
 select distinct cities from cities;
 DROP TABLE capitals;
 DROP TABLE cities;
 set gp_statistics_pullup_from_child_partition to off;
 -- gpdb end: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on

 create table t_distinct_sort(a int, b int, c int);
 insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i;
 insert into t_distinct_sort select i, i+1, i+2  from generate_series(1, 10)i;
 insert into t_distinct_sort select i, i+1, i+2  from generate_series(1, 10)i;
 analyze t_distinct_sort;

 explain(verbose, costs off)
 select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a);
 select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a);
 explain(verbose, costs off)
 select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c);
 select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c);
 explain(verbose, costs off)
 select count(a), sum(b) from t_distinct_sort order by sum(a), count(c);
 select count(a), sum(b) from t_distinct_sort order by sum(a), count(c);
 explain(verbose, costs off)
 select distinct count(a), sum(b) from t_distinct_sort ;
 select distinct count(a), sum(b) from t_distinct_sort ;

 -- should keep distinct clause
 explain(verbose, costs off)
 select distinct on(count(random())) count(a), sum(b) from t_distinct_sort;
 select distinct on(count(random())) count(a), sum(b) from t_distinct_sort;

 explain(verbose, costs off)
 select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series(
 0, 2) from t_distinct_sort)as xx;
 select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series(
 0, 2) from t_distinct_sort)as xx;
 drop table t_distinct_sort;

 explain(verbose, costs off)
 select distinct(count(a)) from generate_series(0, 1) as a;
 select distinct(count(a)) from generate_series(0, 1) as a;
 explain(verbose, costs off)
 select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true;
 select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true;
 -- please refer to https://github.com/greenplum-db/gpdb/issues/15033
 CREATE TABLE t1_issue_15033(c DECIMAL CHECK (0.4 IS DISTINCT FROM 0.3));
 CREATE TABLE t2_issue_15033(c DECIMAL CHECK (0.4 IS NOT DISTINCT FROM 0.3));
 INSERT INTO t1_issue_15033 VALUES(10);
 SELECT * FROM t1_issue_15033;
 INSERT INTO t2_issue_15033 VALUES(10);
 SELECT * FROM t2_issue_15033;
	--
	-- SELECT_DISTINCT
	--

	--
	-- awk '{print $3;}' onek.data \| sort -n \| uniq
	--
	SELECT DISTINCT two FROM onek ORDER BY 1;

	--
	-- awk '{print $5;}' onek.data \| sort -n \| uniq
	--
	SELECT DISTINCT ten FROM onek ORDER BY 1;

	--
	-- awk '{print $16;}' onek.data \| sort -d \| uniq
	--
	SELECT DISTINCT string4 FROM onek ORDER BY 1;

	--
	-- awk '{print $3,$16,$5;}' onek.data \| sort -d \| uniq \|
	-- sort +0n -1 +1d -2 +2n -3
	--
	SELECT DISTINCT two, string4, ten
	FROM onek
	ORDER BY two using <, string4 using <, ten using <;

	--
	-- awk '{print $2;}' person.data \|
	-- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data \|
	-- awk '{if(NF!=1){print $2;}else{print;}}' - student.data \|
	-- awk 'BEGIN{FS=" ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data \|
	-- sort -n -r \| uniq
	--
	SELECT DISTINCT p.age FROM person* p ORDER BY age using >;

	--
	-- Check mentioning same column more than once
	--

	EXPLAIN (VERBOSE, COSTS OFF)
	SELECT count(*) FROM
	(SELECT DISTINCT two, four, two FROM tenk1) ss;

	SELECT count(*) FROM
	(SELECT DISTINCT two, four, two FROM tenk1) ss;

	--
	-- Compare results between plans using sorting and plans using hash
	-- aggregation. Force spilling in both cases by setting work_mem low.
	--

	SET work_mem='64kB';

	-- Produce results with sorting.

	SET enable_hashagg=FALSE;
	SET optimizer_enable_hashagg=FALSE;

	SET jit_above_cost=0;

	EXPLAIN (costs off)
	SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;

	CREATE TABLE distinct_group_1 AS
	SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;

	SET jit_above_cost TO DEFAULT;

	CREATE TABLE distinct_group_2 AS
	SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;

	SET enable_seqscan = 0;

	-- Check to see we get an incremental sort plan
	EXPLAIN (costs off)
	SELECT DISTINCT hundred, two FROM tenk1;

	RESET enable_seqscan;

	SET enable_hashagg=TRUE;
	SET optimizer_enable_hashagg=TRUE;

	-- Produce results with hash aggregation.

	SET enable_sort=FALSE;

	SET jit_above_cost=0;

	EXPLAIN (costs off)
	SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;

	CREATE TABLE distinct_hash_1 AS
	SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;

	SET jit_above_cost TO DEFAULT;

	CREATE TABLE distinct_hash_2 AS
	SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;

	SET enable_sort=TRUE;

	SET work_mem TO DEFAULT;

	-- Compare results

	(SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
	UNION ALL
	(SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);

	(SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
	UNION ALL
	(SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);

	DROP TABLE distinct_hash_1;
	DROP TABLE distinct_hash_2;
	DROP TABLE distinct_group_1;
	DROP TABLE distinct_group_2;

	-- Test parallel DISTINCT
	SET parallel_tuple_cost=0;
	SET parallel_setup_cost=0;
	SET min_parallel_table_scan_size=0;
	SET max_parallel_workers_per_gather=2;

	-- Ensure we get a parallel plan
	EXPLAIN (costs off)
	SELECT DISTINCT four FROM tenk1;

	-- Ensure the parallel plan produces the correct results
	SELECT DISTINCT four FROM tenk1;

	CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
	BEGIN
	RETURN a;
	END;
	$$ LANGUAGE plpgsql PARALLEL UNSAFE;

	-- Ensure we don't do parallel distinct with a parallel unsafe function
	EXPLAIN (COSTS OFF)
	SELECT DISTINCT distinct_func(1) FROM tenk1;

	-- make the function parallel safe
	CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
	BEGIN
	RETURN a;
	END;
	$$ LANGUAGE plpgsql PARALLEL SAFE;

	-- Ensure we do parallel distinct now that the function is parallel safe
	EXPLAIN (COSTS OFF)
	SELECT DISTINCT distinct_func(1) FROM tenk1;

	RESET max_parallel_workers_per_gather;
	RESET min_parallel_table_scan_size;
	RESET parallel_setup_cost;
	RESET parallel_tuple_cost;

	--
	-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
	-- all of the distinct_pathkeys have been marked as redundant
	--

	-- Ensure we get a plan with a Limit 1
	EXPLAIN (COSTS OFF)
	SELECT DISTINCT four FROM tenk1 WHERE four = 0;

	-- Ensure the above gives us the correct result
	SELECT DISTINCT four FROM tenk1 WHERE four = 0;

	-- Ensure we get a plan with a Limit 1
	EXPLAIN (COSTS OFF)
	SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;

	-- Ensure no rows are returned
	SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;

	-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
	EXPLAIN (COSTS OFF)
	SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;

	-- Ensure we only get 1 row
	SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;

	--
	-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
	-- very own regression file.
	--

	CREATE TEMP TABLE disttable (f1 integer);
	INSERT INTO DISTTABLE VALUES(1);
	INSERT INTO DISTTABLE VALUES(2);
	INSERT INTO DISTTABLE VALUES(3);
	INSERT INTO DISTTABLE VALUES(NULL);

	-- basic cases
	SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable;
	SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable;
	SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable;
	SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable;

	-- check that optimizer constant-folds it properly
	SELECT 1 IS DISTINCT FROM 2 as "yes";
	SELECT 2 IS DISTINCT FROM 2 as "no";
	SELECT 2 IS DISTINCT FROM null as "yes";
	SELECT null IS DISTINCT FROM null as "no";

	-- negated form
	SELECT 1 IS NOT DISTINCT FROM 2 as "no";
	SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
	SELECT 2 IS NOT DISTINCT FROM null as "no";
	SELECT null IS NOT DISTINCT FROM null as "yes";

	-- join cases
	-- test IS DISTINCT FROM and IS NOT DISTINCT FROM join qual.The postgres planner doesn't support hash join on
	-- IS NOT DISTINCT FROM for now, ORCA supports Hash Join on "IS NOT DISTINCT FROM".

	CREATE TABLE distinct_1(a int);
	CREATE TABLE distinct_2(a int);
	INSERT INTO distinct_1 VALUES(1),(2),(NULL);
	INSERT INTO distinct_2 VALUES(1),(NULL);

	EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a;
	EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
	EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;

	EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
	EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
	EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;

	SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a;
	SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
	SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;

	SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
	SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
	SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;

	DROP TABLE distinct_1;
	DROP TABLE distinct_2;


	-- gpdb start: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on
	set gp_statistics_pullup_from_child_partition to on;
	CREATE TABLE sales (id int, date date, amt decimal(10,2))
	DISTRIBUTED BY (id);
	insert into sales values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100);
	select distinct * from sales order by 1;
	select distinct sales from sales order by 1;
	CREATE TABLE sales_partition (id int, date date, amt decimal(10,2))
	DISTRIBUTED BY (id)
	PARTITION BY RANGE (date)
	( START (date '2021-01-01') INCLUSIVE
	END (date '2022-01-01') EXCLUSIVE
	EVERY (INTERVAL '1 month') );
	insert into sales_partition values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100);
	select distinct * from sales_partition order by 1;
	select distinct sales_partition from sales_partition order by 1;
	DROP TABLE sales;
	DROP TABLE sales_partition;

	CREATE TABLE cities (
	name text,
	population float,
	altitude int
	);
	CREATE TABLE capitals (
	state char(2)
	) INHERITS (cities);
	select distinct * from cities;
	select distinct cities from cities;
	DROP TABLE capitals;
	DROP TABLE cities;
	set gp_statistics_pullup_from_child_partition to off;
	-- gpdb end: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on

	create table t_distinct_sort(a int, b int, c int);
	insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i;
	insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i;
	insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i;
	analyze t_distinct_sort;

	explain(verbose, costs off)
	select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a);
	select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a);
	explain(verbose, costs off)
	select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c);
	select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c);
	explain(verbose, costs off)
	select count(a), sum(b) from t_distinct_sort order by sum(a), count(c);
	select count(a), sum(b) from t_distinct_sort order by sum(a), count(c);
	explain(verbose, costs off)
	select distinct count(a), sum(b) from t_distinct_sort ;
	select distinct count(a), sum(b) from t_distinct_sort ;

	-- should keep distinct clause
	explain(verbose, costs off)
	select distinct on(count(random())) count(a), sum(b) from t_distinct_sort;
	select distinct on(count(random())) count(a), sum(b) from t_distinct_sort;

	explain(verbose, costs off)
	select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series(
	0, 2) from t_distinct_sort)as xx;
	select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series(
	0, 2) from t_distinct_sort)as xx;
	drop table t_distinct_sort;

	explain(verbose, costs off)
	select distinct(count(a)) from generate_series(0, 1) as a;
	select distinct(count(a)) from generate_series(0, 1) as a;
	explain(verbose, costs off)
	select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true;
	select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true;
	-- please refer to https://github.com/greenplum-db/gpdb/issues/15033
	CREATE TABLE t1_issue_15033(c DECIMAL CHECK (0.4 IS DISTINCT FROM 0.3));
	CREATE TABLE t2_issue_15033(c DECIMAL CHECK (0.4 IS NOT DISTINCT FROM 0.3));
	INSERT INTO t1_issue_15033 VALUES(10);
	SELECT * FROM t1_issue_15033;
	INSERT INTO t2_issue_15033 VALUES(10);
	SELECT * FROM t2_issue_15033;