src/test/regress/expected/select_distinct.out - cloudberry - Git at Google

 --
 -- SELECT_DISTINCT
 --
 --
 -- awk '{print $3;}' onek.data | sort -n | uniq
 --
 SELECT DISTINCT two FROM onek ORDER BY 1;
  two
 -----
    0
    1
 (2 rows)

 --
 -- awk '{print $5;}' onek.data | sort -n | uniq
 --
 SELECT DISTINCT ten FROM onek ORDER BY 1;
  ten
 -----
    0
    1
    2
    3
    4
    5
    6
    7
    8
    9
 (10 rows)

 --
 -- awk '{print $16;}' onek.data | sort -d | uniq
 --
 SELECT DISTINCT string4 FROM onek ORDER BY 1;
  string4
 ---------
  AAAAxx
  HHHHxx
  OOOOxx
  VVVVxx
 (4 rows)

 --
 -- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
 -- sort +0n -1 +1d -2 +2n -3
 --
 SELECT DISTINCT two, string4, ten
    FROM onek
    ORDER BY two using <, string4 using <, ten using <;
  two | string4 | ten
 -----+---------+-----
    0 | AAAAxx  |   0
    0 | AAAAxx  |   2
    0 | AAAAxx  |   4
    0 | AAAAxx  |   6
    0 | AAAAxx  |   8
    0 | HHHHxx  |   0
    0 | HHHHxx  |   2
    0 | HHHHxx  |   4
    0 | HHHHxx  |   6
    0 | HHHHxx  |   8
    0 | OOOOxx  |   0
    0 | OOOOxx  |   2
    0 | OOOOxx  |   4
    0 | OOOOxx  |   6
    0 | OOOOxx  |   8
    0 | VVVVxx  |   0
    0 | VVVVxx  |   2
    0 | VVVVxx  |   4
    0 | VVVVxx  |   6
    0 | VVVVxx  |   8
    1 | AAAAxx  |   1
    1 | AAAAxx  |   3
    1 | AAAAxx  |   5
    1 | AAAAxx  |   7
    1 | AAAAxx  |   9
    1 | HHHHxx  |   1
    1 | HHHHxx  |   3
    1 | HHHHxx  |   5
    1 | HHHHxx  |   7
    1 | HHHHxx  |   9
    1 | OOOOxx  |   1
    1 | OOOOxx  |   3
    1 | OOOOxx  |   5
    1 | OOOOxx  |   7
    1 | OOOOxx  |   9
    1 | VVVVxx  |   1
    1 | VVVVxx  |   3
    1 | VVVVxx  |   5
    1 | VVVVxx  |   7
    1 | VVVVxx  |   9
 (40 rows)

 --
 -- awk '{print $2;}' person.data |
 -- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
 -- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
 -- awk 'BEGIN{FS="      ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
 -- sort -n -r | uniq
 --
 SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
  age
 -----
   98
   88
   78
   68
   60
   58
   50
   48
   40
   38
   34
   30
   28
   25
   24
   23
   20
   19
   18
    8
 (20 rows)

 --
 -- Check mentioning same column more than once
 --
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT count(*) FROM
   (SELECT DISTINCT two, four, two FROM tenk1) ss;
                                    QUERY PLAN
 --------------------------------------------------------------------------------
  Finalize Aggregate
    Output: count(*)
    ->  Gather Motion 3:1  (slice1; segments: 3)
          Output: (PARTIAL count(*))
          ->  Partial Aggregate
                Output: PARTIAL count(*)
                ->  HashAggregate
                      Output: tenk1.two, tenk1.four, tenk1.two
                      Group Key: tenk1.two, tenk1.four, tenk1.two
                      ->  Redistribute Motion 3:3  (slice2; segments: 3)
                            Output: tenk1.two, tenk1.four, tenk1.two
                            Hash Key: tenk1.two, tenk1.four, tenk1.two
                            ->  HashAggregate
                                  Output: tenk1.two, tenk1.four, tenk1.two
                                  Group Key: tenk1.two, tenk1.four, tenk1.two
                                  ->  Seq Scan on public.tenk1
                                        Output: tenk1.two, tenk1.four, tenk1.two
  Optimizer: Postgres query optimizer
 (18 rows)

 SELECT count(*) FROM
   (SELECT DISTINCT two, four, two FROM tenk1) ss;
  count
 -------
      4
 (1 row)

 --
 -- Compare results between plans using sorting and plans using hash
 -- aggregation. Force spilling in both cases by setting work_mem low.
 --
 SET work_mem='64kB';
 -- Produce results with sorting.
 SET enable_hashagg=FALSE;
 SET optimizer_enable_hashagg=FALSE;
 SET jit_above_cost=0;
 EXPLAIN (costs off)
 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
                    QUERY PLAN
 ------------------------------------------------
  Unique
    Group Key: ((g % 1000))
    ->  Sort
          Sort Key: ((g % 1000))
          ->  Function Scan on generate_series g
 (4 rows)

 CREATE TABLE distinct_group_1 AS
 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
 SET jit_above_cost TO DEFAULT;
 CREATE TABLE distinct_group_2 AS
 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
 SET enable_seqscan = 0;
 -- Check to see we get an incremental sort plan
 EXPLAIN (costs off)
 SELECT DISTINCT hundred, two FROM tenk1;
                                QUERY PLAN
 ------------------------------------------------------------------------
  GroupAggregate
    Group Key: hundred, two
    ->  Gather Motion 3:1  (slice1; segments: 3)
          Merge Key: hundred, two
          ->  Sort
                Sort Key: hundred, two
                ->  GroupAggregate
                      Group Key: hundred, two
                      ->  Sort
                            Sort Key: hundred, two
                            ->  Bitmap Heap Scan on tenk1
                                  ->  Bitmap Index Scan on tenk1_hundred
  Optimizer: Postgres query optimizer
 (13 rows)

 RESET enable_seqscan;
 SET enable_hashagg=TRUE;
 SET optimizer_enable_hashagg=TRUE;
 -- Produce results with hash aggregation.
 SET enable_sort=FALSE;
 SET jit_above_cost=0;
 EXPLAIN (costs off)
 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
                 QUERY PLAN
 ------------------------------------------
  HashAggregate
    Group Key: (g % 1000)
    ->  Function Scan on generate_series g
 (3 rows)

 CREATE TABLE distinct_hash_1 AS
 SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
 SET jit_above_cost TO DEFAULT;
 CREATE TABLE distinct_hash_2 AS
 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
 SET enable_sort=TRUE;
 SET work_mem TO DEFAULT;
 -- Compare results
 (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
   UNION ALL
 (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
  ?column?
 ----------
 (0 rows)

 (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
   UNION ALL
 (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
  ?column?
 ----------
 (0 rows)

 DROP TABLE distinct_hash_1;
 DROP TABLE distinct_hash_2;
 DROP TABLE distinct_group_1;
 DROP TABLE distinct_group_2;
 -- Test parallel DISTINCT
 SET parallel_tuple_cost=0;
 SET parallel_setup_cost=0;
 SET min_parallel_table_scan_size=0;
 SET max_parallel_workers_per_gather=2;
 -- Ensure we get a parallel plan
 EXPLAIN (costs off)
 SELECT DISTINCT four FROM tenk1;
                          QUERY PLAN
 ------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  HashAggregate
          Group Key: four
          ->  Redistribute Motion 3:3  (slice2; segments: 3)
                Hash Key: four
                ->  HashAggregate
                      Group Key: four
                      ->  Seq Scan on tenk1
  Optimizer: Postgres query optimizer
 (9 rows)

 -- Ensure the parallel plan produces the correct results
 SELECT DISTINCT four FROM tenk1;
  four
 ------
     0
     1
     2
     3
 (4 rows)

 CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
   BEGIN
     RETURN a;
   END;
 $$ LANGUAGE plpgsql PARALLEL UNSAFE;
 -- Ensure we don't do parallel distinct with a parallel unsafe function
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT distinct_func(1) FROM tenk1;
                             QUERY PLAN
 ------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    Merge Key: (distinct_func(1))
    ->  Unique
          Group Key: (distinct_func(1))
          ->  Sort
                Sort Key: (distinct_func(1))
                ->  Redistribute Motion 3:3  (slice2; segments: 3)
                      Hash Key: (distinct_func(1))
                      ->  Seq Scan on tenk1
  Optimizer: Postgres query optimizer
 (10 rows)

 -- make the function parallel safe
 CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$
   BEGIN
     RETURN a;
   END;
 $$ LANGUAGE plpgsql PARALLEL SAFE;
 -- Ensure we do parallel distinct now that the function is parallel safe
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT distinct_func(1) FROM tenk1;
                             QUERY PLAN
 ------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    Merge Key: (distinct_func(1))
    ->  Unique
          Group Key: (distinct_func(1))
          ->  Sort
                Sort Key: (distinct_func(1))
                ->  Redistribute Motion 3:3  (slice2; segments: 3)
                      Hash Key: (distinct_func(1))
                      ->  Seq Scan on tenk1
  Optimizer: Postgres query optimizer
 (10 rows)

 RESET max_parallel_workers_per_gather;
 RESET min_parallel_table_scan_size;
 RESET parallel_setup_cost;
 RESET parallel_tuple_cost;
 --
 -- Test the planner's ability to use a LIMIT 1 instead of a Unique node when
 -- all of the distinct_pathkeys have been marked as redundant
 --
 -- Ensure we get a plan with a Limit 1
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT four FROM tenk1 WHERE four = 0;
                    QUERY PLAN
 ------------------------------------------------
  GroupAggregate
    Group Key: four
    ->  Gather Motion 3:1  (slice1; segments: 3)
          ->  GroupAggregate
                Group Key: four
                ->  Seq Scan on tenk1
                      Filter: (four = 0)
  Optimizer: Postgres query optimizer
 (8 rows)

 -- Ensure the above gives us the correct result
 SELECT DISTINCT four FROM tenk1 WHERE four = 0;
  four
 ------
     0
 (1 row)

 -- Ensure we get a plan with a Limit 1
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
                        QUERY PLAN
 ---------------------------------------------------------
  GroupAggregate
    Group Key: four
    ->  Gather Motion 3:1  (slice1; segments: 3)
          ->  GroupAggregate
                Group Key: four
                ->  Seq Scan on tenk1
                      Filter: ((two <> 0) AND (four = 0))
  Optimizer: Postgres query optimizer
 (8 rows)

 -- Ensure no rows are returned
 SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0;
  four
 ------
 (0 rows)

 -- Ensure we get a plan with a Limit 1 when the SELECT list contains constants
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
                    QUERY PLAN
 ------------------------------------------------
  GroupAggregate
    Group Key: four, 1, 2, 3
    ->  Gather Motion 3:1  (slice1; segments: 3)
          ->  GroupAggregate
                Group Key: four, 1, 2, 3
                ->  Seq Scan on tenk1
                      Filter: (four = 0)
  Optimizer: Postgres query optimizer
 (8 rows)

 -- Ensure we only get 1 row
 SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;
  four | ?column? | ?column? | ?column?
 ------+----------+----------+----------
     0 |        1 |        2 |        3
 (1 row)

 --
 -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
 -- very own regression file.
 --
 CREATE TEMP TABLE disttable (f1 integer);
 INSERT INTO DISTTABLE VALUES(1);
 INSERT INTO DISTTABLE VALUES(2);
 INSERT INTO DISTTABLE VALUES(3);
 INSERT INTO DISTTABLE VALUES(NULL);
 -- basic cases
 SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable;
  f1 | not 2
 ----+-------
   1 | t
   2 | f
   3 | t
     | t
 (4 rows)

 SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable;
  f1 | not null
 ----+----------
   1 | t
   2 | t
   3 | t
     | f
 (4 rows)

 SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable;
  f1 | false
 ----+-------
   1 | f
   2 | f
   3 | f
     | f
 (4 rows)

 SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable;
  f1 | not null
 ----+----------
   1 | t
   2 | t
   3 | t
     | f
 (4 rows)

 -- check that optimizer constant-folds it properly
 SELECT 1 IS DISTINCT FROM 2 as "yes";
  yes
 -----
  t
 (1 row)

 SELECT 2 IS DISTINCT FROM 2 as "no";
  no
 ----
  f
 (1 row)

 SELECT 2 IS DISTINCT FROM null as "yes";
  yes
 -----
  t
 (1 row)

 SELECT null IS DISTINCT FROM null as "no";
  no
 ----
  f
 (1 row)

 -- negated form
 SELECT 1 IS NOT DISTINCT FROM 2 as "no";
  no
 ----
  f
 (1 row)

 SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
  yes
 -----
  t
 (1 row)

 SELECT 2 IS NOT DISTINCT FROM null as "no";
  no
 ----
  f
 (1 row)

 SELECT null IS NOT DISTINCT FROM null as "yes";
  yes
 -----
  t
 (1 row)

 -- join cases
 -- test IS DISTINCT FROM and IS NOT DISTINCT FROM join qual.The postgres planner doesn't support hash join on
 -- IS NOT DISTINCT FROM for now, ORCA supports Hash Join on "IS NOT DISTINCT FROM".
 CREATE TABLE distinct_1(a int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 CREATE TABLE distinct_2(a int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 INSERT INTO distinct_1 VALUES(1),(2),(NULL);
 INSERT INTO distinct_2 VALUES(1),(NULL);
 EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a;
                                                QUERY PLAN
 ---------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=10000000000.00..10214778180.80 rows=9264416310 width=8)
    ->  Nested Loop  (cost=10000000000.00..10091252630.00 rows=3088138770 width=8)
          Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a)
          ->  Seq Scan on distinct_1  (cost=0.00..355.00 rows=32100 width=4)
          ->  Materialize  (cost=0.00..2120.50 rows=96300 width=4)
                ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..1639.00 rows=96300 width=4)
                      ->  Seq Scan on distinct_2  (cost=0.00..355.00 rows=32100 width=4)
  Optimizer: Postgres query optimizer
 (8 rows)

 EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
                                                QUERY PLAN
 ---------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=10000000000.00..10214778180.80 rows=9264416310 width=8)
    ->  Nested Loop Left Join  (cost=10000000000.00..10091252630.00 rows=3088138770 width=8)
          Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a)
          ->  Seq Scan on distinct_1  (cost=0.00..355.00 rows=32100 width=4)
          ->  Materialize  (cost=0.00..2120.50 rows=96300 width=4)
                ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..1639.00 rows=96300 width=4)
                      ->  Seq Scan on distinct_2  (cost=0.00..355.00 rows=32100 width=4)
  Optimizer: Postgres query optimizer
 (8 rows)

 EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
                                                QUERY PLAN
 ---------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=10000000000.00..10214778180.80 rows=9264416310 width=8)
    ->  Nested Loop Left Join  (cost=10000000000.00..10091252630.00 rows=3088138770 width=8)
          Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a)
          ->  Seq Scan on distinct_2  (cost=0.00..355.00 rows=32100 width=4)
          ->  Materialize  (cost=0.00..2120.50 rows=96300 width=4)
                ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..1639.00 rows=96300 width=4)
                      ->  Seq Scan on distinct_1  (cost=0.00..355.00 rows=32100 width=4)
  Optimizer: Postgres query optimizer
 (8 rows)

 EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
                                                QUERY PLAN
 --------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=10000000000.00..10091376279.20 rows=9273690 width=8)
    ->  Nested Loop  (cost=10000000000.00..10091252630.00 rows=3091230 width=8)
          Join Filter: (NOT (distinct_1.a IS DISTINCT FROM distinct_2.a))
          ->  Seq Scan on distinct_1  (cost=0.00..355.00 rows=32100 width=4)
          ->  Materialize  (cost=0.00..2120.50 rows=96300 width=4)
                ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..1639.00 rows=96300 width=4)
                      ->  Seq Scan on distinct_2  (cost=0.00..355.00 rows=32100 width=4)
  Optimizer: Postgres query optimizer
 (8 rows)

 EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
                                                QUERY PLAN
 --------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=10000000000.00..10091376279.20 rows=9273690 width=8)
    ->  Nested Loop Left Join  (cost=10000000000.00..10091252630.00 rows=3091230 width=8)
          Join Filter: (NOT (distinct_1.a IS DISTINCT FROM distinct_2.a))
          ->  Seq Scan on distinct_1  (cost=0.00..355.00 rows=32100 width=4)
          ->  Materialize  (cost=0.00..2120.50 rows=96300 width=4)
                ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..1639.00 rows=96300 width=4)
                      ->  Seq Scan on distinct_2  (cost=0.00..355.00 rows=32100 width=4)
  Optimizer: Postgres query optimizer
 (8 rows)

 EXPLAIN SELECT * FROM distinct_1 right join  distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
                                                QUERY PLAN
 --------------------------------------------------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)  (cost=10000000000.00..10091376279.20 rows=9273690 width=8)
    ->  Nested Loop Left Join  (cost=10000000000.00..10091252630.00 rows=3091230 width=8)
          Join Filter: (NOT (distinct_1.a IS DISTINCT FROM distinct_2.a))
          ->  Seq Scan on distinct_2  (cost=0.00..355.00 rows=32100 width=4)
          ->  Materialize  (cost=0.00..2120.50 rows=96300 width=4)
                ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=0.00..1639.00 rows=96300 width=4)
                      ->  Seq Scan on distinct_1  (cost=0.00..355.00 rows=32100 width=4)
  Optimizer: Postgres query optimizer
 (8 rows)

 SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a;
  a | a
 ---+---
  2 |
  2 | 1
    | 1
  1 |
 (4 rows)

 SELECT * FROM distinct_1 left join  distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
  a | a
 ---+---
  2 |
  2 | 1
    | 1
  1 |
 (4 rows)

 SELECT * FROM distinct_1 right join  distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
  a | a
 ---+---
  1 |
  2 |
  2 | 1
    | 1
 (4 rows)

 SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
  a | a
 ---+---
  1 | 1
    |
 (2 rows)

 SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
  a | a
 ---+---
  1 | 1
  2 |
    |
 (3 rows)

 SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
  a | a
 ---+---
    |
  1 | 1
 (2 rows)

 DROP TABLE distinct_1;
 DROP TABLE distinct_2;
 -- gpdb start: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on
 set gp_statistics_pullup_from_child_partition to on;
 CREATE TABLE sales (id int, date date, amt decimal(10,2))
 DISTRIBUTED BY (id);
 insert into sales values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100);
 select distinct * from sales order by 1;
  id |    date    |  amt
 ----+------------+--------
   1 | 02-02-2021 |  20.00
   2 | 06-02-2021 |   9.00
   3 | 10-02-2021 | 100.00
 (3 rows)

 select distinct sales from sales order by 1;
          sales
 -----------------------
  (1,02-02-2021,20.00)
  (2,06-02-2021,9.00)
  (3,10-02-2021,100.00)
 (3 rows)

 CREATE TABLE sales_partition (id int, date date, amt decimal(10,2))
 DISTRIBUTED BY (id)
 PARTITION BY RANGE (date)
 ( START (date '2021-01-01') INCLUSIVE
   END (date '2022-01-01') EXCLUSIVE
   EVERY (INTERVAL '1 month') );
 insert into sales_partition values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100);
 select distinct * from sales_partition order by 1;
  id |    date    |  amt
 ----+------------+--------
   1 | 02-02-2021 |  20.00
   2 | 06-02-2021 |   9.00
   3 | 10-02-2021 | 100.00
 (3 rows)

 select distinct sales_partition from sales_partition order by 1;
     sales_partition
 -----------------------
  (1,02-02-2021,20.00)
  (2,06-02-2021,9.00)
  (3,10-02-2021,100.00)
 (3 rows)

 DROP TABLE sales;
 DROP TABLE sales_partition;
 CREATE TABLE cities (
     name            text,
     population      float,
     altitude        int
 );
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'name' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 CREATE TABLE capitals (
     state           char(2)
 ) INHERITS (cities);
 NOTICE:  table has parent, setting distribution columns to match parent table
 select distinct * from cities;
  name | population | altitude
 ------+------------+----------
 (0 rows)

 select distinct cities from cities;
  cities
 --------
 (0 rows)

 DROP TABLE capitals;
 DROP TABLE cities;
 set gp_statistics_pullup_from_child_partition to off;
 -- gpdb end: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on
 create table t_distinct_sort(a int, b int, c int);
 insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i;
 insert into t_distinct_sort select i, i+1, i+2  from generate_series(1, 10)i;
 insert into t_distinct_sort select i, i+1, i+2  from generate_series(1, 10)i;
 analyze t_distinct_sort;
 explain(verbose, costs off)
 select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a);
                                                         QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------
  Finalize Aggregate
    Output: count(a), sum(b)
    ->  Gather Motion 3:1  (slice1; segments: 3)
          Output: (PARTIAL count(a)), (PARTIAL sum(b))
          ->  Partial Aggregate
                Output: PARTIAL count(a), PARTIAL sum(b)
                ->  Seq Scan on public.t_distinct_sort
                      Output: a, b, c
  Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off'
  Optimizer: Postgres query optimizer
 (10 rows)

 select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a);
  count | sum
 -------+-----
     30 | 195
 (1 row)

 explain(verbose, costs off)
 select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c);
                                                         QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------
  Finalize Aggregate
    Output: count(a), sum(b)
    ->  Gather Motion 3:1  (slice1; segments: 3)
          Output: (PARTIAL count(a)), (PARTIAL sum(b))
          ->  Partial Aggregate
                Output: PARTIAL count(a), PARTIAL sum(b)
                ->  Seq Scan on public.t_distinct_sort
                      Output: a, b, c
  Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off'
  Optimizer: Postgres query optimizer
 (10 rows)

 select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c);
  count | sum
 -------+-----
     30 | 195
 (1 row)

 explain(verbose, costs off)
 select count(a), sum(b) from t_distinct_sort order by sum(a), count(c);
                                                         QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------
  Finalize Aggregate
    Output: count(a), sum(b)
    ->  Gather Motion 3:1  (slice1; segments: 3)
          Output: (PARTIAL count(a)), (PARTIAL sum(b))
          ->  Partial Aggregate
                Output: PARTIAL count(a), PARTIAL sum(b)
                ->  Seq Scan on public.t_distinct_sort
                      Output: a, b, c
  Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off'
  Optimizer: Postgres query optimizer
 (10 rows)

 select count(a), sum(b) from t_distinct_sort order by sum(a), count(c);
  count | sum
 -------+-----
     30 | 195
 (1 row)

 explain(verbose, costs off)
 select distinct count(a), sum(b) from t_distinct_sort ;
                                                         QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------
  Finalize Aggregate
    Output: count(a), sum(b)
    ->  Gather Motion 3:1  (slice1; segments: 3)
          Output: (PARTIAL count(a)), (PARTIAL sum(b))
          ->  Partial Aggregate
                Output: PARTIAL count(a), PARTIAL sum(b)
                ->  Seq Scan on public.t_distinct_sort
                      Output: a, b, c
  Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off'
  Optimizer: Postgres query optimizer
 (10 rows)

 select distinct count(a), sum(b) from t_distinct_sort ;
  count | sum
 -------+-----
     30 | 195
 (1 row)

 -- should keep distinct clause
 explain(verbose, costs off)
 select distinct on(count(random())) count(a), sum(b) from t_distinct_sort;
                                                         QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------
  Unique
    Output: (count(a)), (sum(b)), (count(random()))
    Group Key: (count(random()))
    ->  Sort
          Output: (count(a)), (sum(b)), (count(random()))
          Sort Key: (count(random()))
          ->  Finalize Aggregate
                Output: count(a), sum(b), count(random())
                ->  Gather Motion 3:1  (slice1; segments: 3)
                      Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL count(random()))
                      ->  Partial Aggregate
                            Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL count(random())
                            ->  Seq Scan on public.t_distinct_sort
                                  Output: a, b, c
  Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off'
  Optimizer: Postgres query optimizer
 (16 rows)

 select distinct on(count(random())) count(a), sum(b) from t_distinct_sort;
  count | sum
 -------+-----
     30 | 195
 (1 row)

 explain(verbose, costs off)
 select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series(
 0, 2) from t_distinct_sort)as xx;
                                                                    QUERY PLAN
 -------------------------------------------------------------------------------------------------------------------------------------------------
  Finalize Aggregate
    Output: count(t_distinct_sort.a)
    ->  Gather Motion 3:1  (slice1; segments: 3)
          Output: (PARTIAL count(t_distinct_sort.a))
          ->  Partial Aggregate
                Output: PARTIAL count(t_distinct_sort.a)
                ->  Nested Loop
                      Output: t_distinct_sort.a
                      ->  Broadcast Motion 1:3  (slice2; segments: 1)
                            ->  Subquery Scan on xx
                                  ->  Unique
                                        Output: (count(*)), (generate_series(0, 2))
                                        Group Key: (count(*)), (generate_series(0, 2))
                                        ->  Sort
                                              Output: (count(*)), (generate_series(0, 2))
                                              Sort Key: (count(*)), (generate_series(0, 2))
                                              ->  ProjectSet
                                                    Output: (count(*)), generate_series(0, 2)
                                                    ->  Finalize Aggregate
                                                          Output: count(*)
                                                          ->  Gather Motion 3:1  (slice3; segments: 3)
                                                                Output: (PARTIAL count(*))
                                                                ->  Partial Aggregate
                                                                      Output: PARTIAL count(*)
                                                                      ->  Seq Scan on public.t_distinct_sort t_distinct_sort_1
                                                                            Output: t_distinct_sort_1.a, t_distinct_sort_1.b, t_distinct_sort_1.c
                      ->  Materialize
                            Output: t_distinct_sort.a
                            ->  Seq Scan on public.t_distinct_sort
                                  Output: t_distinct_sort.a
  Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off'
  Optimizer: Postgres query optimizer
 (32 rows)

 select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series(
 0, 2) from t_distinct_sort)as xx;
  count
 -------
     90
 (1 row)

 drop table t_distinct_sort;
 explain(verbose, costs off)
 select distinct(count(a)) from generate_series(0, 1) as a;
                                                         QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------
  Aggregate
    Output: count(a)
    ->  Function Scan on pg_catalog.generate_series a
          Output: a
          Function Call: generate_series(0, 1)
  Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off'
  Optimizer: Postgres query optimizer
 (7 rows)

 select distinct(count(a)) from generate_series(0, 1) as a;
  count
 -------
      2
 (1 row)

 explain(verbose, costs off)
 select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true;
                                                         QUERY PLAN
 ---------------------------------------------------------------------------------------------------------------------------
  Aggregate
    Output: count(*)
    ->  Nested Loop
          ->  Function Scan on pg_catalog.generate_series a
                Output: a.a
                Function Call: generate_series(0, 1)
          ->  Function Scan on pg_catalog.generate_series b
                Output: b.b
                Function Call: generate_series(0, 2)
  Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off'
  Optimizer: Postgres query optimizer
 (11 rows)

 select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true;
  count
 -------
      6
 (1 row)

 -- please refer to https://github.com/greenplum-db/gpdb/issues/15033
 CREATE TABLE t1_issue_15033(c DECIMAL CHECK (0.4 IS DISTINCT FROM 0.3));
 CREATE TABLE t2_issue_15033(c DECIMAL CHECK (0.4 IS NOT DISTINCT FROM 0.3));
 INSERT INTO t1_issue_15033 VALUES(10);
 SELECT * FROM t1_issue_15033;
  c
 ----
  10
 (1 row)

 INSERT INTO t2_issue_15033 VALUES(10);
 DETAIL:  Failing row contains (10).
 ERROR:  new row for relation "t2_issue_15033" violates check constraint "t2_issue_15033_check"
 SELECT * FROM t2_issue_15033;
  c
 ---
 (0 rows)