| -- |
| -- SELECT_DISTINCT |
| -- |
| -- |
| -- awk '{print $3;}' onek.data | sort -n | uniq |
| -- |
| SELECT DISTINCT two FROM onek ORDER BY 1; |
| two |
| ----- |
| 0 |
| 1 |
| (2 rows) |
| |
| -- |
| -- awk '{print $5;}' onek.data | sort -n | uniq |
| -- |
| SELECT DISTINCT ten FROM onek ORDER BY 1; |
| ten |
| ----- |
| 0 |
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
| 9 |
| (10 rows) |
| |
| -- |
| -- awk '{print $16;}' onek.data | sort -d | uniq |
| -- |
| SELECT DISTINCT string4 FROM onek ORDER BY 1; |
| string4 |
| --------- |
| AAAAxx |
| HHHHxx |
| OOOOxx |
| VVVVxx |
| (4 rows) |
| |
| -- |
| -- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq | |
| -- sort +0n -1 +1d -2 +2n -3 |
| -- |
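| -- (ORDER BY ... USING < names the sort operator explicitly; with the default operator classes it behaves the same as ASC.) |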
| SELECT DISTINCT two, string4, ten |
| FROM onek |
| ORDER BY two using <, string4 using <, ten using <; |
| two | string4 | ten |
| -----+---------+----- |
| 0 | AAAAxx | 0 |
| 0 | AAAAxx | 2 |
| 0 | AAAAxx | 4 |
| 0 | AAAAxx | 6 |
| 0 | AAAAxx | 8 |
| 0 | HHHHxx | 0 |
| 0 | HHHHxx | 2 |
| 0 | HHHHxx | 4 |
| 0 | HHHHxx | 6 |
| 0 | HHHHxx | 8 |
| 0 | OOOOxx | 0 |
| 0 | OOOOxx | 2 |
| 0 | OOOOxx | 4 |
| 0 | OOOOxx | 6 |
| 0 | OOOOxx | 8 |
| 0 | VVVVxx | 0 |
| 0 | VVVVxx | 2 |
| 0 | VVVVxx | 4 |
| 0 | VVVVxx | 6 |
| 0 | VVVVxx | 8 |
| 1 | AAAAxx | 1 |
| 1 | AAAAxx | 3 |
| 1 | AAAAxx | 5 |
| 1 | AAAAxx | 7 |
| 1 | AAAAxx | 9 |
| 1 | HHHHxx | 1 |
| 1 | HHHHxx | 3 |
| 1 | HHHHxx | 5 |
| 1 | HHHHxx | 7 |
| 1 | HHHHxx | 9 |
| 1 | OOOOxx | 1 |
| 1 | OOOOxx | 3 |
| 1 | OOOOxx | 5 |
| 1 | OOOOxx | 7 |
| 1 | OOOOxx | 9 |
| 1 | VVVVxx | 1 |
| 1 | VVVVxx | 3 |
| 1 | VVVVxx | 5 |
| 1 | VVVVxx | 7 |
| 1 | VVVVxx | 9 |
| (40 rows) |
| |
| -- |
| -- awk '{print $2;}' person.data | |
| -- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data | |
| -- awk '{if(NF!=1){print $2;}else{print;}}' - student.data | |
| -- awk 'BEGIN{FS=" ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data | |
| -- sort -n -r | uniq |
| -- |
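| -- (The trailing * in person* explicitly requests rows from person and all of its inheritance children; since scanning children is the default, it is equivalent to plain person here.) |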
| SELECT DISTINCT p.age FROM person* p ORDER BY age using >; |
| age |
| ----- |
| 98 |
| 88 |
| 78 |
| 68 |
| 60 |
| 58 |
| 50 |
| 48 |
| 40 |
| 38 |
| 34 |
| 30 |
| 28 |
| 25 |
| 24 |
| 23 |
| 20 |
| 19 |
| 18 |
| 8 |
| (20 rows) |
| |
| -- |
| -- Check mentioning same column more than once |
| -- |
| EXPLAIN (VERBOSE, COSTS OFF) |
| SELECT count(*) FROM |
| (SELECT DISTINCT two, four, two FROM tenk1) ss; |
| QUERY PLAN |
| -------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(*) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(*)) |
| -> Partial Aggregate |
| Output: PARTIAL count(*) |
| -> HashAggregate |
| Output: tenk1.two, tenk1.four, tenk1.two |
| Group Key: tenk1.two, tenk1.four, tenk1.two |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: tenk1.two, tenk1.four, tenk1.two |
| Hash Key: tenk1.two, tenk1.four, tenk1.two |
| -> HashAggregate |
| Output: tenk1.two, tenk1.four, tenk1.two |
| Group Key: tenk1.two, tenk1.four, tenk1.two |
| -> Seq Scan on public.tenk1 |
| Output: tenk1.two, tenk1.four, tenk1.two |
| Optimizer: Postgres query optimizer |
| (18 rows) |
| |
| SELECT count(*) FROM |
| (SELECT DISTINCT two, four, two FROM tenk1) ss; |
| count |
| ------- |
| 4 |
| (1 row) |
| |
| -- |
| -- Compare results between plans using sorting and plans using hash |
| -- aggregation. Force spilling in both cases by setting work_mem low. |
| -- |
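| -- (64kB is the smallest allowed work_mem, which should be low enough that both the sort and the hash table spill for the 10000 input rows.) |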
| SET work_mem='64kB'; |
| -- Produce results with sorting. |
| SET enable_hashagg=FALSE; |
| SET optimizer_enable_hashagg=FALSE; |
| SET jit_above_cost=0; |
| EXPLAIN (costs off) |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| QUERY PLAN |
| ------------------------------------------------ |
| Unique |
| Group Key: ((g % 1000)) |
| -> Sort |
| Sort Key: ((g % 1000)) |
| -> Function Scan on generate_series g |
| (5 rows) |
| |
| CREATE TABLE distinct_group_1 AS |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| SET jit_above_cost TO DEFAULT; |
| CREATE TABLE distinct_group_2 AS |
| SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g; |
| SET enable_seqscan = 0; |
| -- Check to see we get an incremental sort plan |
| EXPLAIN (costs off) |
| SELECT DISTINCT hundred, two FROM tenk1; |
| QUERY PLAN |
| ------------------------------------------------------------------------ |
| GroupAggregate |
| Group Key: hundred, two |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: hundred, two |
| -> Sort |
| Sort Key: hundred, two |
| -> GroupAggregate |
| Group Key: hundred, two |
| -> Sort |
| Sort Key: hundred, two |
| -> Bitmap Heap Scan on tenk1 |
| -> Bitmap Index Scan on tenk1_hundred |
| Optimizer: Postgres query optimizer |
| (13 rows) |
| |
| RESET enable_seqscan; |
| SET enable_hashagg=TRUE; |
| SET optimizer_enable_hashagg=TRUE; |
| -- Produce results with hash aggregation. |
| SET enable_sort=FALSE; |
| SET jit_above_cost=0; |
| EXPLAIN (costs off) |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| QUERY PLAN |
| ------------------------------------------ |
| HashAggregate |
| Group Key: (g % 1000) |
| -> Function Scan on generate_series g |
| (3 rows) |
| |
| CREATE TABLE distinct_hash_1 AS |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| SET jit_above_cost TO DEFAULT; |
| CREATE TABLE distinct_hash_2 AS |
| SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g; |
| SET enable_sort=TRUE; |
| SET work_mem TO DEFAULT; |
| -- Compare results |
| (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1) |
| UNION ALL |
| (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1); |
| ?column? |
| ---------- |
| (0 rows) |
| |
| (SELECT * FROM distinct_hash_2 EXCEPT SELECT * FROM distinct_group_2) |
| UNION ALL |
| (SELECT * FROM distinct_group_2 EXCEPT SELECT * FROM distinct_hash_2); |
| text |
| ------ |
| (0 rows) |
| |
| DROP TABLE distinct_hash_1; |
| DROP TABLE distinct_hash_2; |
| DROP TABLE distinct_group_1; |
| DROP TABLE distinct_group_2; |
| -- Test parallel DISTINCT |
| SET parallel_tuple_cost=0; |
| SET parallel_setup_cost=0; |
| SET min_parallel_table_scan_size=0; |
| SET max_parallel_workers_per_gather=2; |
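| -- (Zero parallel setup/tuple costs and a zero minimum table scan size make a parallel plan as cheap as possible here.) |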
| -- Ensure we get a parallel plan |
| EXPLAIN (costs off) |
| SELECT DISTINCT four FROM tenk1; |
| QUERY PLAN |
| ------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> HashAggregate |
| Group Key: four |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: four |
| -> HashAggregate |
| Group Key: four |
| -> Seq Scan on tenk1 |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
| -- Ensure the parallel plan produces the correct results |
| SELECT DISTINCT four FROM tenk1; |
| four |
| ------ |
| 0 |
| 1 |
| 2 |
| 3 |
| (4 rows) |
| |
| CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$ |
| BEGIN |
| RETURN a; |
| END; |
| $$ LANGUAGE plpgsql PARALLEL UNSAFE; |
| -- Ensure we don't do parallel distinct with a parallel unsafe function |
| EXPLAIN (COSTS OFF) |
| SELECT DISTINCT distinct_func(1) FROM tenk1; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: (distinct_func(1)) |
| -> Unique |
| Group Key: (distinct_func(1)) |
| -> Sort |
| Sort Key: (distinct_func(1)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: (distinct_func(1)) |
| -> Seq Scan on tenk1 |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| -- make the function parallel safe |
| CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$ |
| BEGIN |
| RETURN a; |
| END; |
| $$ LANGUAGE plpgsql PARALLEL SAFE; |
| -- Ensure we do parallel distinct now that the function is parallel safe |
| EXPLAIN (COSTS OFF) |
| SELECT DISTINCT distinct_func(1) FROM tenk1; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: (distinct_func(1)) |
| -> Unique |
| Group Key: (distinct_func(1)) |
| -> Sort |
| Sort Key: (distinct_func(1)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: (distinct_func(1)) |
| -> Seq Scan on tenk1 |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| RESET max_parallel_workers_per_gather; |
| RESET min_parallel_table_scan_size; |
| RESET parallel_setup_cost; |
| RESET parallel_tuple_cost; |
| -- |
| -- Test the planner's ability to use a LIMIT 1 instead of a Unique node when |
| -- all of the distinct_pathkeys have been marked as redundant |
| -- |
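| -- (With four = 0 in the WHERE clause every DISTINCT column is pinned to a constant, so at most one distinct row is possible; upstream PostgreSQL exploits this with LIMIT 1, while the Postgres-planner output captured below uses a GroupAggregate instead.) |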
| -- Ensure we get a plan with a Limit 1 |
| EXPLAIN (COSTS OFF) |
| SELECT DISTINCT four FROM tenk1 WHERE four = 0; |
| QUERY PLAN |
| ------------------------------------------------ |
| GroupAggregate |
| Group Key: four |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: four |
| -> Seq Scan on tenk1 |
| Filter: (four = 0) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| -- Ensure the above gives us the correct result |
| SELECT DISTINCT four FROM tenk1 WHERE four = 0; |
| four |
| ------ |
| 0 |
| (1 row) |
| |
| -- Ensure we get a plan with a Limit 1 |
| EXPLAIN (COSTS OFF) |
| SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; |
| QUERY PLAN |
| --------------------------------------------------------- |
| GroupAggregate |
| Group Key: four |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: four |
| -> Seq Scan on tenk1 |
| Filter: ((two <> 0) AND (four = 0)) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| -- Ensure no rows are returned |
| SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; |
| four |
| ------ |
| (0 rows) |
| |
| -- Ensure we get a plan with a Limit 1 when the SELECT list contains constants |
| EXPLAIN (COSTS OFF) |
| SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; |
| QUERY PLAN |
| ------------------------------------------------ |
| GroupAggregate |
| Group Key: four, 1, 2, 3 |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: four, 1, 2, 3 |
| -> Seq Scan on tenk1 |
| Filter: (four = 0) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| -- Ensure we only get 1 row |
| SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; |
| four | ?column? | ?column? | ?column? |
| ------+----------+----------+---------- |
| 0 | 1 | 2 | 3 |
| (1 row) |
| |
| -- |
| -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its |
| -- very own regression file. |
| -- |
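| -- (Unlike =, IS DISTINCT FROM treats NULL as an ordinary comparable value: NULL IS DISTINCT FROM NULL is false, and NULL IS DISTINCT FROM 1 is true, as the cases below show.) |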
| CREATE TEMP TABLE disttable (f1 integer); |
| INSERT INTO DISTTABLE VALUES(1); |
| INSERT INTO DISTTABLE VALUES(2); |
| INSERT INTO DISTTABLE VALUES(3); |
| INSERT INTO DISTTABLE VALUES(NULL); |
| -- basic cases |
| SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable; |
| f1 | not 2 |
| ----+------- |
| 1 | t |
| 2 | f |
| 3 | t |
| | t |
| (4 rows) |
| |
| SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable; |
| f1 | not null |
| ----+---------- |
| 1 | t |
| 2 | t |
| 3 | t |
| | f |
| (4 rows) |
| |
| SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable; |
| f1 | false |
| ----+------- |
| 1 | f |
| 2 | f |
| 3 | f |
| | f |
| (4 rows) |
| |
| SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable; |
| f1 | not null |
| ----+---------- |
| 1 | t |
| 2 | t |
| 3 | t |
| | f |
| (4 rows) |
| |
| -- check that optimizer constant-folds it properly |
| SELECT 1 IS DISTINCT FROM 2 as "yes"; |
| yes |
| ----- |
| t |
| (1 row) |
| |
| SELECT 2 IS DISTINCT FROM 2 as "no"; |
| no |
| ---- |
| f |
| (1 row) |
| |
| SELECT 2 IS DISTINCT FROM null as "yes"; |
| yes |
| ----- |
| t |
| (1 row) |
| |
| SELECT null IS DISTINCT FROM null as "no"; |
| no |
| ---- |
| f |
| (1 row) |
| |
| -- negated form |
| SELECT 1 IS NOT DISTINCT FROM 2 as "no"; |
| no |
| ---- |
| f |
| (1 row) |
| |
| SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; |
| yes |
| ----- |
| t |
| (1 row) |
| |
| SELECT 2 IS NOT DISTINCT FROM null as "no"; |
| no |
| ---- |
| f |
| (1 row) |
| |
| SELECT null IS NOT DISTINCT FROM null as "yes"; |
| yes |
| ----- |
| t |
| (1 row) |
| |
| -- join cases |
| -- Test IS DISTINCT FROM and IS NOT DISTINCT FROM as join quals. The Postgres planner doesn't |
| -- support hash join on IS NOT DISTINCT FROM for now; ORCA does support hash join on "IS NOT DISTINCT FROM". |
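| -- (Accordingly, every plan below from the Postgres planner uses a Nested Loop with the inner side broadcast, for both the DISTINCT and NOT DISTINCT quals.) |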
| CREATE TABLE distinct_1(a int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE distinct_2(a int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| INSERT INTO distinct_1 VALUES(1),(2),(NULL); |
| INSERT INTO distinct_2 VALUES(1),(NULL); |
| EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10214778180.80 rows=9264416310 width=8) |
| -> Nested Loop (cost=10000000000.00..10091252630.00 rows=3088138770 width=8) |
| Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a) |
| -> Seq Scan on distinct_1 (cost=0.00..355.00 rows=32100 width=4) |
| -> Materialize (cost=0.00..2120.50 rows=96300 width=4) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=4) |
| -> Seq Scan on distinct_2 (cost=0.00..355.00 rows=32100 width=4) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10214778180.80 rows=9264416310 width=8) |
| -> Nested Loop Left Join (cost=10000000000.00..10091252630.00 rows=3088138770 width=8) |
| Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a) |
| -> Seq Scan on distinct_1 (cost=0.00..355.00 rows=32100 width=4) |
| -> Materialize (cost=0.00..2120.50 rows=96300 width=4) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=4) |
| -> Seq Scan on distinct_2 (cost=0.00..355.00 rows=32100 width=4) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10214778180.80 rows=9264416310 width=8) |
| -> Nested Loop Left Join (cost=10000000000.00..10091252630.00 rows=3088138770 width=8) |
| Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a) |
| -> Seq Scan on distinct_2 (cost=0.00..355.00 rows=32100 width=4) |
| -> Materialize (cost=0.00..2120.50 rows=96300 width=4) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=4) |
| -> Seq Scan on distinct_1 (cost=0.00..355.00 rows=32100 width=4) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10091376279.20 rows=9273690 width=8) |
| -> Nested Loop (cost=10000000000.00..10091252630.00 rows=3091230 width=8) |
| Join Filter: (NOT (distinct_1.a IS DISTINCT FROM distinct_2.a)) |
| -> Seq Scan on distinct_1 (cost=0.00..355.00 rows=32100 width=4) |
| -> Materialize (cost=0.00..2120.50 rows=96300 width=4) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=4) |
| -> Seq Scan on distinct_2 (cost=0.00..355.00 rows=32100 width=4) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10091376279.20 rows=9273690 width=8) |
| -> Nested Loop Left Join (cost=10000000000.00..10091252630.00 rows=3091230 width=8) |
| Join Filter: (NOT (distinct_1.a IS DISTINCT FROM distinct_2.a)) |
| -> Seq Scan on distinct_1 (cost=0.00..355.00 rows=32100 width=4) |
| -> Materialize (cost=0.00..2120.50 rows=96300 width=4) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=4) |
| -> Seq Scan on distinct_2 (cost=0.00..355.00 rows=32100 width=4) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10091376279.20 rows=9273690 width=8) |
| -> Nested Loop Left Join (cost=10000000000.00..10091252630.00 rows=3091230 width=8) |
| Join Filter: (NOT (distinct_1.a IS DISTINCT FROM distinct_2.a)) |
| -> Seq Scan on distinct_2 (cost=0.00..355.00 rows=32100 width=4) |
| -> Materialize (cost=0.00..2120.50 rows=96300 width=4) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1639.00 rows=96300 width=4) |
| -> Seq Scan on distinct_1 (cost=0.00..355.00 rows=32100 width=4) |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| 2 | |
| 2 | 1 |
| | 1 |
| 1 | |
| (4 rows) |
| |
| SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| 2 | |
| 2 | 1 |
| | 1 |
| 1 | |
| (4 rows) |
| |
| SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| 1 | |
| 2 | |
| 2 | 1 |
| | 1 |
| (4 rows) |
| |
| SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| 1 | 1 |
| | |
| (2 rows) |
| |
| SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| 1 | 1 |
| 2 | |
| | |
| (3 rows) |
| |
| SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| | |
| 1 | 1 |
| (2 rows) |
| |
| DROP TABLE distinct_1; |
| DROP TABLE distinct_2; |
| -- gpdb start: test DISTINCT on inheritance and partitioned tables when gp_statistics_pullup_from_child_partition is on |
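| -- (As the name suggests, this GUC lets the planner pull statistics up from child partitions when planning against the parent table.) |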
| set gp_statistics_pullup_from_child_partition to on; |
| CREATE TABLE sales (id int, date date, amt decimal(10,2)) |
| DISTRIBUTED BY (id); |
| insert into sales values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100); |
| select distinct * from sales order by 1; |
| id | date | amt |
| ----+------------+-------- |
| 1 | 02-02-2021 | 20.00 |
| 2 | 06-02-2021 | 9.00 |
| 3 | 10-02-2021 | 100.00 |
| (3 rows) |
| |
| select distinct sales from sales order by 1; |
| sales |
| ----------------------- |
| (1,02-02-2021,20.00) |
| (2,06-02-2021,9.00) |
| (3,10-02-2021,100.00) |
| (3 rows) |
| |
| CREATE TABLE sales_partition (id int, date date, amt decimal(10,2)) |
| DISTRIBUTED BY (id) |
| PARTITION BY RANGE (date) |
| ( START (date '2021-01-01') INCLUSIVE |
| END (date '2022-01-01') EXCLUSIVE |
| EVERY (INTERVAL '1 month') ); |
| insert into sales_partition values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100); |
| select distinct * from sales_partition order by 1; |
| id | date | amt |
| ----+------------+-------- |
| 1 | 02-02-2021 | 20.00 |
| 2 | 06-02-2021 | 9.00 |
| 3 | 10-02-2021 | 100.00 |
| (3 rows) |
| |
| select distinct sales_partition from sales_partition order by 1; |
| sales_partition |
| ----------------------- |
| (1,02-02-2021,20.00) |
| (2,06-02-2021,9.00) |
| (3,10-02-2021,100.00) |
| (3 rows) |
| |
| DROP TABLE sales; |
| DROP TABLE sales_partition; |
| CREATE TABLE cities ( |
| name text, |
| population float, |
| altitude int |
| ); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'name' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE capitals ( |
| state char(2) |
| ) INHERITS (cities); |
| NOTICE: table has parent, setting distribution columns to match parent table |
| select distinct * from cities; |
| name | population | altitude |
| ------+------------+---------- |
| (0 rows) |
| |
| select distinct cities from cities; |
| cities |
| -------- |
| (0 rows) |
| |
| DROP TABLE capitals; |
| DROP TABLE cities; |
| set gp_statistics_pullup_from_child_partition to off; |
| -- gpdb end: test DISTINCT on inheritance and partitioned tables when gp_statistics_pullup_from_child_partition is on |
| create table t_distinct_sort(a int, b int, c int); |
| insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; |
| insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; |
| insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; |
| analyze t_distinct_sort; |
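| -- With no GROUP BY, an aggregate query yields a single row, so the planner can drop the DISTINCT / DISTINCT ON clauses entirely: none of the plans below contain a Unique node until the volatile DISTINCT ON case further down. |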
| explain(verbose, costs off) |
| select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(a), sum(b) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(a)), (PARTIAL sum(b)) |
| -> Partial Aggregate |
| Output: PARTIAL count(a), PARTIAL sum(b) |
| -> Seq Scan on public.t_distinct_sort |
| Output: a, b, c |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); |
| count | sum |
| -------+----- |
| 30 | 195 |
| (1 row) |
| |
| explain(verbose, costs off) |
| select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(a), sum(b) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(a)), (PARTIAL sum(b)) |
| -> Partial Aggregate |
| Output: PARTIAL count(a), PARTIAL sum(b) |
| -> Seq Scan on public.t_distinct_sort |
| Output: a, b, c |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); |
| count | sum |
| -------+----- |
| 30 | 195 |
| (1 row) |
| |
| explain(verbose, costs off) |
| select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(a), sum(b) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(a)), (PARTIAL sum(b)) |
| -> Partial Aggregate |
| Output: PARTIAL count(a), PARTIAL sum(b) |
| -> Seq Scan on public.t_distinct_sort |
| Output: a, b, c |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); |
| count | sum |
| -------+----- |
| 30 | 195 |
| (1 row) |
| |
| explain(verbose, costs off) |
| select distinct count(a), sum(b) from t_distinct_sort ; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(a), sum(b) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(a)), (PARTIAL sum(b)) |
| -> Partial Aggregate |
| Output: PARTIAL count(a), PARTIAL sum(b) |
| -> Seq Scan on public.t_distinct_sort |
| Output: a, b, c |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| select distinct count(a), sum(b) from t_distinct_sort ; |
| count | sum |
| -------+----- |
| 30 | 195 |
| (1 row) |
| |
| -- should keep the DISTINCT clause here, because the DISTINCT ON expression contains a volatile function |
| explain(verbose, costs off) |
| select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------- |
| Unique |
| Output: (count(a)), (sum(b)), (count(random())) |
| Group Key: (count(random())) |
| -> Sort |
| Output: (count(a)), (sum(b)), (count(random())) |
| Sort Key: (count(random())) |
| -> Finalize Aggregate |
| Output: count(a), sum(b), count(random()) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL count(random())) |
| -> Partial Aggregate |
| Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL count(random()) |
| -> Seq Scan on public.t_distinct_sort |
| Output: a, b, c |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; |
| count | sum |
| -------+----- |
| 30 | 195 |
| (1 row) |
| |
| explain(verbose, costs off) |
| select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( |
| 0, 2) from t_distinct_sort)as xx; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(t_distinct_sort.a) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(t_distinct_sort.a)) |
| -> Partial Aggregate |
| Output: PARTIAL count(t_distinct_sort.a) |
| -> Nested Loop |
| Output: t_distinct_sort.a |
| -> Broadcast Motion 1:3 (slice2; segments: 1) |
| -> Subquery Scan on xx |
| -> Unique |
| Output: (count(*)), (generate_series(0, 2)) |
| Group Key: (count(*)), (generate_series(0, 2)) |
| -> Sort |
| Output: (count(*)), (generate_series(0, 2)) |
| Sort Key: (count(*)), (generate_series(0, 2)) |
| -> ProjectSet |
| Output: (count(*)), generate_series(0, 2) |
| -> Finalize Aggregate |
| Output: count(*) |
| -> Gather Motion 3:1 (slice3; segments: 3) |
| Output: (PARTIAL count(*)) |
| -> Partial Aggregate |
| Output: PARTIAL count(*) |
| -> Seq Scan on public.t_distinct_sort t_distinct_sort_1 |
| Output: t_distinct_sort_1.a, t_distinct_sort_1.b, t_distinct_sort_1.c |
| -> Materialize |
| Output: t_distinct_sort.a |
| -> Seq Scan on public.t_distinct_sort |
| Output: t_distinct_sort.a |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (32 rows) |
| |
| select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( |
| 0, 2) from t_distinct_sort)as xx; |
| count |
| ------- |
| 90 |
| (1 row) |
| |
| drop table t_distinct_sort; |
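| -- The same optimization applies when aggregating over a bare function scan: no Unique node is needed. |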
| explain(verbose, costs off) |
| select distinct(count(a)) from generate_series(0, 1) as a; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------- |
| Aggregate |
| Output: count(a) |
| -> Function Scan on pg_catalog.generate_series a |
| Output: a |
| Function Call: generate_series(0, 1) |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (7 rows) |
| |
| select distinct(count(a)) from generate_series(0, 1) as a; |
| count |
| ------- |
| 2 |
| (1 row) |
| |
| explain(verbose, costs off) |
| select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------- |
| Aggregate |
| Output: count(*) |
| -> Nested Loop |
| -> Function Scan on pg_catalog.generate_series a |
| Output: a.a |
| Function Call: generate_series(0, 1) |
| -> Function Scan on pg_catalog.generate_series b |
| Output: b.b |
| Function Call: generate_series(0, 2) |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; |
| count |
| ------- |
| 6 |
| (1 row) |
| |
| -- please refer to https://github.com/greenplum-db/gpdb/issues/15033 |
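| -- The constant check expressions should be folded at plan time: 0.4 IS DISTINCT FROM 0.3 is always true, so t1_issue_15033 accepts any row, while 0.4 IS NOT DISTINCT FROM 0.3 is always false, so every insert into t2_issue_15033 fails. |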
| CREATE TABLE t1_issue_15033(c DECIMAL CHECK (0.4 IS DISTINCT FROM 0.3)); |
| CREATE TABLE t2_issue_15033(c DECIMAL CHECK (0.4 IS NOT DISTINCT FROM 0.3)); |
| INSERT INTO t1_issue_15033 VALUES(10); |
| SELECT * FROM t1_issue_15033; |
| c |
| ---- |
| 10 |
| (1 row) |
| |
| INSERT INTO t2_issue_15033 VALUES(10); |
| ERROR: new row for relation "t2_issue_15033" violates check constraint "t2_issue_15033_check" |
| DETAIL: Failing row contains (10). |
| SELECT * FROM t2_issue_15033; |
| c |
| --- |
| (0 rows) |
| |