| -- |
| -- SELECT_DISTINCT |
| -- |
| -- |
| -- awk '{print $3;}' onek.data | sort -n | uniq |
| -- |
| SELECT DISTINCT two FROM onek ORDER BY 1; |
| two |
| ----- |
| 0 |
| 1 |
| (2 rows) |
| |
| -- |
| -- awk '{print $5;}' onek.data | sort -n | uniq |
| -- |
| SELECT DISTINCT ten FROM onek ORDER BY 1; |
| ten |
| ----- |
| 0 |
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
| 9 |
| (10 rows) |
| |
| -- |
| -- awk '{print $16;}' onek.data | sort -d | uniq |
| -- |
| SELECT DISTINCT string4 FROM onek ORDER BY 1; |
| string4 |
| --------- |
| AAAAxx |
| HHHHxx |
| OOOOxx |
| VVVVxx |
| (4 rows) |
| |
| -- |
| -- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq | |
| -- sort +0n -1 +1d -2 +2n -3 |
| -- |
| SELECT DISTINCT two, string4, ten |
| FROM onek |
| ORDER BY two using <, string4 using <, ten using <; |
| two | string4 | ten |
| -----+---------+----- |
| 0 | AAAAxx | 0 |
| 0 | AAAAxx | 2 |
| 0 | AAAAxx | 4 |
| 0 | AAAAxx | 6 |
| 0 | AAAAxx | 8 |
| 0 | HHHHxx | 0 |
| 0 | HHHHxx | 2 |
| 0 | HHHHxx | 4 |
| 0 | HHHHxx | 6 |
| 0 | HHHHxx | 8 |
| 0 | OOOOxx | 0 |
| 0 | OOOOxx | 2 |
| 0 | OOOOxx | 4 |
| 0 | OOOOxx | 6 |
| 0 | OOOOxx | 8 |
| 0 | VVVVxx | 0 |
| 0 | VVVVxx | 2 |
| 0 | VVVVxx | 4 |
| 0 | VVVVxx | 6 |
| 0 | VVVVxx | 8 |
| 1 | AAAAxx | 1 |
| 1 | AAAAxx | 3 |
| 1 | AAAAxx | 5 |
| 1 | AAAAxx | 7 |
| 1 | AAAAxx | 9 |
| 1 | HHHHxx | 1 |
| 1 | HHHHxx | 3 |
| 1 | HHHHxx | 5 |
| 1 | HHHHxx | 7 |
| 1 | HHHHxx | 9 |
| 1 | OOOOxx | 1 |
| 1 | OOOOxx | 3 |
| 1 | OOOOxx | 5 |
| 1 | OOOOxx | 7 |
| 1 | OOOOxx | 9 |
| 1 | VVVVxx | 1 |
| 1 | VVVVxx | 3 |
| 1 | VVVVxx | 5 |
| 1 | VVVVxx | 7 |
| 1 | VVVVxx | 9 |
| (40 rows) |
| |
| -- |
| -- awk '{print $2;}' person.data | |
| -- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data | |
| -- awk '{if(NF!=1){print $2;}else{print;}}' - student.data | |
| -- awk 'BEGIN{FS=" ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data | |
| -- sort -n -r | uniq |
| -- |
| SELECT DISTINCT p.age FROM person* p ORDER BY age using >; |
| age |
| ----- |
| 98 |
| 88 |
| 78 |
| 68 |
| 60 |
| 58 |
| 50 |
| 48 |
| 40 |
| 38 |
| 34 |
| 30 |
| 28 |
| 25 |
| 24 |
| 23 |
| 20 |
| 19 |
| 18 |
| 8 |
| (20 rows) |
| |
| -- |
| -- Check mentioning same column more than once |
| -- |
| EXPLAIN (VERBOSE, COSTS OFF) |
| SELECT count(*) FROM |
| (SELECT DISTINCT two, four, two FROM tenk1) ss; |
| QUERY PLAN |
| ----------------------------------------------------------------------------- |
| Aggregate |
| Output: count(*) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: tenk1.two, tenk1.four, tenk1.two |
| -> Sort |
| Output: two, four |
| Sort Key: tenk1.two, tenk1.four, tenk1.two |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: two, four |
| Hash Key: two, four, two |
| -> Streaming HashAggregate |
| Output: two, four |
| Group Key: tenk1.two, tenk1.four, tenk1.two |
| -> Seq Scan on public.tenk1 |
| Output: two, four |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (17 rows) |
| |
| SELECT count(*) FROM |
| (SELECT DISTINCT two, four, two FROM tenk1) ss; |
| count |
| ------- |
| 4 |
| (1 row) |
| |
| -- |
| -- Compare results between plans using sorting and plans using hash |
| -- aggregation. Force spilling in both cases by setting work_mem low. |
| -- |
| SET work_mem='64kB'; |
| -- Produce results with sorting. |
| SET enable_hashagg=FALSE; |
| SET optimizer_enable_hashagg=FALSE; |
| SET jit_above_cost=0; |
| EXPLAIN (costs off) |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| QUERY PLAN |
| ---------------------------------------------- |
| GroupAggregate |
| Group Key: ((generate_series % 1000)) |
| -> Sort |
| Sort Key: ((generate_series % 1000)) |
| -> Function Scan on generate_series |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (6 rows) |
| |
| CREATE TABLE distinct_group_1 AS |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| SET jit_above_cost TO DEFAULT; |
| CREATE TABLE distinct_group_2 AS |
| SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| SET enable_hashagg=TRUE; |
| SET optimizer_enable_hashagg=TRUE; |
| -- Produce results with hash aggregation. |
| SET enable_sort=FALSE; |
| SET jit_above_cost=0; |
| EXPLAIN (costs off) |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| QUERY PLAN |
| ---------------------------------------- |
| HashAggregate |
| Group Key: (generate_series % 1000) |
| -> Function Scan on generate_series |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (4 rows) |
| |
| CREATE TABLE distinct_hash_1 AS |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| SET jit_above_cost TO DEFAULT; |
| CREATE TABLE distinct_hash_2 AS |
| SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry. |
| SET enable_sort=TRUE; |
| SET work_mem TO DEFAULT; |
| -- Compare results |
| (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1) |
| UNION ALL |
| (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1); |
| ?column? |
| ---------- |
| (0 rows) |
| |
| (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1) |
| UNION ALL |
| (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1); |
| ?column? |
| ---------- |
| (0 rows) |
| |
| DROP TABLE distinct_hash_1; |
| DROP TABLE distinct_hash_2; |
| DROP TABLE distinct_group_1; |
| DROP TABLE distinct_group_2; |
| -- |
| -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its |
| -- very own regression file. |
| -- |
| CREATE TEMP TABLE disttable (f1 integer); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| INSERT INTO DISTTABLE VALUES(1); |
| INSERT INTO DISTTABLE VALUES(2); |
| INSERT INTO DISTTABLE VALUES(3); |
| INSERT INTO DISTTABLE VALUES(NULL); |
| -- basic cases |
| SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable; |
| f1 | not 2 |
| ----+------- |
| 2 | f |
| 3 | t |
| | t |
| 1 | t |
| (4 rows) |
| |
| SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable; |
| f1 | not null |
| ----+---------- |
| 2 | t |
| 3 | t |
| | f |
| 1 | t |
| (4 rows) |
| |
| SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable; |
| f1 | false |
| ----+------- |
| 2 | f |
| 3 | f |
| | f |
| 1 | f |
| (4 rows) |
| |
| SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable; |
| f1 | not null |
| ----+---------- |
| 2 | t |
| 3 | t |
| | f |
| 1 | t |
| (4 rows) |
| |
| -- check that optimizer constant-folds it properly |
| SELECT 1 IS DISTINCT FROM 2 as "yes"; |
| yes |
| ----- |
| t |
| (1 row) |
| |
| SELECT 2 IS DISTINCT FROM 2 as "no"; |
| no |
| ---- |
| f |
| (1 row) |
| |
| SELECT 2 IS DISTINCT FROM null as "yes"; |
| yes |
| ----- |
| t |
| (1 row) |
| |
| SELECT null IS DISTINCT FROM null as "no"; |
| no |
| ---- |
| f |
| (1 row) |
| |
| -- negated form |
| SELECT 1 IS NOT DISTINCT FROM 2 as "no"; |
| no |
| ---- |
| f |
| (1 row) |
| |
| SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; |
| yes |
| ----- |
| t |
| (1 row) |
| |
| SELECT 2 IS NOT DISTINCT FROM null as "no"; |
| no |
| ---- |
| f |
| (1 row) |
| |
| SELECT null IS NOT DISTINCT FROM null as "yes"; |
| yes |
| ----- |
| t |
| (1 row) |
| |
| -- join cases |
| -- test IS DISTINCT FROM and IS NOT DISTINCT FROM join qual.The postgres planner doesn't support hash join on |
| -- IS NOT DISTINCT FROM for now, ORCA supports Hash Join on "IS NOT DISTINCT FROM". |
| CREATE TABLE distinct_1(a int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE distinct_2(a int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| INSERT INTO distinct_1 VALUES(1),(2),(NULL); |
| INSERT INTO distinct_2 VALUES(1),(NULL); |
| EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.54 rows=1 width=8) |
| -> Nested Loop (cost=0.00..1324032.54 rows=1 width=8) |
| Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) |
| -> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4) |
| -> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.55 rows=2 width=8) |
| -> Nested Loop Left Join (cost=0.00..1324032.55 rows=1 width=8) |
| Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a) |
| -> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4) |
| -> Materialize (cost=0.00..431.00 rows=1 width=4) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) |
| -> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (8 rows) |
| |
| EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.55 rows=2 width=8) |
| -> Nested Loop Left Join (cost=0.00..1324032.55 rows=1 width=8) |
| Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a) |
| -> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4) |
| -> Materialize (cost=0.00..431.00 rows=1 width=4) |
| -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4) |
| -> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (8 rows) |
| |
| EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=8) |
| -> Hash Join (cost=0.00..862.00 rows=1 width=8) |
| Hash Cond: (NOT (distinct_1.a IS DISTINCT FROM distinct_2.a)) |
| -> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4) |
| -> Hash (cost=431.00..431.00 rows=1 width=4) |
| -> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=8) |
| -> Hash Left Join (cost=0.00..862.00 rows=1 width=8) |
| Hash Cond: (NOT (distinct_1.a IS DISTINCT FROM distinct_2.a)) |
| -> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4) |
| -> Hash (cost=431.00..431.00 rows=1 width=4) |
| -> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=8) |
| -> Hash Left Join (cost=0.00..862.00 rows=1 width=8) |
| Hash Cond: (NOT (distinct_2.a IS DISTINCT FROM distinct_1.a)) |
| -> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4) |
| -> Hash (cost=431.00..431.00 rows=1 width=4) |
| -> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| 1 | |
| 2 | |
| 2 | 1 |
| | 1 |
| (4 rows) |
| |
| SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| 2 | |
| 2 | 1 |
| | 1 |
| 1 | |
| (4 rows) |
| |
| SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| 2 | 1 |
| | 1 |
| 2 | |
| 1 | |
| (4 rows) |
| |
| SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| | |
| 1 | 1 |
| (2 rows) |
| |
| SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| 1 | 1 |
| 2 | |
| | |
| (3 rows) |
| |
| SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| a | a |
| ---+--- |
| 1 | 1 |
| | |
| (2 rows) |
| |
| DROP TABLE distinct_1; |
| DROP TABLE distinct_2; |
| -- gpdb start: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on |
| set gp_statistics_pullup_from_child_partition to on; |
| CREATE TABLE sales (id int, date date, amt decimal(10,2)) |
| DISTRIBUTED BY (id); |
| insert into sales values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100); |
| select distinct * from sales order by 1; |
| id | date | amt |
| ----+------------+-------- |
| 1 | 02-02-2021 | 20.00 |
| 2 | 06-02-2021 | 9.00 |
| 3 | 10-02-2021 | 100.00 |
| (3 rows) |
| |
| select distinct sales from sales order by 1; |
| sales |
| ----------------------- |
| (1,02-02-2021,20.00) |
| (2,06-02-2021,9.00) |
| (3,10-02-2021,100.00) |
| (3 rows) |
| |
| CREATE TABLE sales_partition (id int, date date, amt decimal(10,2)) |
| DISTRIBUTED BY (id) |
| PARTITION BY RANGE (date) |
| ( START (date '2021-01-01') INCLUSIVE |
| END (date '2022-01-01') EXCLUSIVE |
| EVERY (INTERVAL '1 month') ); |
| insert into sales_partition values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100); |
| select distinct * from sales_partition order by 1; |
| id | date | amt |
| ----+------------+-------- |
| 1 | 02-02-2021 | 20.00 |
| 2 | 06-02-2021 | 9.00 |
| 3 | 10-02-2021 | 100.00 |
| (3 rows) |
| |
| select distinct sales_partition from sales_partition order by 1; |
| sales_partition |
| ----------------------- |
| (1,02-02-2021,20.00) |
| (2,06-02-2021,9.00) |
| (3,10-02-2021,100.00) |
| (3 rows) |
| |
| DROP TABLE sales; |
| DROP TABLE sales_partition; |
| CREATE TABLE cities ( |
| name text, |
| population float, |
| altitude int |
| ); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'name' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE capitals ( |
| state char(2) |
| ) INHERITS (cities); |
| NOTICE: table has parent, setting distribution columns to match parent table |
| select distinct * from cities; |
| name | population | altitude |
| ------+------------+---------- |
| (0 rows) |
| |
| select distinct cities from cities; |
| cities |
| -------- |
| (0 rows) |
| |
| DROP TABLE capitals; |
| DROP TABLE cities; |
| set gp_statistics_pullup_from_child_partition to off; |
| -- gpdb end: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on |
| create table t_distinct_sort(a int, b int, c int); |
| insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; |
| insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; |
| insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; |
| analyze t_distinct_sort; |
| explain(verbose, costs off) |
| select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------- |
| Sort |
| Output: (count(a)), (sum(b)) |
| Sort Key: (sum(t_distinct_sort.b)), (count(t_distinct_sort.a)) |
| -> Finalize Aggregate |
| Output: count(a), sum(b) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(a)), (PARTIAL sum(b)) |
| -> Partial Aggregate |
| Output: PARTIAL count(a), PARTIAL sum(b) |
| -> Seq Scan on public.t_distinct_sort |
| Output: a, b |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on' |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); |
| count | sum |
| -------+----- |
| 30 | 195 |
| (1 row) |
| |
| explain(verbose, costs off) |
| select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(a), sum(b) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(a)), (PARTIAL sum(b)) |
| -> Partial Aggregate |
| Output: PARTIAL count(a), PARTIAL sum(b) |
| -> Seq Scan on public.t_distinct_sort |
| Output: a, b, c |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on' |
| Optimizer: Postgres query optimizer |
| (10 rows) |
| |
| select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); |
| count | sum |
| -------+----- |
| 30 | 195 |
| (1 row) |
| |
| explain(verbose, costs off) |
| select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------- |
| Result |
| Output: (count(a)), (sum(b)) |
| -> Sort |
| Output: (count(a)), (sum(b)), (sum(a)), (count(c)) |
| Sort Key: (sum(t_distinct_sort.a)), (count(t_distinct_sort.c)) |
| -> Finalize Aggregate |
| Output: count(a), sum(b), sum(a), count(c) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL sum(a)), (PARTIAL count(c)) |
| -> Partial Aggregate |
| Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL sum(a), PARTIAL count(c) |
| -> Seq Scan on public.t_distinct_sort |
| Output: a, b, c |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on' |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (15 rows) |
| |
| select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); |
| count | sum |
| -------+----- |
| 30 | 195 |
| (1 row) |
| |
| explain(verbose, costs off) |
| select distinct count(a), sum(b) from t_distinct_sort ; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(a), sum(b) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(a)), (PARTIAL sum(b)) |
| -> Partial Aggregate |
| Output: PARTIAL count(a), PARTIAL sum(b) |
| -> Seq Scan on public.t_distinct_sort |
| Output: a, b |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on' |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (10 rows) |
| |
| select distinct count(a), sum(b) from t_distinct_sort ; |
| count | sum |
| -------+----- |
| 30 | 195 |
| (1 row) |
| |
| -- should keep distinct clause |
| explain(verbose, costs off) |
| select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------- |
| Unique |
| Output: (count(a)), (sum(b)), (count(random())) |
| Group Key: (count(random())) |
| -> Sort |
| Output: (count(a)), (sum(b)), (count(random())) |
| Sort Key: (count(random())) |
| -> Finalize Aggregate |
| Output: count(a), sum(b), count(random()) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL count(random())) |
| -> Partial Aggregate |
| Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL count(random()) |
| -> Seq Scan on public.t_distinct_sort |
| Output: a, b, c |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on' |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; |
| count | sum |
| -------+----- |
| 30 | 195 |
| (1 row) |
| |
| explain(verbose, costs off) |
| select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( |
| 0, 2) from t_distinct_sort)as xx; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(t_distinct_sort_1.a) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(t_distinct_sort_1.a)) |
| -> Partial Aggregate |
| Output: PARTIAL count(t_distinct_sort_1.a) |
| -> Nested Loop |
| Output: t_distinct_sort_1.a |
| Join Filter: true |
| -> Seq Scan on public.t_distinct_sort t_distinct_sort_1 |
| Output: t_distinct_sort_1.a |
| -> Materialize |
| -> Broadcast Motion 1:3 (slice2) |
| -> Result |
| -> ProjectSet |
| Output: generate_series(0, 2) |
| -> Finalize Aggregate |
| Output: count(*) |
| -> Gather Motion 3:1 (slice3; segments: 3) |
| Output: (PARTIAL count(*)) |
| -> Partial Aggregate |
| Output: PARTIAL count(*) |
| -> Seq Scan on public.t_distinct_sort |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off' |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (25 rows) |
| |
| select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( |
| 0, 2) from t_distinct_sort)as xx; |
| count |
| ------- |
| 90 |
| (1 row) |
| |
| drop table t_distinct_sort; |
| explain(verbose, costs off) |
| select distinct(count(a)) from generate_series(0, 1) as a; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------- |
| Aggregate |
| Output: count(generate_series) |
| -> Function Scan on pg_catalog.generate_series |
| Output: generate_series |
| Function Call: generate_series(0, 1) |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off' |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| select distinct(count(a)) from generate_series(0, 1) as a; |
| count |
| ------- |
| 2 |
| (1 row) |
| |
| explain(verbose, costs off) |
| select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------- |
| Aggregate |
| Output: count(*) |
| -> Nested Loop |
| Join Filter: true |
| -> Function Scan on pg_catalog.generate_series generate_series_1 |
| Output: generate_series_1.generate_series |
| Function Call: generate_series(0, 1) |
| -> Function Scan on pg_catalog.generate_series |
| Output: generate_series.generate_series |
| Function Call: generate_series(0, 2) |
| Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off' |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (12 rows) |
| |
| select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; |
| count |
| ------- |
| 6 |
| (1 row) |
| |
| -- please refer to https://github.com/greenplum-db/gpdb/issues/15033 |
| CREATE TABLE t1_issue_15033(c DECIMAL CHECK (0.4 IS DISTINCT FROM 0.3)); |
| CREATE TABLE t2_issue_15033(c DECIMAL CHECK (0.4 IS NOT DISTINCT FROM 0.3)); |
| INSERT INTO t1_issue_15033 VALUES(10); |
| SELECT * FROM t1_issue_15033; |
| c |
| ---- |
| 10 |
| (1 row) |
| |
| INSERT INTO t2_issue_15033 VALUES(10); |
| ERROR: new row for relation "t2_issue_15033" violates check constraint "t2_issue_15033_check" (seg2 127.0.0.1:7004 pid=3634583) |
| DETAIL: Failing row contains (10). |
| SELECT * FROM t2_issue_15033; |
| c |
| --- |
| (0 rows) |
| |