| -- |
| -- SELECT_DISTINCT |
| -- |
| |
| -- |
| -- awk '{print $3;}' onek.data | sort -n | uniq |
| -- |
| SELECT DISTINCT two FROM onek ORDER BY 1; |
| |
| -- |
| -- awk '{print $5;}' onek.data | sort -n | uniq |
| -- |
| SELECT DISTINCT ten FROM onek ORDER BY 1; |
| |
| -- |
| -- awk '{print $16;}' onek.data | sort -d | uniq |
| -- |
| SELECT DISTINCT string4 FROM onek ORDER BY 1; |
| |
| -- |
| -- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq | |
| -- sort +0n -1 +1d -2 +2n -3 |
| -- |
| SELECT DISTINCT two, string4, ten |
| FROM onek |
| ORDER BY two using <, string4 using <, ten using <; |
| |
| -- |
| -- awk '{print $2;}' person.data | |
| -- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data | |
| -- awk '{if(NF!=1){print $2;}else{print;}}' - student.data | |
| -- awk 'BEGIN{FS=" ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data | |
| -- sort -n -r | uniq |
| -- |
| SELECT DISTINCT p.age FROM person* p ORDER BY age using >; |
| |
| -- |
| -- Check mentioning same column more than once |
| -- |
| |
| EXPLAIN (VERBOSE, COSTS OFF) |
| SELECT count(*) FROM |
| (SELECT DISTINCT two, four, two FROM tenk1) ss; |
| |
| SELECT count(*) FROM |
| (SELECT DISTINCT two, four, two FROM tenk1) ss; |
| |
| -- |
| -- Compare results between plans using sorting and plans using hash |
| -- aggregation. Force spilling in both cases by setting work_mem low. |
| -- |
| |
| SET work_mem='64kB'; |
| |
| -- Produce results with sorting. |
| |
| SET enable_hashagg=FALSE; |
| SET optimizer_enable_hashagg=FALSE; |
| |
| SET jit_above_cost=0; |
| |
| EXPLAIN (costs off) |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| |
| CREATE TABLE distinct_group_1 AS |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| |
| SET jit_above_cost TO DEFAULT; |
| |
| CREATE TABLE distinct_group_2 AS |
| SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g; |
| |
| SET enable_seqscan = 0; |
| |
| -- Check to see we get an incremental sort plan |
| EXPLAIN (costs off) |
| SELECT DISTINCT hundred, two FROM tenk1; |
| |
| RESET enable_seqscan; |
| |
| SET enable_hashagg=TRUE; |
| SET optimizer_enable_hashagg=TRUE; |
| |
| -- Produce results with hash aggregation. |
| |
| SET enable_sort=FALSE; |
| |
| SET jit_above_cost=0; |
| |
| EXPLAIN (costs off) |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| |
| CREATE TABLE distinct_hash_1 AS |
| SELECT DISTINCT g%1000 FROM generate_series(0,9999) g; |
| |
| SET jit_above_cost TO DEFAULT; |
| |
| CREATE TABLE distinct_hash_2 AS |
| SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g; |
| |
| SET enable_sort=TRUE; |
| |
| SET work_mem TO DEFAULT; |
| |
| -- Compare results |
| |
| (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1) |
| UNION ALL |
| (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1); |
| |
| (SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1) |
| UNION ALL |
| (SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1); |
| |
| DROP TABLE distinct_hash_1; |
| DROP TABLE distinct_hash_2; |
| DROP TABLE distinct_group_1; |
| DROP TABLE distinct_group_2; |
| |
| -- Test parallel DISTINCT |
| SET parallel_tuple_cost=0; |
| SET parallel_setup_cost=0; |
| SET min_parallel_table_scan_size=0; |
| SET max_parallel_workers_per_gather=2; |
| |
| -- Ensure we get a parallel plan |
| EXPLAIN (costs off) |
| SELECT DISTINCT four FROM tenk1; |
| |
| -- Ensure the parallel plan produces the correct results |
| SELECT DISTINCT four FROM tenk1; |
| |
| CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$ |
| BEGIN |
| RETURN a; |
| END; |
| $$ LANGUAGE plpgsql PARALLEL UNSAFE; |
| |
| -- Ensure we don't do parallel distinct with a parallel unsafe function |
| EXPLAIN (COSTS OFF) |
| SELECT DISTINCT distinct_func(1) FROM tenk1; |
| |
| -- make the function parallel safe |
| CREATE OR REPLACE FUNCTION distinct_func(a INT) RETURNS INT AS $$ |
| BEGIN |
| RETURN a; |
| END; |
| $$ LANGUAGE plpgsql PARALLEL SAFE; |
| |
| -- Ensure we do parallel distinct now that the function is parallel safe |
| EXPLAIN (COSTS OFF) |
| SELECT DISTINCT distinct_func(1) FROM tenk1; |
| |
| RESET max_parallel_workers_per_gather; |
| RESET min_parallel_table_scan_size; |
| RESET parallel_setup_cost; |
| RESET parallel_tuple_cost; |
| |
| -- |
| -- Test the planner's ability to use a LIMIT 1 instead of a Unique node when |
| -- all of the distinct_pathkeys have been marked as redundant |
| -- |
| |
| -- Ensure we get a plan with a Limit 1 |
| EXPLAIN (COSTS OFF) |
| SELECT DISTINCT four FROM tenk1 WHERE four = 0; |
| |
| -- Ensure the above gives us the correct result |
| SELECT DISTINCT four FROM tenk1 WHERE four = 0; |
| |
| -- Ensure we get a plan with a Limit 1 |
| EXPLAIN (COSTS OFF) |
| SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; |
| |
| -- Ensure no rows are returned |
| SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; |
| |
| -- Ensure we get a plan with a Limit 1 when the SELECT list contains constants |
| EXPLAIN (COSTS OFF) |
| SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; |
| |
| -- Ensure we only get 1 row |
| SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; |
| |
| -- |
| -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its |
| -- very own regression file. |
| -- |
| |
| CREATE TEMP TABLE disttable (f1 integer); |
| INSERT INTO DISTTABLE VALUES(1); |
| INSERT INTO DISTTABLE VALUES(2); |
| INSERT INTO DISTTABLE VALUES(3); |
| INSERT INTO DISTTABLE VALUES(NULL); |
| |
| -- basic cases |
| SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable; |
| SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable; |
| SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable; |
| SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable; |
| |
| -- check that optimizer constant-folds it properly |
| SELECT 1 IS DISTINCT FROM 2 as "yes"; |
| SELECT 2 IS DISTINCT FROM 2 as "no"; |
| SELECT 2 IS DISTINCT FROM null as "yes"; |
| SELECT null IS DISTINCT FROM null as "no"; |
| |
| -- negated form |
| SELECT 1 IS NOT DISTINCT FROM 2 as "no"; |
| SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; |
| SELECT 2 IS NOT DISTINCT FROM null as "no"; |
| SELECT null IS NOT DISTINCT FROM null as "yes"; |
| |
| -- join cases |
| -- test IS DISTINCT FROM and IS NOT DISTINCT FROM join qual.The postgres planner doesn't support hash join on |
| -- IS NOT DISTINCT FROM for now, ORCA supports Hash Join on "IS NOT DISTINCT FROM". |
| |
| CREATE TABLE distinct_1(a int); |
| CREATE TABLE distinct_2(a int); |
| INSERT INTO distinct_1 VALUES(1),(2),(NULL); |
| INSERT INTO distinct_2 VALUES(1),(NULL); |
| |
| EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a; |
| EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| |
| EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| |
| SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a; |
| SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a; |
| |
| SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a; |
| |
| DROP TABLE distinct_1; |
| DROP TABLE distinct_2; |
| |
| |
| -- gpdb start: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on |
| set gp_statistics_pullup_from_child_partition to on; |
| CREATE TABLE sales (id int, date date, amt decimal(10,2)) |
| DISTRIBUTED BY (id); |
| insert into sales values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100); |
| select distinct * from sales order by 1; |
| select distinct sales from sales order by 1; |
| CREATE TABLE sales_partition (id int, date date, amt decimal(10,2)) |
| DISTRIBUTED BY (id) |
| PARTITION BY RANGE (date) |
| ( START (date '2021-01-01') INCLUSIVE |
| END (date '2022-01-01') EXCLUSIVE |
| EVERY (INTERVAL '1 month') ); |
| insert into sales_partition values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100); |
| select distinct * from sales_partition order by 1; |
| select distinct sales_partition from sales_partition order by 1; |
| DROP TABLE sales; |
| DROP TABLE sales_partition; |
| |
| CREATE TABLE cities ( |
| name text, |
| population float, |
| altitude int |
| ); |
| CREATE TABLE capitals ( |
| state char(2) |
| ) INHERITS (cities); |
| select distinct * from cities; |
| select distinct cities from cities; |
| DROP TABLE capitals; |
| DROP TABLE cities; |
| set gp_statistics_pullup_from_child_partition to off; |
| -- gpdb end: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on |
| |
| create table t_distinct_sort(a int, b int, c int); |
| insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; |
| insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; |
| insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i; |
| analyze t_distinct_sort; |
| |
| explain(verbose, costs off) |
| select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); |
| select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a); |
| explain(verbose, costs off) |
| select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); |
| select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c); |
| explain(verbose, costs off) |
| select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); |
| select count(a), sum(b) from t_distinct_sort order by sum(a), count(c); |
| explain(verbose, costs off) |
| select distinct count(a), sum(b) from t_distinct_sort ; |
| select distinct count(a), sum(b) from t_distinct_sort ; |
| |
| -- should keep distinct clause |
| explain(verbose, costs off) |
| select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; |
| select distinct on(count(random())) count(a), sum(b) from t_distinct_sort; |
| |
| explain(verbose, costs off) |
| select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( |
| 0, 2) from t_distinct_sort)as xx; |
| select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series( |
| 0, 2) from t_distinct_sort)as xx; |
| drop table t_distinct_sort; |
| |
| explain(verbose, costs off) |
| select distinct(count(a)) from generate_series(0, 1) as a; |
| select distinct(count(a)) from generate_series(0, 1) as a; |
| explain(verbose, costs off) |
| select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; |
| select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true; |
| -- please refer to https://github.com/greenplum-db/gpdb/issues/15033 |
| CREATE TABLE t1_issue_15033(c DECIMAL CHECK (0.4 IS DISTINCT FROM 0.3)); |
| CREATE TABLE t2_issue_15033(c DECIMAL CHECK (0.4 IS NOT DISTINCT FROM 0.3)); |
| INSERT INTO t1_issue_15033 VALUES(10); |
| SELECT * FROM t1_issue_15033; |
| INSERT INTO t2_issue_15033 VALUES(10); |
| SELECT * FROM t2_issue_15033; |