blob: a8f221ec110bf17da02ddbe8d8cf65737fd42d27 [file] [log] [blame]
--
-- SELECT_DISTINCT
--
--
-- awk '{print $3;}' onek.data | sort -n | uniq
--
SELECT DISTINCT two FROM onek ORDER BY 1;
two
-----
0
1
(2 rows)
--
-- awk '{print $5;}' onek.data | sort -n | uniq
--
SELECT DISTINCT ten FROM onek ORDER BY 1;
ten
-----
0
1
2
3
4
5
6
7
8
9
(10 rows)
--
-- awk '{print $16;}' onek.data | sort -d | uniq
--
SELECT DISTINCT string4 FROM onek ORDER BY 1;
string4
---------
AAAAxx
HHHHxx
OOOOxx
VVVVxx
(4 rows)
--
-- awk '{print $3,$16,$5;}' onek.data | sort -d | uniq |
-- sort +0n -1 +1d -2 +2n -3
--
SELECT DISTINCT two, string4, ten
FROM onek
ORDER BY two using <, string4 using <, ten using <;
two | string4 | ten
-----+---------+-----
0 | AAAAxx | 0
0 | AAAAxx | 2
0 | AAAAxx | 4
0 | AAAAxx | 6
0 | AAAAxx | 8
0 | HHHHxx | 0
0 | HHHHxx | 2
0 | HHHHxx | 4
0 | HHHHxx | 6
0 | HHHHxx | 8
0 | OOOOxx | 0
0 | OOOOxx | 2
0 | OOOOxx | 4
0 | OOOOxx | 6
0 | OOOOxx | 8
0 | VVVVxx | 0
0 | VVVVxx | 2
0 | VVVVxx | 4
0 | VVVVxx | 6
0 | VVVVxx | 8
1 | AAAAxx | 1
1 | AAAAxx | 3
1 | AAAAxx | 5
1 | AAAAxx | 7
1 | AAAAxx | 9
1 | HHHHxx | 1
1 | HHHHxx | 3
1 | HHHHxx | 5
1 | HHHHxx | 7
1 | HHHHxx | 9
1 | OOOOxx | 1
1 | OOOOxx | 3
1 | OOOOxx | 5
1 | OOOOxx | 7
1 | OOOOxx | 9
1 | VVVVxx | 1
1 | VVVVxx | 3
1 | VVVVxx | 5
1 | VVVVxx | 7
1 | VVVVxx | 9
(40 rows)
--
-- awk '{print $2;}' person.data |
-- awk '{if(NF!=1){print $2;}else{print;}}' - emp.data |
-- awk '{if(NF!=1){print $2;}else{print;}}' - student.data |
-- awk 'BEGIN{FS=" ";}{if(NF!=1){print $5;}else{print;}}' - stud_emp.data |
-- sort -n -r | uniq
--
SELECT DISTINCT p.age FROM person* p ORDER BY age using >;
age
-----
98
88
78
68
60
58
50
48
40
38
34
30
28
25
24
23
20
19
18
8
(20 rows)
--
-- Check mentioning same column more than once
--
EXPLAIN (VERBOSE, COSTS OFF)
SELECT count(*) FROM
(SELECT DISTINCT two, four, two FROM tenk1) ss;
QUERY PLAN
-----------------------------------------------------------------------------
Aggregate
Output: count(*)
-> Gather Motion 3:1 (slice1; segments: 3)
-> GroupAggregate
Group Key: tenk1.two, tenk1.four, tenk1.two
-> Sort
Output: two, four
Sort Key: tenk1.two, tenk1.four, tenk1.two
-> Redistribute Motion 3:3 (slice2; segments: 3)
Output: two, four
Hash Key: two, four, two
-> Streaming HashAggregate
Output: two, four
Group Key: tenk1.two, tenk1.four, tenk1.two
-> Seq Scan on public.tenk1
Output: two, four
Optimizer: Pivotal Optimizer (GPORCA)
(17 rows)
SELECT count(*) FROM
(SELECT DISTINCT two, four, two FROM tenk1) ss;
count
-------
4
(1 row)
--
-- Compare results between plans using sorting and plans using hash
-- aggregation. Force spilling in both cases by setting work_mem low.
--
SET work_mem='64kB';
-- Produce results with sorting.
SET enable_hashagg=FALSE;
SET optimizer_enable_hashagg=FALSE;
SET jit_above_cost=0;
EXPLAIN (costs off)
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
QUERY PLAN
----------------------------------------------
GroupAggregate
Group Key: ((generate_series % 1000))
-> Sort
Sort Key: ((generate_series % 1000))
-> Function Scan on generate_series
Optimizer: Pivotal Optimizer (GPORCA)
(6 rows)
CREATE TABLE distinct_group_1 AS
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
SET jit_above_cost TO DEFAULT;
CREATE TABLE distinct_group_2 AS
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
SET enable_hashagg=TRUE;
SET optimizer_enable_hashagg=TRUE;
-- Produce results with hash aggregation.
SET enable_sort=FALSE;
SET jit_above_cost=0;
EXPLAIN (costs off)
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
QUERY PLAN
----------------------------------------
HashAggregate
Group Key: (generate_series % 1000)
-> Function Scan on generate_series
Optimizer: Pivotal Optimizer (GPORCA)
(4 rows)
CREATE TABLE distinct_hash_1 AS
SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
SET jit_above_cost TO DEFAULT;
CREATE TABLE distinct_hash_2 AS
SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
SET enable_sort=TRUE;
SET work_mem TO DEFAULT;
-- Compare results
(SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
UNION ALL
(SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
?column?
----------
(0 rows)
(SELECT * FROM distinct_hash_1 EXCEPT SELECT * FROM distinct_group_1)
UNION ALL
(SELECT * FROM distinct_group_1 EXCEPT SELECT * FROM distinct_hash_1);
?column?
----------
(0 rows)
DROP TABLE distinct_hash_1;
DROP TABLE distinct_hash_2;
DROP TABLE distinct_group_1;
DROP TABLE distinct_group_2;
--
-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its
-- very own regression file.
--
CREATE TEMP TABLE disttable (f1 integer);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO DISTTABLE VALUES(1);
INSERT INTO DISTTABLE VALUES(2);
INSERT INTO DISTTABLE VALUES(3);
INSERT INTO DISTTABLE VALUES(NULL);
-- basic cases
SELECT f1, f1 IS DISTINCT FROM 2 as "not 2" FROM disttable;
f1 | not 2
----+-------
2 | f
3 | t
| t
1 | t
(4 rows)
SELECT f1, f1 IS DISTINCT FROM NULL as "not null" FROM disttable;
f1 | not null
----+----------
2 | t
3 | t
| f
1 | t
(4 rows)
SELECT f1, f1 IS DISTINCT FROM f1 as "false" FROM disttable;
f1 | false
----+-------
2 | f
3 | f
| f
1 | f
(4 rows)
SELECT f1, f1 IS DISTINCT FROM f1+1 as "not null" FROM disttable;
f1 | not null
----+----------
2 | t
3 | t
| f
1 | t
(4 rows)
-- check that optimizer constant-folds it properly
SELECT 1 IS DISTINCT FROM 2 as "yes";
yes
-----
t
(1 row)
SELECT 2 IS DISTINCT FROM 2 as "no";
no
----
f
(1 row)
SELECT 2 IS DISTINCT FROM null as "yes";
yes
-----
t
(1 row)
SELECT null IS DISTINCT FROM null as "no";
no
----
f
(1 row)
-- negated form
SELECT 1 IS NOT DISTINCT FROM 2 as "no";
no
----
f
(1 row)
SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
yes
-----
t
(1 row)
SELECT 2 IS NOT DISTINCT FROM null as "no";
no
----
f
(1 row)
SELECT null IS NOT DISTINCT FROM null as "yes";
yes
-----
t
(1 row)
-- join cases
-- test IS DISTINCT FROM and IS NOT DISTINCT FROM join quals. The Postgres planner doesn't support hash join on
-- IS NOT DISTINCT FROM for now; ORCA does support hash join on "IS NOT DISTINCT FROM".
CREATE TABLE distinct_1(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE distinct_2(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO distinct_1 VALUES(1),(2),(NULL);
INSERT INTO distinct_2 VALUES(1),(NULL);
EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a;
QUERY PLAN
---------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.54 rows=1 width=8)
-> Nested Loop (cost=0.00..1324032.54 rows=1 width=8)
Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.55 rows=2 width=8)
-> Nested Loop Left Join (cost=0.00..1324032.55 rows=1 width=8)
Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a)
-> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4)
-> Materialize (cost=0.00..431.00 rows=1 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(8 rows)
EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
QUERY PLAN
---------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..1324032.55 rows=2 width=8)
-> Nested Loop Left Join (cost=0.00..1324032.55 rows=1 width=8)
Join Filter: (distinct_1.a IS DISTINCT FROM distinct_2.a)
-> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4)
-> Materialize (cost=0.00..431.00 rows=1 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=4)
-> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(8 rows)
EXPLAIN SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=1 width=8)
-> Hash Join (cost=0.00..862.00 rows=1 width=8)
Hash Cond: (NOT (distinct_1.a IS DISTINCT FROM distinct_2.a))
-> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
EXPLAIN SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=8)
-> Hash Left Join (cost=0.00..862.00 rows=1 width=8)
Hash Cond: (NOT (distinct_1.a IS DISTINCT FROM distinct_2.a))
-> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
EXPLAIN SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
QUERY PLAN
------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=8)
-> Hash Left Join (cost=0.00..862.00 rows=1 width=8)
Hash Cond: (NOT (distinct_2.a IS DISTINCT FROM distinct_1.a))
-> Seq Scan on distinct_2 (cost=0.00..431.00 rows=1 width=4)
-> Hash (cost=431.00..431.00 rows=1 width=4)
-> Seq Scan on distinct_1 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS DISTINCT FROM distinct_2.a;
a | a
---+---
1 |
2 |
2 | 1
| 1
(4 rows)
SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
a | a
---+---
2 |
2 | 1
| 1
1 |
(4 rows)
SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS DISTINCT FROM distinct_2.a;
a | a
---+---
2 | 1
| 1
2 |
1 |
(4 rows)
SELECT * FROM distinct_1, distinct_2 WHERE distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
a | a
---+---
|
1 | 1
(2 rows)
SELECT * FROM distinct_1 left join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
a | a
---+---
1 | 1
2 |
|
(3 rows)
SELECT * FROM distinct_1 right join distinct_2 on distinct_1.a IS NOT DISTINCT FROM distinct_2.a;
a | a
---+---
1 | 1
|
(2 rows)
DROP TABLE distinct_1;
DROP TABLE distinct_2;
-- gpdb start: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on
set gp_statistics_pullup_from_child_partition to on;
CREATE TABLE sales (id int, date date, amt decimal(10,2))
DISTRIBUTED BY (id);
insert into sales values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100);
select distinct * from sales order by 1;
id | date | amt
----+------------+--------
1 | 02-02-2021 | 20.00
2 | 06-02-2021 | 9.00
3 | 10-02-2021 | 100.00
(3 rows)
select distinct sales from sales order by 1;
sales
-----------------------
(1,02-02-2021,20.00)
(2,06-02-2021,9.00)
(3,10-02-2021,100.00)
(3 rows)
CREATE TABLE sales_partition (id int, date date, amt decimal(10,2))
DISTRIBUTED BY (id)
PARTITION BY RANGE (date)
( START (date '2021-01-01') INCLUSIVE
END (date '2022-01-01') EXCLUSIVE
EVERY (INTERVAL '1 month') );
insert into sales_partition values (1,'20210202',20), (2,'20210602',9) ,(3,'20211002',100);
select distinct * from sales_partition order by 1;
id | date | amt
----+------------+--------
1 | 02-02-2021 | 20.00
2 | 06-02-2021 | 9.00
3 | 10-02-2021 | 100.00
(3 rows)
select distinct sales_partition from sales_partition order by 1;
sales_partition
-----------------------
(1,02-02-2021,20.00)
(2,06-02-2021,9.00)
(3,10-02-2021,100.00)
(3 rows)
DROP TABLE sales;
DROP TABLE sales_partition;
CREATE TABLE cities (
name text,
population float,
altitude int
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'name' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE capitals (
state char(2)
) INHERITS (cities);
NOTICE: table has parent, setting distribution columns to match parent table
select distinct * from cities;
name | population | altitude
------+------------+----------
(0 rows)
select distinct cities from cities;
cities
--------
(0 rows)
DROP TABLE capitals;
DROP TABLE cities;
set gp_statistics_pullup_from_child_partition to off;
-- gpdb end: test inherit/partition table distinct when gp_statistics_pullup_from_child_partition is on
create table t_distinct_sort(a int, b int, c int);
insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i;
insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i;
insert into t_distinct_sort select i, i+1, i+2 from generate_series(1, 10)i;
analyze t_distinct_sort;
explain(verbose, costs off)
select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a);
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
Sort
Output: (count(a)), (sum(b))
Sort Key: (sum(t_distinct_sort.b)), (count(t_distinct_sort.a))
-> Finalize Aggregate
Output: count(a), sum(b)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(a)), (PARTIAL sum(b))
-> Partial Aggregate
Output: PARTIAL count(a), PARTIAL sum(b)
-> Seq Scan on public.t_distinct_sort
Output: a, b
Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on'
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
select distinct count(a), sum(b) from t_distinct_sort order by sum(b), count(a);
count | sum
-------+-----
30 | 195
(1 row)
explain(verbose, costs off)
select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c);
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
Finalize Aggregate
Output: count(a), sum(b)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(a)), (PARTIAL sum(b))
-> Partial Aggregate
Output: PARTIAL count(a), PARTIAL sum(b)
-> Seq Scan on public.t_distinct_sort
Output: a, b, c
Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on'
Optimizer: Postgres query optimizer
(10 rows)
select distinct on(count(b), count(c)) count(a), sum(b) from t_distinct_sort order by count(c);
count | sum
-------+-----
30 | 195
(1 row)
explain(verbose, costs off)
select count(a), sum(b) from t_distinct_sort order by sum(a), count(c);
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
Result
Output: (count(a)), (sum(b))
-> Sort
Output: (count(a)), (sum(b)), (sum(a)), (count(c))
Sort Key: (sum(t_distinct_sort.a)), (count(t_distinct_sort.c))
-> Finalize Aggregate
Output: count(a), sum(b), sum(a), count(c)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL sum(a)), (PARTIAL count(c))
-> Partial Aggregate
Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL sum(a), PARTIAL count(c)
-> Seq Scan on public.t_distinct_sort
Output: a, b, c
Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on'
Optimizer: Pivotal Optimizer (GPORCA)
(15 rows)
select count(a), sum(b) from t_distinct_sort order by sum(a), count(c);
count | sum
-------+-----
30 | 195
(1 row)
explain(verbose, costs off)
select distinct count(a), sum(b) from t_distinct_sort ;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
Finalize Aggregate
Output: count(a), sum(b)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(a)), (PARTIAL sum(b))
-> Partial Aggregate
Output: PARTIAL count(a), PARTIAL sum(b)
-> Seq Scan on public.t_distinct_sort
Output: a, b
Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on'
Optimizer: Pivotal Optimizer (GPORCA)
(10 rows)
select distinct count(a), sum(b) from t_distinct_sort ;
count | sum
-------+-----
30 | 195
(1 row)
-- should keep distinct clause
explain(verbose, costs off)
select distinct on(count(random())) count(a), sum(b) from t_distinct_sort;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------
Unique
Output: (count(a)), (sum(b)), (count(random()))
Group Key: (count(random()))
-> Sort
Output: (count(a)), (sum(b)), (count(random()))
Sort Key: (count(random()))
-> Finalize Aggregate
Output: count(a), sum(b), count(random())
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(a)), (PARTIAL sum(b)), (PARTIAL count(random()))
-> Partial Aggregate
Output: PARTIAL count(a), PARTIAL sum(b), PARTIAL count(random())
-> Seq Scan on public.t_distinct_sort
Output: a, b, c
Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off', optimizer = 'on'
Optimizer: Postgres query optimizer
(16 rows)
select distinct on(count(random())) count(a), sum(b) from t_distinct_sort;
count | sum
-------+-----
30 | 195
(1 row)
explain(verbose, costs off)
select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series(
0, 2) from t_distinct_sort)as xx;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Finalize Aggregate
Output: count(t_distinct_sort_1.a)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(t_distinct_sort_1.a))
-> Partial Aggregate
Output: PARTIAL count(t_distinct_sort_1.a)
-> Nested Loop
Output: t_distinct_sort_1.a
Join Filter: true
-> Seq Scan on public.t_distinct_sort t_distinct_sort_1
Output: t_distinct_sort_1.a
-> Materialize
-> Broadcast Motion 1:3 (slice2)
-> Result
-> ProjectSet
Output: generate_series(0, 2)
-> Finalize Aggregate
Output: count(*)
-> Gather Motion 3:1 (slice3; segments: 3)
Output: (PARTIAL count(*))
-> Partial Aggregate
Output: PARTIAL count(*)
-> Seq Scan on public.t_distinct_sort
Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off'
Optimizer: Pivotal Optimizer (GPORCA)
(25 rows)
select distinct(count(a)) from t_distinct_sort, (select distinct(count(*)), generate_series(
0, 2) from t_distinct_sort)as xx;
count
-------
90
(1 row)
drop table t_distinct_sort;
explain(verbose, costs off)
select distinct(count(a)) from generate_series(0, 1) as a;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Aggregate
Output: count(generate_series)
-> Function Scan on pg_catalog.generate_series
Output: generate_series
Function Call: generate_series(0, 1)
Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off'
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
select distinct(count(a)) from generate_series(0, 1) as a;
count
-------
2
(1 row)
explain(verbose, costs off)
select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Aggregate
Output: count(*)
-> Nested Loop
Join Filter: true
-> Function Scan on pg_catalog.generate_series generate_series_1
Output: generate_series_1.generate_series
Function Call: generate_series(0, 1)
-> Function Scan on pg_catalog.generate_series
Output: generate_series.generate_series
Function Call: generate_series(0, 2)
Settings: enable_hashagg = 'on', enable_sort = 'on', gp_statistics_pullup_from_child_partition = 'off'
Optimizer: Pivotal Optimizer (GPORCA)
(12 rows)
select distinct(count(*)) from generate_series(0, 1) a join generate_series(0, 2) b on true;
count
-------
6
(1 row)
-- please refer to https://github.com/greenplum-db/gpdb/issues/15033
CREATE TABLE t1_issue_15033(c DECIMAL CHECK (0.4 IS DISTINCT FROM 0.3));
CREATE TABLE t2_issue_15033(c DECIMAL CHECK (0.4 IS NOT DISTINCT FROM 0.3));
INSERT INTO t1_issue_15033 VALUES(10);
SELECT * FROM t1_issue_15033;
c
----
10
(1 row)
INSERT INTO t2_issue_15033 VALUES(10);
ERROR: new row for relation "t2_issue_15033" violates check constraint "t2_issue_15033_check" (seg2 127.0.0.1:7004 pid=3634583)
DETAIL: Failing row contains (10).
SELECT * FROM t2_issue_15033;
c
---
(0 rows)