| -- The last digits of some of the results vary from one invocation to another, |
| -- because the intermediate operations are done in arbitrary order when rows |
| -- are flowing from different segments in different order. Mask those |
| -- differences by setting 'extra_float_digits'. This isn't enough for all of |
| -- the queries, so a few also use TO_CHAR() to truncate the results further. |
| set extra_float_digits=-1; |
| SET optimizer_trace_fallback to on; |
| drop table if exists dqa_t1; |
| NOTICE: table "dqa_t1" does not exist, skipping |
| drop table if exists dqa_t2; |
| NOTICE: table "dqa_t2" does not exist, skipping |
| create table dqa_t1 (d int, i int, c char, dt date); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'd' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table dqa_t2 (d int, i int, c char, dt date); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'd' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into dqa_t1 select i%23, i%12, (i%10) || '', '2009-06-10'::date + ( (i%34) || ' days')::interval |
| from generate_series(0, 99) i; |
| insert into dqa_t2 select i%34, i%45, (i%10) || '', '2009-06-10'::date + ( (i%56) || ' days')::interval |
| from generate_series(0, 99) i; |
| analyze dqa_t1; |
| analyze dqa_t2; |
| -- With the default very small cost, the planner often prefer to just Gather |
| -- all the rows to the QD. We want to test the more complicated multi-tage DQA |
| -- plans here, without using a huge number of rows. |
| -- GPDB_14_MERGE_FIXME: `DedupCost` seems to be computed wrongly caused by pg's |
| -- change of get_agg_clause_costs implementation. |
| set gp_motion_cost_per_row=2; |
| set enable_hashagg=on; |
| set enable_groupagg=off; |
| -- Also run EXPLAIN on each of these queries, to make sure you get an efficient plan, |
| -- and not e.g. a naive one that just pulls all the rows to the QD. |
| -- Distinct keys are distribution keys |
| select count(distinct d) from dqa_t1; |
| count |
| ------- |
| 23 |
| (1 row) |
| |
| explain (costs off) select count(distinct d) from dqa_t1; |
| QUERY PLAN |
| ------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) version 3.11.0 |
| (5 rows) |
| |
| select count(distinct d) from dqa_t1 group by i; |
| count |
| ------- |
| 9 |
| 8 |
| 8 |
| 9 |
| 8 |
| 8 |
| 9 |
| 8 |
| 9 |
| 8 |
| 8 |
| 8 |
| (12 rows) |
| |
| explain (costs off) select count(distinct d) from dqa_t1 group by i; |
| QUERY PLAN |
| ------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Finalize HashAggregate |
| Group Key: i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: i |
| -> Partial GroupAggregate |
| Group Key: i |
| -> Sort |
| Sort Key: i, d |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 |
| (11 rows) |
| |
| select count(distinct d), sum(distinct d) from dqa_t1 group by i; |
| count | sum |
| -------+----- |
| 9 | 73 |
| 9 | 64 |
| 8 | 100 |
| 8 | 71 |
| 8 | 56 |
| 8 | 108 |
| 8 | 86 |
| 9 | 82 |
| 8 | 116 |
| 8 | 92 |
| 8 | 101 |
| 9 | 91 |
| (12 rows) |
| |
| explain (costs off) select count(distinct d), sum(distinct d) from dqa_t1 group by i; |
| QUERY PLAN |
| ------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Finalize HashAggregate |
| Group Key: i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: i |
| -> Partial GroupAggregate |
| Group Key: i |
| -> Sort |
| Sort Key: i, d |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 |
| (11 rows) |
| |
| select count(distinct d), count(distinct dt) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count |
| -------+------- |
| 23 | 34 |
| (1 row) |
| |
| explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: d, dt, (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, d, dt |
| -> TupleSplit |
| Split by Col: (d), (dt) |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count | count |
| -------+-------+------- |
| 23 | 10 | 34 |
| (1 row) |
| |
| explain (costs off) select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: d, c, dt, (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, d, c, dt |
| -> TupleSplit |
| Split by Col: (d), (c), (dt) |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| select count(distinct d), count(distinct dt) from dqa_t1 group by c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count |
| -------+------- |
| 10 | 10 |
| 10 | 10 |
| 10 | 10 |
| 10 | 10 |
| 10 | 10 |
| 10 | 10 |
| 10 | 10 |
| 10 | 10 |
| 10 | 10 |
| 10 | 10 |
| (10 rows) |
| |
| explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1 group by c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------------ |
| Finalize HashAggregate |
| Group Key: c |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: c |
| -> HashAggregate |
| Group Key: (AggExprId), d, dt, c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c, d, dt, (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, d, dt, c |
| -> TupleSplit |
| Split by Col: (d), (dt) |
| Group Key: c |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select count(distinct d), count(distinct dt) from dqa_t1 group by d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count |
| -------+------- |
| 1 | 5 |
| 1 | 4 |
| 1 | 4 |
| 1 | 4 |
| 1 | 4 |
| 1 | 5 |
| 1 | 4 |
| 1 | 4 |
| 1 | 4 |
| 1 | 5 |
| 1 | 5 |
| 1 | 4 |
| 1 | 5 |
| 1 | 4 |
| 1 | 4 |
| 1 | 4 |
| 1 | 5 |
| 1 | 4 |
| 1 | 5 |
| 1 | 4 |
| 1 | 4 |
| 1 | 4 |
| 1 | 5 |
| (23 rows) |
| |
| explain (costs off) select count(distinct d), count(distinct dt) from dqa_t1 group by d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------------ |
| Finalize HashAggregate |
| Group Key: d |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: d |
| -> HashAggregate |
| Group Key: (AggExprId), dt, d |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: d, dt, (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, dt, d |
| -> TupleSplit |
| Split by Col: (d), (dt) |
| Group Key: d |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select count(distinct dqa_t1.d) from dqa_t1, dqa_t2 where dqa_t1.d = dqa_t2.d; |
| count |
| ------- |
| 23 |
| (1 row) |
| |
| explain (costs off) select count(distinct dqa_t1.d) from dqa_t1, dqa_t2 where dqa_t1.d = dqa_t2.d; |
| QUERY PLAN |
| ------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Hash Join |
| Hash Cond: (dqa_t1.d = dqa_t2.d) |
| -> Seq Scan on dqa_t1 |
| -> Hash |
| -> Seq Scan on dqa_t2 |
| Optimizer: Pivotal Optimizer (GPORCA) version 3.11.0 |
| (9 rows) |
| |
| select count(distinct dqa_t1.d) from dqa_t1, dqa_t2 where dqa_t1.d = dqa_t2.d group by dqa_t2.dt; |
| count |
| ------- |
| 1 |
| 2 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 2 |
| 1 |
| 2 |
| 2 |
| 1 |
| 2 |
| 1 |
| 1 |
| 1 |
| 1 |
| 2 |
| 2 |
| 1 |
| 2 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 2 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 2 |
| 1 |
| 2 |
| 2 |
| 1 |
| 1 |
| 1 |
| 1 |
| 2 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| 1 |
| (56 rows) |
| |
| explain (costs off) select count(distinct dqa_t1.d) from dqa_t1, dqa_t2 where dqa_t1.d = dqa_t2.d group by dqa_t2.dt; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: dqa_t2.dt |
| -> Sort |
| Sort Key: dqa_t2.dt |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: dqa_t2.dt |
| -> Hash Join |
| Hash Cond: (dqa_t2.d = dqa_t1.d) |
| -> Seq Scan on dqa_t2 |
| -> Hash |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| -- Distinct keys are not distribution keys |
| select count(distinct c) from dqa_t1; |
| count |
| ------- |
| 10 |
| (1 row) |
| |
| explain (costs off) select count(distinct c) from dqa_t1; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| select count(distinct c) from dqa_t1 group by dt; |
| count |
| ------- |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 2 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 3 |
| 2 |
| (34 rows) |
| |
| explain (costs off) select count(distinct c) from dqa_t1 group by dt; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: dt |
| -> Sort |
| Sort Key: dt |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: dt |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (9 rows) |
| |
| select count(distinct c) from dqa_t1 group by d; |
| count |
| ------- |
| 5 |
| 4 |
| 4 |
| 4 |
| 5 |
| 5 |
| 4 |
| 4 |
| 4 |
| 5 |
| 4 |
| 4 |
| 5 |
| 4 |
| 4 |
| 5 |
| 4 |
| 4 |
| 4 |
| 5 |
| 4 |
| 4 |
| 5 |
| (23 rows) |
| |
| explain (costs off) select count(distinct c) from dqa_t1 group by d; |
| QUERY PLAN |
| ------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: d |
| -> Sort |
| Sort Key: d |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| select count(distinct i), sum(distinct i) from dqa_t1 group by c; |
| count | sum |
| -------+----- |
| 6 | 30 |
| 6 | 36 |
| 6 | 30 |
| 6 | 36 |
| 6 | 36 |
| 6 | 30 |
| 6 | 30 |
| 6 | 36 |
| 6 | 36 |
| 6 | 30 |
| (10 rows) |
| |
| explain (costs off) select count(distinct i), sum(distinct i) from dqa_t1 group by c; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: c |
| -> Sort |
| Sort Key: c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (9 rows) |
| |
| select count(distinct c), count(distinct dt) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count |
| -------+------- |
| 10 | 34 |
| (1 row) |
| |
| explain (costs off) select count(distinct c), count(distinct dt) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c, dt, (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, c, dt |
| -> TupleSplit |
| Split by Col: (c), (dt) |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| select count(distinct c), count(distinct dt), i from dqa_t1 group by i; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count | i |
| -------+-------+---- |
| 5 | 9 | 3 |
| 5 | 8 | 11 |
| 5 | 8 | 5 |
| 5 | 8 | 9 |
| 5 | 9 | 1 |
| 5 | 8 | 7 |
| 5 | 9 | 2 |
| 5 | 8 | 6 |
| 5 | 9 | 0 |
| 5 | 8 | 8 |
| 5 | 8 | 10 |
| 5 | 8 | 4 |
| (12 rows) |
| |
| explain (costs off) select count(distinct c), count(distinct dt), i from dqa_t1 group by i; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------------ |
| Finalize HashAggregate |
| Group Key: i |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: i |
| -> HashAggregate |
| Group Key: (AggExprId), c, dt, i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: i, c, dt, (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, c, dt, i |
| -> TupleSplit |
| Split by Col: (c), (dt) |
| Group Key: i |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select count(distinct i), count(distinct c), d from dqa_t1 group by d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count | d |
| -------+-------+---- |
| 5 | 5 | 3 |
| 4 | 4 | 21 |
| 4 | 4 | 19 |
| 4 | 4 | 11 |
| 4 | 4 | 13 |
| 5 | 5 | 5 |
| 4 | 4 | 15 |
| 4 | 4 | 9 |
| 4 | 4 | 17 |
| 5 | 5 | 1 |
| 5 | 5 | 7 |
| 4 | 4 | 20 |
| 5 | 5 | 2 |
| 4 | 4 | 14 |
| 4 | 4 | 12 |
| 4 | 4 | 18 |
| 5 | 5 | 6 |
| 4 | 4 | 22 |
| 5 | 5 | 0 |
| 4 | 4 | 16 |
| 4 | 4 | 8 |
| 4 | 4 | 10 |
| 5 | 5 | 4 |
| (23 rows) |
| |
| explain (costs off) select count(distinct i), count(distinct c), d from dqa_t1 group by d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------------ |
| Finalize HashAggregate |
| Group Key: d |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: d |
| -> HashAggregate |
| Group Key: (AggExprId), i, c, d |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: d, i, c, (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, i, c, d |
| -> TupleSplit |
| Split by Col: (i), (c) |
| Group Key: d |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where dqa_t1.c = dqa_t2.c; |
| count |
| ------- |
| 34 |
| (1 row) |
| |
| explain (costs off) select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where dqa_t1.c = dqa_t2.c; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: dqa_t1.dt |
| -> Hash Join |
| Hash Cond: (dqa_t1.c = dqa_t2.c) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: dqa_t1.c |
| -> Seq Scan on dqa_t1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: dqa_t2.c |
| -> Seq Scan on dqa_t2 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (15 rows) |
| |
| select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where dqa_t1.c = dqa_t2.c group by dqa_t2.dt; |
| count |
| ------- |
| 10 |
| 13 |
| 13 |
| 14 |
| 14 |
| 13 |
| 14 |
| 10 |
| 10 |
| 13 |
| 13 |
| 10 |
| 14 |
| 13 |
| 13 |
| 14 |
| 10 |
| 13 |
| 13 |
| 13 |
| 13 |
| 13 |
| 14 |
| 14 |
| 10 |
| 14 |
| 14 |
| 14 |
| 13 |
| 14 |
| 13 |
| 14 |
| 14 |
| 13 |
| 10 |
| 13 |
| 14 |
| 13 |
| 13 |
| 10 |
| 13 |
| 14 |
| 13 |
| 10 |
| 13 |
| 14 |
| 14 |
| 10 |
| 14 |
| 13 |
| 13 |
| 10 |
| 10 |
| 13 |
| 14 |
| 14 |
| (56 rows) |
| |
| explain (costs off) select count(distinct dqa_t1.dt) from dqa_t1, dqa_t2 where dqa_t1.c = dqa_t2.c group by dqa_t2.dt; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: dqa_t2.dt |
| -> Sort |
| Sort Key: dqa_t2.dt |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: dqa_t2.dt |
| -> Hash Join |
| Hash Cond: (dqa_t1.c = dqa_t2.c) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: dqa_t1.c |
| -> Seq Scan on dqa_t1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: dqa_t2.c |
| -> Seq Scan on dqa_t2 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (17 rows) |
| |
| -- multidqa with groupby and order by |
| select sum(distinct d), count(distinct i), count(distinct c),i,c from dqa_t1 group by i,c order by i,c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| sum | count | count | i | c |
| -----+-------+-------+----+--- |
| 14 | 1 | 1 | 0 | 0 |
| 15 | 1 | 1 | 0 | 2 |
| 16 | 1 | 1 | 0 | 4 |
| 17 | 1 | 1 | 0 | 6 |
| 2 | 1 | 1 | 0 | 8 |
| 16 | 1 | 1 | 1 | 1 |
| 17 | 1 | 1 | 1 | 3 |
| 18 | 1 | 1 | 1 | 5 |
| 19 | 1 | 1 | 1 | 7 |
| 3 | 1 | 1 | 1 | 9 |
| 4 | 1 | 1 | 2 | 0 |
| 18 | 1 | 1 | 2 | 2 |
| 19 | 1 | 1 | 2 | 4 |
| 20 | 1 | 1 | 2 | 6 |
| 21 | 1 | 1 | 2 | 8 |
| 5 | 1 | 1 | 3 | 1 |
| 20 | 1 | 1 | 3 | 3 |
| 21 | 1 | 1 | 3 | 5 |
| 22 | 1 | 1 | 3 | 7 |
| 23 | 1 | 1 | 3 | 9 |
| 17 | 1 | 1 | 4 | 0 |
| 6 | 1 | 1 | 4 | 2 |
| 22 | 1 | 1 | 4 | 4 |
| 23 | 1 | 1 | 4 | 6 |
| 24 | 1 | 1 | 4 | 8 |
| 18 | 1 | 1 | 5 | 1 |
| 7 | 1 | 1 | 5 | 3 |
| 24 | 1 | 1 | 5 | 5 |
| 25 | 1 | 1 | 5 | 7 |
| 26 | 1 | 1 | 5 | 9 |
| 28 | 1 | 1 | 6 | 0 |
| 19 | 1 | 1 | 6 | 2 |
| 8 | 1 | 1 | 6 | 4 |
| 26 | 1 | 1 | 6 | 6 |
| 27 | 1 | 1 | 6 | 8 |
| 30 | 1 | 1 | 7 | 1 |
| 20 | 1 | 1 | 7 | 3 |
| 9 | 1 | 1 | 7 | 5 |
| 28 | 1 | 1 | 7 | 7 |
| 29 | 1 | 1 | 7 | 9 |
| 31 | 1 | 1 | 8 | 0 |
| 9 | 1 | 1 | 8 | 2 |
| 21 | 1 | 1 | 8 | 4 |
| 10 | 1 | 1 | 8 | 6 |
| 30 | 1 | 1 | 8 | 8 |
| 33 | 1 | 1 | 9 | 1 |
| 11 | 1 | 1 | 9 | 3 |
| 22 | 1 | 1 | 9 | 5 |
| 11 | 1 | 1 | 9 | 7 |
| 9 | 1 | 1 | 9 | 9 |
| 11 | 1 | 1 | 10 | 0 |
| 35 | 1 | 1 | 10 | 2 |
| 13 | 1 | 1 | 10 | 4 |
| 0 | 1 | 1 | 10 | 6 |
| 12 | 1 | 1 | 10 | 8 |
| 13 | 1 | 1 | 11 | 1 |
| 14 | 1 | 1 | 11 | 3 |
| 15 | 1 | 1 | 11 | 5 |
| 1 | 1 | 1 | 11 | 7 |
| 13 | 1 | 1 | 11 | 9 |
| (60 rows) |
| |
| explain (costs off) select sum(distinct d), count(distinct i), count(distinct c),i,c from dqa_t1 group by i,c order by i,c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Sort |
| Sort Key: i, c |
| -> Finalize HashAggregate |
| Group Key: i, c |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: i, c |
| -> HashAggregate |
| Group Key: (AggExprId), d, i, c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: i, c, d, (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, d, i, c |
| -> TupleSplit |
| Split by Col: (d), (i), (c) |
| Group Key: i, c |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (18 rows) |
| |
| -- multi args singledqa |
| select corr(distinct d, i) from dqa_t1; |
| corr |
| ------------------- |
| 0.082401334146002 |
| (1 row) |
| |
| explain (costs off) select corr(distinct d, i) from dqa_t1; |
| QUERY PLAN |
| ------------------------------------------------ |
| Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (4 rows) |
| |
| -- multi args singledqa with group by |
| select corr(distinct d, i) from dqa_t1 group by d; |
| corr |
| ------ |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| (23 rows) |
| |
| explain (costs off) select corr(distinct d, i) from dqa_t1 group by d; |
| QUERY PLAN |
| ------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: d |
| -> Sort |
| Sort Key: d |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| select to_char(corr(distinct d, i), '9.99999999999999') from dqa_t1 group by c; |
| to_char |
| ------------------- |
| .13670602618479 |
| .13670602618479 |
| .32622410426034 |
| -.11810476840833 |
| .07008652924496 |
| .07008652924496 |
| -.17582636927840 |
| -.17582636927840 |
| .42037777407962 |
| .05796784490862 |
| (10 rows) |
| |
| explain (costs off) select to_char(corr(distinct d, i), '9.99999999999999') from dqa_t1 group by c; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> GroupAggregate |
| Group Key: c |
| -> Sort |
| Sort Key: c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (9 rows) |
| |
| -- multi args multidqa |
| select count(distinct c), corr(distinct d, i) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | corr |
| -------+------------------- |
| 10 | 0.082401334146002 |
| (1 row) |
| |
| explain (costs off) select count(distinct c), corr(distinct d, i) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c, ((d)::double precision), ((i)::double precision), (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, c, ((d)::double precision), ((i)::double precision) |
| -> TupleSplit |
| Split by Col: (c), (((d)::double precision),((i)::double precision)) |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| select count(distinct d), corr(distinct d, i) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | corr |
| -------+------------------- |
| 23 | 0.082401334146002 |
| (1 row) |
| |
| explain (costs off) select count(distinct d), corr(distinct d, i) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: d, ((d)::double precision), ((i)::double precision), (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, d, ((d)::double precision), ((i)::double precision) |
| -> TupleSplit |
| Split by Col: (d), (((d)::double precision),((i)::double precision)) |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| select count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count | corr |
| -------+-------+------------------- |
| 23 | 12 | 0.082401334146002 |
| (1 row) |
| |
| explain (costs off) select count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: d, i, ((d)::double precision), ((i)::double precision), (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, d, i, ((d)::double precision), ((i)::double precision) |
| -> TupleSplit |
| Split by Col: (d), (i), (((d)::double precision),((i)::double precision)) |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| select count(distinct c), count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count | count | corr |
| -------+-------+-------+------------------- |
| 10 | 23 | 12 | 0.082401334146002 |
| (1 row) |
| |
| explain (costs off) select count(distinct c), count(distinct d), count(distinct i), corr(distinct d, i) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c, d, i, ((d)::double precision), ((i)::double precision), (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, c, d, i, ((d)::double precision), ((i)::double precision) |
| -> TupleSplit |
| Split by Col: (c), (d), (i), (((d)::double precision),((i)::double precision)) |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| -- multi args multidqa with group by |
| select count(distinct c), corr(distinct d, i), d from dqa_t1 group by d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | corr | d |
| -------+------+---- |
| 5 | | 0 |
| 5 | | 1 |
| 5 | | 2 |
| 5 | | 3 |
| 5 | | 4 |
| 5 | | 5 |
| 5 | | 6 |
| 5 | | 7 |
| 4 | | 8 |
| 4 | | 9 |
| 4 | | 10 |
| 4 | | 11 |
| 4 | | 12 |
| 4 | | 13 |
| 4 | | 14 |
| 4 | | 15 |
| 4 | | 16 |
| 4 | | 17 |
| 4 | | 18 |
| 4 | | 19 |
| 4 | | 20 |
| 4 | | 21 |
| 4 | | 22 |
| (23 rows) |
| |
| explain (costs off) select count(distinct c), corr(distinct d, i), d from dqa_t1 group by d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------- |
| Finalize HashAggregate |
| Group Key: d |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: d |
| -> HashAggregate |
| Group Key: (AggExprId), c, ((d)::double precision), ((i)::double precision), d |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: d, c, ((d)::double precision), ((i)::double precision), (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, c, ((d)::double precision), ((i)::double precision), d |
| -> TupleSplit |
| Split by Col: (c), (((d)::double precision),((i)::double precision)) |
| Group Key: d |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select count(distinct c), corr(distinct d, i), d, i from dqa_t1 group by d,i; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | corr | d | i |
| -------+------+----+---- |
| 1 | | 0 | 0 |
| 1 | | 0 | 8 |
| 1 | | 0 | 9 |
| 1 | | 0 | 10 |
| 1 | | 0 | 11 |
| 1 | | 1 | 0 |
| 1 | | 1 | 1 |
| 1 | | 1 | 9 |
| 1 | | 1 | 10 |
| 1 | | 1 | 11 |
| 1 | | 2 | 0 |
| 1 | | 2 | 1 |
| 1 | | 2 | 2 |
| 1 | | 2 | 10 |
| 1 | | 2 | 11 |
| 1 | | 3 | 0 |
| 1 | | 3 | 1 |
| 1 | | 3 | 2 |
| 1 | | 3 | 3 |
| 1 | | 3 | 11 |
| 1 | | 4 | 0 |
| 1 | | 4 | 1 |
| 1 | | 4 | 2 |
| 1 | | 4 | 3 |
| 1 | | 4 | 4 |
| 1 | | 5 | 1 |
| 1 | | 5 | 2 |
| 1 | | 5 | 3 |
| 1 | | 5 | 4 |
| 1 | | 5 | 5 |
| 1 | | 6 | 2 |
| 1 | | 6 | 3 |
| 1 | | 6 | 4 |
| 1 | | 6 | 5 |
| 1 | | 6 | 6 |
| 1 | | 7 | 3 |
| 1 | | 7 | 4 |
| 1 | | 7 | 5 |
| 1 | | 7 | 6 |
| 1 | | 7 | 7 |
| 1 | | 8 | 5 |
| 1 | | 8 | 6 |
| 1 | | 8 | 7 |
| 1 | | 8 | 8 |
| 1 | | 9 | 6 |
| 1 | | 9 | 7 |
| 1 | | 9 | 8 |
| 1 | | 9 | 9 |
| 1 | | 10 | 7 |
| 1 | | 10 | 8 |
| 1 | | 10 | 9 |
| 1 | | 10 | 10 |
| 1 | | 11 | 8 |
| 1 | | 11 | 9 |
| 1 | | 11 | 10 |
| 1 | | 11 | 11 |
| 1 | | 12 | 0 |
| 1 | | 12 | 9 |
| 1 | | 12 | 10 |
| 1 | | 12 | 11 |
| 1 | | 13 | 0 |
| 1 | | 13 | 1 |
| 1 | | 13 | 10 |
| 1 | | 13 | 11 |
| 1 | | 14 | 0 |
| 1 | | 14 | 1 |
| 1 | | 14 | 2 |
| 1 | | 14 | 11 |
| 1 | | 15 | 0 |
| 1 | | 15 | 1 |
| 1 | | 15 | 2 |
| 1 | | 15 | 3 |
| 1 | | 16 | 1 |
| 1 | | 16 | 2 |
| 1 | | 16 | 3 |
| 1 | | 16 | 4 |
| 1 | | 17 | 2 |
| 1 | | 17 | 3 |
| 1 | | 17 | 4 |
| 1 | | 17 | 5 |
| 1 | | 18 | 3 |
| 1 | | 18 | 4 |
| 1 | | 18 | 5 |
| 1 | | 18 | 6 |
| 1 | | 19 | 4 |
| 1 | | 19 | 5 |
| 1 | | 19 | 6 |
| 1 | | 19 | 7 |
| 1 | | 20 | 5 |
| 1 | | 20 | 6 |
| 1 | | 20 | 7 |
| 1 | | 20 | 8 |
| 1 | | 21 | 6 |
| 1 | | 21 | 7 |
| 1 | | 21 | 8 |
| 1 | | 21 | 9 |
| 1 | | 22 | 7 |
| 1 | | 22 | 8 |
| 1 | | 22 | 9 |
| 1 | | 22 | 10 |
| (100 rows) |
| |
| explain (costs off) select count(distinct c), corr(distinct d, i), d, i from dqa_t1 group by d,i; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------------- |
| Finalize HashAggregate |
| Group Key: d, i |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: d, i |
| -> HashAggregate |
| Group Key: (AggExprId), c, ((d)::double precision), ((i)::double precision), d, i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: d, i, c, ((d)::double precision), ((i)::double precision), (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, c, ((d)::double precision), ((i)::double precision), d, i |
| -> TupleSplit |
| Split by Col: (c), (((d)::double precision),((i)::double precision)) |
| Group Key: d, i |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select count(distinct c), corr(distinct d, i), dt from dqa_t1 group by dt; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | corr | dt |
| -------+-------------------+------------ |
| 3 | 0.75592894601845 | 06-10-2009 |
| 3 | 0.075093926148264 | 07-11-2009 |
| 3 | 0.075093926148264 | 06-28-2009 |
| 3 | 0.075093926148264 | 06-19-2009 |
| 3 | 0.075093926148264 | 06-17-2009 |
| 3 | 0.075093926148264 | 07-08-2009 |
| 3 | 0.075093926148264 | 06-16-2009 |
| 3 | 0.075093926148264 | 06-29-2009 |
| 2 | -1 | 07-13-2009 |
| 3 | 0.59603956067927 | 07-04-2009 |
| 3 | -0.70957090557056 | 06-23-2009 |
| 2 | -1 | 07-12-2009 |
| 3 | 0.075093926148264 | 07-02-2009 |
| 3 | 0.59603956067927 | 06-11-2009 |
| 3 | 0.075093926148264 | 07-10-2009 |
| 3 | 0.075093926148264 | 06-18-2009 |
| 3 | 0.59603956067927 | 07-05-2009 |
| 3 | 0.075093926148264 | 06-20-2009 |
| 3 | 0.075093926148264 | 07-09-2009 |
| 3 | -0.70957090557056 | 06-12-2009 |
| 3 | 0.59603956067927 | 06-24-2009 |
| 3 | 0.075093926148264 | 06-14-2009 |
| 3 | 0.075093926148264 | 06-26-2009 |
| 3 | -0.70957090557056 | 06-22-2009 |
| 3 | -0.70957090557056 | 06-13-2009 |
| 3 | 0.075093926148264 | 06-21-2009 |
| 3 | -0.70957090557056 | 07-07-2009 |
| 3 | 0.075093926148264 | 06-27-2009 |
| 3 | -0.70957090557056 | 07-06-2009 |
| 3 | 0.075093926148264 | 07-01-2009 |
| 3 | -1 | 07-03-2009 |
| 3 | 0.075093926148264 | 06-15-2009 |
| 3 | 0.59603956067927 | 06-25-2009 |
| 3 | 0.075093926148264 | 06-30-2009 |
| (34 rows) |
| |
| explain (costs off) select count(distinct c), corr(distinct d, i), dt from dqa_t1 group by dt; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------- |
| Finalize HashAggregate |
| Group Key: dt |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: dt |
| -> HashAggregate |
| Group Key: (AggExprId), c, ((d)::double precision), ((i)::double precision), dt |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: dt, c, ((d)::double precision), ((i)::double precision), (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, c, ((d)::double precision), ((i)::double precision), dt |
| -> TupleSplit |
| Split by Col: (c), (((d)::double precision),((i)::double precision)) |
| Group Key: dt |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select count(distinct d), corr(distinct d, i), i from dqa_t1 group by i; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | corr | i |
| -------+------+---- |
| 9 | | 0 |
| 9 | | 1 |
| 9 | | 2 |
| 9 | | 3 |
| 8 | | 4 |
| 8 | | 5 |
| 8 | | 6 |
| 8 | | 7 |
| 8 | | 8 |
| 8 | | 9 |
| 8 | | 10 |
| 8 | | 11 |
| (12 rows) |
| |
| explain (costs off) select count(distinct d), corr(distinct d, i), i from dqa_t1 group by i; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------- |
| Finalize HashAggregate |
| Group Key: i |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: i |
| -> HashAggregate |
| Group Key: (AggExprId), d, ((d)::double precision), ((i)::double precision), i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: i, d, ((d)::double precision), ((i)::double precision), (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, d, ((d)::double precision), ((i)::double precision), i |
| -> TupleSplit |
| Split by Col: (d), (((d)::double precision),((i)::double precision)) |
| Group Key: i |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select count(distinct d), corr(distinct d, i), d from dqa_t1 group by d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | corr | d |
| -------+------+---- |
| 1 | | 0 |
| 1 | | 1 |
| 1 | | 2 |
| 1 | | 3 |
| 1 | | 4 |
| 1 | | 5 |
| 1 | | 6 |
| 1 | | 7 |
| 1 | | 8 |
| 1 | | 9 |
| 1 | | 10 |
| 1 | | 11 |
| 1 | | 12 |
| 1 | | 13 |
| 1 | | 14 |
| 1 | | 15 |
| 1 | | 16 |
| 1 | | 17 |
| 1 | | 18 |
| 1 | | 19 |
| 1 | | 20 |
| 1 | | 21 |
| 1 | | 22 |
| (23 rows) |
| |
| explain (costs off) select count(distinct d), corr(distinct d, i), d from dqa_t1 group by d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------- |
| Finalize HashAggregate |
| Group Key: d |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: d |
| -> HashAggregate |
| Group Key: (AggExprId), ((d)::double precision), ((i)::double precision), d |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: d, ((d)::double precision), ((i)::double precision), (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, ((d)::double precision), ((i)::double precision), d |
| -> TupleSplit |
| Split by Col: (d), (((d)::double precision),((i)::double precision)) |
| Group Key: d |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| select count(distinct d), to_char(corr(distinct d, i), '9.99999999999999'), c from dqa_t1 group by c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | to_char | c |
| -------+-------------------+--- |
| 10 | .13670602618479 | 0 |
| 10 | .13670602618479 | 1 |
| 10 | .32622410426034 | 2 |
| 10 | -.11810476840833 | 3 |
| 10 | .07008652924496 | 4 |
| 10 | .07008652924496 | 5 |
| 10 | -.17582636927840 | 6 |
| 10 | -.17582636927840 | 7 |
| 10 | .42037777407962 | 8 |
| 10 | .05796784490862 | 9 |
| (10 rows) |
| |
| explain (costs off) select count(distinct d), to_char(corr(distinct d, i), '9.99999999999999'), c from dqa_t1 group by c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------- |
| Finalize HashAggregate |
| Group Key: c |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial HashAggregate |
| Group Key: c |
| -> HashAggregate |
| Group Key: (AggExprId), d, ((d)::double precision), ((i)::double precision), c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: c, d, ((d)::double precision), ((i)::double precision), (AggExprId) |
| -> Streaming HashAggregate |
| Group Key: AggExprId, d, ((d)::double precision), ((i)::double precision), c |
| -> TupleSplit |
| Split by Col: (d), (((d)::double precision),((i)::double precision)) |
| Group Key: c |
| -> Seq Scan on dqa_t1 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| -- MPP-19037 |
| drop table if exists fact_route_aggregation; |
| NOTICE: table "fact_route_aggregation" does not exist, skipping |
| drop table if exists dim_devices; |
| NOTICE: table "dim_devices" does not exist, skipping |
| CREATE TABLE fact_route_aggregation |
| ( |
| device_id integer, |
| is_route integer , |
| is_pedestrian integer, |
| user_id integer, |
| pedestrian_route_length_in_meters integer, |
| in_car_route_length_in_meters integer |
| ) DISTRIBUTED BY (device_id); |
| insert into fact_route_aggregation select generate_series(1,700),generate_series(200,300),generate_series(300,400), generate_series(400,500),generate_series(500,600),generate_series(600,700); |
| CREATE TABLE dim_devices |
| ( |
| device_id integer, |
| platform integer |
| ) DISTRIBUTED BY (device_id); |
| -- Repro query from the JIRA |
| select distinct |
| count(distinct case when T218094.is_route >= 1 or T218094.is_pedestrian >= 1 then T218094.user_id else NULL end ) as c1, |
| sum(cast(T218094.is_route + T218094.is_pedestrian as DOUBLE PRECISION )) as c2, |
| sum(cast(T218094.is_pedestrian as DOUBLE PRECISION )) as c3, |
| count(distinct case when T218094.is_pedestrian >= 1 then T218094.user_id else NULL end ) as c4, |
| sum(T218094.pedestrian_route_length_in_meters / 1000.0) as c5, |
| sum(T218094.in_car_route_length_in_meters / 1000.0) as c6, |
| sum(cast(T218094.is_route as DOUBLE PRECISION )) as c7, |
| count(distinct case when T218094.is_route >= 1 then T218094.user_id else NULL end ) as c8, |
| T43883.platform as c9 |
| from |
| dim_devices T43883, |
| fact_route_aggregation T218094 |
| where ( T43883.device_id = T218094.device_id ) |
| group by T43883.platform; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c9 |
| ----+----+----+----+----+----+----+----+---- |
| (0 rows) |
| |
| -- cleanup |
| drop table fact_route_aggreagation; |
| ERROR: table "fact_route_aggreagation" does not exist |
| drop table dim_devices; |
| -- other test queries for mpp-19037 |
| drop table if exists t1_mdqa; |
| NOTICE: table "t1_mdqa" does not exist, skipping |
| drop table if exists t2_mdqa; |
| NOTICE: table "t2_mdqa" does not exist, skipping |
| create table t1_mdqa(a int, b int, c varchar); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table t2_mdqa(a int, b int, c varchar); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into t1_mdqa select i % 5 , i % 10, i || 'value' from generate_series(1, 20) i; |
| insert into t1_mdqa select i % 5 , i % 10, i || 'value' from generate_series(1, 20) i; |
| insert into t2_mdqa select i % 10 , i % 5, i || 'value' from generate_series(1, 20) i; |
| insert into t2_mdqa select i % 10 , i % 5, i || 'value' from generate_series(1, 20) i; |
| -- simple mdqa |
| select count(distinct t1.a), count(distinct t2.b), t1.c, t2.c from t1_mdqa t1, t2_mdqa t2 where t1.c = t2.c group by t1.c, t2.c order by t1.c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count | c | c |
| -------+-------+---------+--------- |
| 1 | 1 | 10value | 10value |
| 1 | 1 | 11value | 11value |
| 1 | 1 | 12value | 12value |
| 1 | 1 | 13value | 13value |
| 1 | 1 | 14value | 14value |
| 1 | 1 | 15value | 15value |
| 1 | 1 | 16value | 16value |
| 1 | 1 | 17value | 17value |
| 1 | 1 | 18value | 18value |
| 1 | 1 | 19value | 19value |
| 1 | 1 | 1value | 1value |
| 1 | 1 | 20value | 20value |
| 1 | 1 | 2value | 2value |
| 1 | 1 | 3value | 3value |
| 1 | 1 | 4value | 4value |
| 1 | 1 | 5value | 5value |
| 1 | 1 | 6value | 6value |
| 1 | 1 | 7value | 7value |
| 1 | 1 | 8value | 8value |
| 1 | 1 | 9value | 9value |
| (20 rows) |
| |
| -- distinct on top of some mdqas |
| select distinct sum(distinct t1.a), avg(t2.a), sum(distinct t2.b), t1.a, t2.b from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.a, t2.b order by t1.a; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| sum | avg | sum | a | b |
| -----+------------------------+-----+---+--- |
| 0 | 0.00000000000000000000 | 0 | 0 | 0 |
| 1 | 1.00000000000000000000 | 1 | 1 | 1 |
| 2 | 2.0000000000000000 | 2 | 2 | 2 |
| 3 | 3.0000000000000000 | 3 | 3 | 3 |
| 4 | 4.0000000000000000 | 4 | 4 | 4 |
| (5 rows) |
| |
| select distinct sum (distinct t1.a), avg(distinct t2.a), sum(distinct t2.b), t1.c from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.c order by t1.c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| sum | avg | sum | c |
| -----+------------------------+-----+--------- |
| 0 | 0.00000000000000000000 | 0 | 10value |
| 1 | 1.00000000000000000000 | 1 | 11value |
| 2 | 2.0000000000000000 | 2 | 12value |
| 3 | 3.0000000000000000 | 3 | 13value |
| 4 | 4.0000000000000000 | 4 | 14value |
| 0 | 0.00000000000000000000 | 0 | 15value |
| 1 | 1.00000000000000000000 | 1 | 16value |
| 2 | 2.0000000000000000 | 2 | 17value |
| 3 | 3.0000000000000000 | 3 | 18value |
| 4 | 4.0000000000000000 | 4 | 19value |
| 1 | 1.00000000000000000000 | 1 | 1value |
| 0 | 0.00000000000000000000 | 0 | 20value |
| 2 | 2.0000000000000000 | 2 | 2value |
| 3 | 3.0000000000000000 | 3 | 3value |
| 4 | 4.0000000000000000 | 4 | 4value |
| 0 | 0.00000000000000000000 | 0 | 5value |
| 1 | 1.00000000000000000000 | 1 | 6value |
| 2 | 2.0000000000000000 | 2 | 7value |
| 3 | 3.0000000000000000 | 3 | 8value |
| 4 | 4.0000000000000000 | 4 | 9value |
| (20 rows) |
| |
| -- distinct on group by fields |
| select distinct t1.c , sum(distinct t1.a), count(t2.b), sum(distinct t2.b) from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.c order by t1.c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| c | sum | count | sum |
| ---------+-----+-------+----- |
| 10value | 0 | 8 | 0 |
| 11value | 1 | 8 | 1 |
| 12value | 2 | 8 | 2 |
| 13value | 3 | 8 | 3 |
| 14value | 4 | 8 | 4 |
| 15value | 0 | 8 | 0 |
| 16value | 1 | 8 | 1 |
| 17value | 2 | 8 | 2 |
| 18value | 3 | 8 | 3 |
| 19value | 4 | 8 | 4 |
| 1value | 1 | 8 | 1 |
| 20value | 0 | 8 | 0 |
| 2value | 2 | 8 | 2 |
| 3value | 3 | 8 | 3 |
| 4value | 4 | 8 | 4 |
| 5value | 0 | 8 | 0 |
| 6value | 1 | 8 | 1 |
| 7value | 2 | 8 | 2 |
| 8value | 3 | 8 | 3 |
| 9value | 4 | 8 | 4 |
| (20 rows) |
| |
| -- distinct on normal aggregates |
| select distinct sum(t1.a), avg(distinct t2.a), sum(distinct (t1.a + t2.a)), t1.a, t2.b from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.a, t2.b order by t1.a; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| sum | avg | sum | a | b |
| -----+------------------------+-----+---+--- |
| 0 | 0.00000000000000000000 | 0 | 0 | 0 |
| 32 | 1.00000000000000000000 | 2 | 1 | 1 |
| 64 | 2.0000000000000000 | 4 | 2 | 2 |
| 96 | 3.0000000000000000 | 6 | 3 | 3 |
| 128 | 4.0000000000000000 | 8 | 4 | 4 |
| (5 rows) |
| |
| select distinct avg(t1.a + t2.b), count(distinct t1.c), count(distinct char_length(t1.c)), t1.a, t2.b from t1_mdqa t1, t2_mdqa t2 where t1.a = t2.a group by t1.a, t2.b order by t1.a; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| avg | count | count | a | b |
| ------------------------+-------+-------+---+--- |
| 0.00000000000000000000 | 4 | 2 | 0 | 0 |
| 2.0000000000000000 | 4 | 2 | 1 | 1 |
| 4.0000000000000000 | 4 | 2 | 2 | 2 |
| 6.0000000000000000 | 4 | 2 | 3 | 3 |
| 8.0000000000000000 | 4 | 2 | 4 | 4 |
| (5 rows) |
| |
| -- cleanup |
| drop table t1_mdqa; |
| drop table t2_mdqa; |
| -- other queries from MPP-19037 |
| drop table if exists gp_dqa_r; |
| NOTICE: table "gp_dqa_r" does not exist, skipping |
| drop table if exists gp_dqa_s; |
| NOTICE: table "gp_dqa_s" does not exist, skipping |
| create table gp_dqa_r (a int, b int, c int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table gp_dqa_s (d int, e int, f int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'd' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into gp_dqa_r select i , i %10, i%5 from generate_series(1,20) i; |
| insert into gp_dqa_s select i, i %15, i%10 from generate_series(1,30) i; |
| select a, d, count(distinct b) as c1, count(distinct c) as c2 from gp_dqa_r, gp_dqa_s where ( e = a ) group by d, a order by a,d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| a | d | c1 | c2 |
| ----+----+----+---- |
| 1 | 1 | 1 | 1 |
| 1 | 16 | 1 | 1 |
| 2 | 2 | 1 | 1 |
| 2 | 17 | 1 | 1 |
| 3 | 3 | 1 | 1 |
| 3 | 18 | 1 | 1 |
| 4 | 4 | 1 | 1 |
| 4 | 19 | 1 | 1 |
| 5 | 5 | 1 | 1 |
| 5 | 20 | 1 | 1 |
| 6 | 6 | 1 | 1 |
| 6 | 21 | 1 | 1 |
| 7 | 7 | 1 | 1 |
| 7 | 22 | 1 | 1 |
| 8 | 8 | 1 | 1 |
| 8 | 23 | 1 | 1 |
| 9 | 9 | 1 | 1 |
| 9 | 24 | 1 | 1 |
| 10 | 10 | 1 | 1 |
| 10 | 25 | 1 | 1 |
| 11 | 11 | 1 | 1 |
| 11 | 26 | 1 | 1 |
| 12 | 12 | 1 | 1 |
| 12 | 27 | 1 | 1 |
| 13 | 13 | 1 | 1 |
| 13 | 28 | 1 | 1 |
| 14 | 14 | 1 | 1 |
| 14 | 29 | 1 | 1 |
| (28 rows) |
| |
| select distinct |
| count(distinct case when b >= 1 or c >= 1 then b else NULL end ) as c1, |
| sum(cast(b + c as DOUBLE PRECISION )) as c2, |
| sum(cast(c as DOUBLE PRECISION )) as c3, |
| count(distinct case when b >= 1 then b else NULL end ) as c2, |
| d as c9 |
| from gp_dqa_r, gp_dqa_s |
| where ( e = a ) |
| group by d order by c9; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| c1 | c2 | c3 | c2 | c9 |
| ----+----+----+----+---- |
| 1 | 2 | 1 | 1 | 1 |
| 1 | 4 | 2 | 1 | 2 |
| 1 | 6 | 3 | 1 | 3 |
| 1 | 8 | 4 | 1 | 4 |
| 1 | 5 | 0 | 1 | 5 |
| 1 | 7 | 1 | 1 | 6 |
| 1 | 9 | 2 | 1 | 7 |
| 1 | 11 | 3 | 1 | 8 |
| 1 | 13 | 4 | 1 | 9 |
| 0 | 0 | 0 | 0 | 10 |
| 1 | 2 | 1 | 1 | 11 |
| 1 | 4 | 2 | 1 | 12 |
| 1 | 6 | 3 | 1 | 13 |
| 1 | 8 | 4 | 1 | 14 |
| 1 | 2 | 1 | 1 | 16 |
| 1 | 4 | 2 | 1 | 17 |
| 1 | 6 | 3 | 1 | 18 |
| 1 | 8 | 4 | 1 | 19 |
| 1 | 5 | 0 | 1 | 20 |
| 1 | 7 | 1 | 1 | 21 |
| 1 | 9 | 2 | 1 | 22 |
| 1 | 11 | 3 | 1 | 23 |
| 1 | 13 | 4 | 1 | 24 |
| 0 | 0 | 0 | 0 | 25 |
| 1 | 2 | 1 | 1 | 26 |
| 1 | 4 | 2 | 1 | 27 |
| 1 | 6 | 3 | 1 | 28 |
| 1 | 8 | 4 | 1 | 29 |
| (28 rows) |
| |
| select distinct |
| count(distinct case when b >= 1 or c >= 1 then b else NULL end ) as c1, |
| count(distinct case when b >= 1 then b else NULL end ) as c2, |
| d as c9 |
| from gp_dqa_r, gp_dqa_s |
| where ( e = a ) |
| group by d order by c9; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| c1 | c2 | c9 |
| ----+----+---- |
| 1 | 1 | 1 |
| 1 | 1 | 2 |
| 1 | 1 | 3 |
| 1 | 1 | 4 |
| 1 | 1 | 5 |
| 1 | 1 | 6 |
| 1 | 1 | 7 |
| 1 | 1 | 8 |
| 1 | 1 | 9 |
| 0 | 0 | 10 |
| 1 | 1 | 11 |
| 1 | 1 | 12 |
| 1 | 1 | 13 |
| 1 | 1 | 14 |
| 1 | 1 | 16 |
| 1 | 1 | 17 |
| 1 | 1 | 18 |
| 1 | 1 | 19 |
| 1 | 1 | 20 |
| 1 | 1 | 21 |
| 1 | 1 | 22 |
| 1 | 1 | 23 |
| 1 | 1 | 24 |
| 0 | 0 | 25 |
| 1 | 1 | 26 |
| 1 | 1 | 27 |
| 1 | 1 | 28 |
| 1 | 1 | 29 |
| (28 rows) |
| |
| select distinct count(distinct b) as c1, count(distinct c) as c2, d as c9 |
| from gp_dqa_r, gp_dqa_s |
| where ( e = a ) |
| group by d order by c9; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| c1 | c2 | c9 |
| ----+----+---- |
| 1 | 1 | 1 |
| 1 | 1 | 2 |
| 1 | 1 | 3 |
| 1 | 1 | 4 |
| 1 | 1 | 5 |
| 1 | 1 | 6 |
| 1 | 1 | 7 |
| 1 | 1 | 8 |
| 1 | 1 | 9 |
| 1 | 1 | 10 |
| 1 | 1 | 11 |
| 1 | 1 | 12 |
| 1 | 1 | 13 |
| 1 | 1 | 14 |
| 1 | 1 | 16 |
| 1 | 1 | 17 |
| 1 | 1 | 18 |
| 1 | 1 | 19 |
| 1 | 1 | 20 |
| 1 | 1 | 21 |
| 1 | 1 | 22 |
| 1 | 1 | 23 |
| 1 | 1 | 24 |
| 1 | 1 | 25 |
| 1 | 1 | 26 |
| 1 | 1 | 27 |
| 1 | 1 | 28 |
| 1 | 1 | 29 |
| (28 rows) |
| |
| select distinct d, count(distinct b) as c1, count(distinct c) as c2, d as c9 from gp_dqa_r, gp_dqa_s group by d order by c9; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| d | c1 | c2 | c9 |
| ----+----+----+---- |
| 1 | 10 | 5 | 1 |
| 2 | 10 | 5 | 2 |
| 3 | 10 | 5 | 3 |
| 4 | 10 | 5 | 4 |
| 5 | 10 | 5 | 5 |
| 6 | 10 | 5 | 6 |
| 7 | 10 | 5 | 7 |
| 8 | 10 | 5 | 8 |
| 9 | 10 | 5 | 9 |
| 10 | 10 | 5 | 10 |
| 11 | 10 | 5 | 11 |
| 12 | 10 | 5 | 12 |
| 13 | 10 | 5 | 13 |
| 14 | 10 | 5 | 14 |
| 15 | 10 | 5 | 15 |
| 16 | 10 | 5 | 16 |
| 17 | 10 | 5 | 17 |
| 18 | 10 | 5 | 18 |
| 19 | 10 | 5 | 19 |
| 20 | 10 | 5 | 20 |
| 21 | 10 | 5 | 21 |
| 22 | 10 | 5 | 22 |
| 23 | 10 | 5 | 23 |
| 24 | 10 | 5 | 24 |
| 25 | 10 | 5 | 25 |
| 26 | 10 | 5 | 26 |
| 27 | 10 | 5 | 27 |
| 28 | 10 | 5 | 28 |
| 29 | 10 | 5 | 29 |
| 30 | 10 | 5 | 30 |
| (30 rows) |
| |
| select distinct d, count(distinct b) as c1, count(distinct c) as c2, d as c9 from gp_dqa_r, gp_dqa_s group by d, a order by c9; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| d | c1 | c2 | c9 |
| ----+----+----+---- |
| 1 | 1 | 1 | 1 |
| 2 | 1 | 1 | 2 |
| 3 | 1 | 1 | 3 |
| 4 | 1 | 1 | 4 |
| 5 | 1 | 1 | 5 |
| 6 | 1 | 1 | 6 |
| 7 | 1 | 1 | 7 |
| 8 | 1 | 1 | 8 |
| 9 | 1 | 1 | 9 |
| 10 | 1 | 1 | 10 |
| 11 | 1 | 1 | 11 |
| 12 | 1 | 1 | 12 |
| 13 | 1 | 1 | 13 |
| 14 | 1 | 1 | 14 |
| 15 | 1 | 1 | 15 |
| 16 | 1 | 1 | 16 |
| 17 | 1 | 1 | 17 |
| 18 | 1 | 1 | 18 |
| 19 | 1 | 1 | 19 |
| 20 | 1 | 1 | 20 |
| 21 | 1 | 1 | 21 |
| 22 | 1 | 1 | 22 |
| 23 | 1 | 1 | 23 |
| 24 | 1 | 1 | 24 |
| 25 | 1 | 1 | 25 |
| 26 | 1 | 1 | 26 |
| 27 | 1 | 1 | 27 |
| 28 | 1 | 1 | 28 |
| 29 | 1 | 1 | 29 |
| 30 | 1 | 1 | 30 |
| (30 rows) |
| |
| select distinct count(distinct b) as c1, count(distinct c) as c2 from gp_dqa_r, gp_dqa_s; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| c1 | c2 |
| ----+---- |
| 10 | 5 |
| (1 row) |
| |
| select distinct count(distinct b) as c1, count(distinct c) as c2 from gp_dqa_r; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| c1 | c2 |
| ----+---- |
| 10 | 5 |
| (1 row) |
| |
| select distinct count(distinct b) as c1, count(distinct c) as c2, d, a from gp_dqa_r, gp_dqa_s where ( e = a)group by d, a order by a,d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| c1 | c2 | d | a |
| ----+----+----+---- |
| 1 | 1 | 1 | 1 |
| 1 | 1 | 16 | 1 |
| 1 | 1 | 2 | 2 |
| 1 | 1 | 17 | 2 |
| 1 | 1 | 3 | 3 |
| 1 | 1 | 18 | 3 |
| 1 | 1 | 4 | 4 |
| 1 | 1 | 19 | 4 |
| 1 | 1 | 5 | 5 |
| 1 | 1 | 20 | 5 |
| 1 | 1 | 6 | 6 |
| 1 | 1 | 21 | 6 |
| 1 | 1 | 7 | 7 |
| 1 | 1 | 22 | 7 |
| 1 | 1 | 8 | 8 |
| 1 | 1 | 23 | 8 |
| 1 | 1 | 9 | 9 |
| 1 | 1 | 24 | 9 |
| 1 | 1 | 10 | 10 |
| 1 | 1 | 25 | 10 |
| 1 | 1 | 11 | 11 |
| 1 | 1 | 26 | 11 |
| 1 | 1 | 12 | 12 |
| 1 | 1 | 27 | 12 |
| 1 | 1 | 13 | 13 |
| 1 | 1 | 28 | 13 |
| 1 | 1 | 14 | 14 |
| 1 | 1 | 29 | 14 |
| (28 rows) |
| |
| select distinct count(distinct b) as c1, count(distinct c) as c2, d from gp_dqa_r, gp_dqa_s group by d, a order by d,a; |
| ERROR: for SELECT DISTINCT, ORDER BY expressions must appear in select list |
| LINE 1: ...as c2, d from gp_dqa_r, gp_dqa_s group by d, a order by d,a; |
| ^ |
| select distinct count(distinct b) as c1, count(distinct c) as c2, d from gp_dqa_r, gp_dqa_s group by d, a order by d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| c1 | c2 | d |
| ----+----+---- |
| 1 | 1 | 1 |
| 1 | 1 | 2 |
| 1 | 1 | 3 |
| 1 | 1 | 4 |
| 1 | 1 | 5 |
| 1 | 1 | 6 |
| 1 | 1 | 7 |
| 1 | 1 | 8 |
| 1 | 1 | 9 |
| 1 | 1 | 10 |
| 1 | 1 | 11 |
| 1 | 1 | 12 |
| 1 | 1 | 13 |
| 1 | 1 | 14 |
| 1 | 1 | 15 |
| 1 | 1 | 16 |
| 1 | 1 | 17 |
| 1 | 1 | 18 |
| 1 | 1 | 19 |
| 1 | 1 | 20 |
| 1 | 1 | 21 |
| 1 | 1 | 22 |
| 1 | 1 | 23 |
| 1 | 1 | 24 |
| 1 | 1 | 25 |
| 1 | 1 | 26 |
| 1 | 1 | 27 |
| 1 | 1 | 28 |
| 1 | 1 | 29 |
| 1 | 1 | 30 |
| (30 rows) |
| |
| select distinct count(distinct b) as c1, count(distinct c) as c2, d from gp_dqa_r, gp_dqa_s group by d order by d; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| c1 | c2 | d |
| ----+----+---- |
| 10 | 5 | 1 |
| 10 | 5 | 2 |
| 10 | 5 | 3 |
| 10 | 5 | 4 |
| 10 | 5 | 5 |
| 10 | 5 | 6 |
| 10 | 5 | 7 |
| 10 | 5 | 8 |
| 10 | 5 | 9 |
| 10 | 5 | 10 |
| 10 | 5 | 11 |
| 10 | 5 | 12 |
| 10 | 5 | 13 |
| 10 | 5 | 14 |
| 10 | 5 | 15 |
| 10 | 5 | 16 |
| 10 | 5 | 17 |
| 10 | 5 | 18 |
| 10 | 5 | 19 |
| 10 | 5 | 20 |
| 10 | 5 | 21 |
| 10 | 5 | 22 |
| 10 | 5 | 23 |
| 10 | 5 | 24 |
| 10 | 5 | 25 |
| 10 | 5 | 26 |
| 10 | 5 | 27 |
| 10 | 5 | 28 |
| 10 | 5 | 29 |
| 10 | 5 | 30 |
| (30 rows) |
| |
| -- cleanup |
| drop table gp_dqa_r; |
| drop table gp_dqa_s; |
| -- setup |
| drop table if exists gp_dqa_t1; |
| NOTICE: table "gp_dqa_t1" does not exist, skipping |
| drop table if exists gp_dqa_t2; |
| NOTICE: table "gp_dqa_t2" does not exist, skipping |
| create table gp_dqa_t1 (a int, b int) distributed by (a); |
| create table gp_dqa_t2 (a int, c int) distributed by (a); |
| insert into gp_dqa_t1 select i , i %5 from generate_series(1,10) i; |
| insert into gp_dqa_t2 select i , i %4 from generate_series(1,10) i; |
| select distinct A.a, sum(distinct A.b), count(distinct B.c) from gp_dqa_t1 A left join gp_dqa_t2 B on (A.a = B.a) group by A.a order by A.a; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| a | sum | count |
| ----+-----+------- |
| 1 | 1 | 1 |
| 2 | 2 | 1 |
| 3 | 3 | 1 |
| 4 | 4 | 1 |
| 5 | 0 | 1 |
| 6 | 1 | 1 |
| 7 | 2 | 1 |
| 8 | 3 | 1 |
| 9 | 4 | 1 |
| 10 | 0 | 1 |
| (10 rows) |
| |
| select distinct A.a, sum(distinct A.b), count(distinct B.c) from gp_dqa_t1 A right join gp_dqa_t2 B on (A.a = B.a) group by A.a order by A.a; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| a | sum | count |
| ----+-----+------- |
| 1 | 1 | 1 |
| 2 | 2 | 1 |
| 3 | 3 | 1 |
| 4 | 4 | 1 |
| 5 | 0 | 1 |
| 6 | 1 | 1 |
| 7 | 2 | 1 |
| 8 | 3 | 1 |
| 9 | 4 | 1 |
| 10 | 0 | 1 |
| (10 rows) |
| |
| -- Most of the above test queries got planned as hash aggregates. Repeat |
| -- a few of them as group aggregates |
| set enable_hashagg=off; |
| set enable_groupagg=on; |
| select count(distinct d) from dqa_t1 group by i; |
| count |
| ------- |
| 9 |
| 9 |
| 9 |
| 9 |
| 8 |
| 8 |
| 8 |
| 8 |
| 8 |
| 8 |
| 8 |
| 8 |
| (12 rows) |
| |
| explain (costs off) select count(distinct d) from dqa_t1 group by i; |
| QUERY PLAN |
| ------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Finalize HashAggregate |
| Group Key: i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: i |
| -> Partial GroupAggregate |
| Group Key: i |
| -> Sort |
| Sort Key: i, d |
| -> Seq Scan on dqa_t1 |
| Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 |
| (11 rows) |
| |
| select count(distinct d), count(distinct c), count(distinct dt) from dqa_t1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count | count |
| -------+-------+------- |
| 23 | 10 | 34 |
| (1 row) |
| |
| select count(distinct c), count(distinct dt), i from dqa_t1 group by i; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count | i |
| -------+-------+---- |
| 5 | 9 | 3 |
| 5 | 8 | 11 |
| 5 | 8 | 5 |
| 5 | 8 | 9 |
| 5 | 9 | 1 |
| 5 | 8 | 7 |
| 5 | 9 | 2 |
| 5 | 8 | 6 |
| 5 | 9 | 0 |
| 5 | 8 | 8 |
| 5 | 8 | 10 |
| 5 | 8 | 4 |
| (12 rows) |
| |
| -- cleanup |
| drop table gp_dqa_t1; |
| drop table gp_dqa_t2; |
| create table foo_mdqa(x int, y int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| SELECT distinct C.z, count(distinct FS.x), count(distinct FS.y) FROM (SELECT 1 AS z FROM generate_series(1,10)) C, foo_mdqa FS GROUP BY z; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| z | count | count |
| ---+-------+------- |
| (0 rows) |
| |
| SELECT distinct C.z, count(distinct FS.x), count(distinct FS.y) FROM (SELECT i AS z FROM generate_series(1,10) i) C, foo_mdqa FS GROUP BY z; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| z | count | count |
| ---+-------+------- |
| (0 rows) |
| |
| drop table foo_mdqa; |
| -- non-strict agg test |
| -- Like COUNT(col), but also counts NULLs |
| create or replace function countall_trans(c int, newval int) returns int as $$ |
| SELECT $1 + 1; |
| $$ language sql; |
| create aggregate countall(sfunc = countall_trans, basetype = int, stype = int, initcond = 0, combinefunc = int4pl); |
| -- Test table |
| create table nonullstab (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into nonullstab select 1, 1 from generate_series(1, 100); |
| -- This returns wrong result. countall(distinct a) should return 1. |
| select countall(distinct a), count(distinct b) from nonullstab; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| countall | count |
| ----------+------- |
| 1 | 1 |
| (1 row) |
| |
| -- multi DQA with filter test |
| set enable_hashagg=on; |
| set enable_groupagg=off; |
| create table dqa_f1(a int, b int, c int) distributed by (a); |
| create table dqa_f2(x int, y int, z int) distributed by (x); |
| insert into dqa_f1 select i%17, i%5 , i%3 from generate_series(1,1000) i; |
| insert into dqa_f2 select i % 13, i % 5 , i % 11 from generate_series(1,1000) i; |
| select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| sum | sum |
| -----+----- |
| 136 | 10 |
| (1 row) |
| |
| select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by b; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| sum | sum |
| -----+----- |
| 136 | 0 |
| 136 | 1 |
| 136 | 2 |
| 136 | 3 |
| 136 | 4 |
| (5 rows) |
| |
| select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| sum | sum |
| -----+----- |
| 136 | 10 |
| 136 | 10 |
| 136 | 10 |
| (3 rows) |
| |
| select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| sum | sum |
| -----+----- |
| 78 | 10 |
| (1 row) |
| |
| select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| sum | sum |
| -----+----- |
| 78 | 10 |
| 78 | 10 |
| 78 | 10 |
| (3 rows) |
| |
| select count(distinct a) filter (where a > 3),count( distinct b) filter (where a > 4), sum(distinct b) filter( where a > 4) from dqa_f1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| count | count | sum |
| -------+-------+----- |
| 13 | 5 | 10 |
| (1 row) |
| |
| -- fix hang of multi-dqa with filter (https://github.com/greenplum-db/gpdb/issues/14728) |
| select count(distinct a) filter (where a > 3), count(distinct b) from dqa_f1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| count | count |
| -------+------- |
| 13 | 5 |
| (1 row) |
| |
| explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------ |
| Finalize Aggregate (cost=20.66..20.67 rows=1 width=16) |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=20.62..20.65 rows=1 width=16) |
| -> Partial Aggregate (cost=20.62..20.63 rows=1 width=16) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.59 rows=1 width=8) |
| Hash Key: a, b, (AggExprId) |
| -> Streaming HashAggregate (cost=20.50..20.53 rows=1 width=8) |
| Group Key: AggExprId, a, b |
| -> TupleSplit (cost=13.00..15.50 rows=667 width=8) |
| Split by Col: (a) FILTER (WHERE (a > 0)), (b) FILTER (WHERE (a > 0)) |
| -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=8) |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by b; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------ |
| Finalize HashAggregate (cost=21.62..21.67 rows=5 width=20) |
| Group Key: b |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=21.30..21.56 rows=9 width=20) |
| -> Partial HashAggregate (cost=21.30..21.39 rows=3 width=20) |
| Group Key: b |
| -> HashAggregate (cost=21.06..21.17 rows=4 width=8) |
| Group Key: (AggExprId), a, b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.95 rows=5 width=8) |
| Hash Key: b, a, (AggExprId) |
| -> Streaming HashAggregate (cost=20.50..20.65 rows=5 width=8) |
| Group Key: AggExprId, a, b |
| -> TupleSplit (cost=13.00..15.50 rows=667 width=8) |
| Split by Col: (a) FILTER (WHERE (a > 0)), (b) FILTER (WHERE (a > 0)) |
| Group Key: b |
| -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=8) |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| explain select sum(distinct a) filter (where a > 0), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------- |
| Finalize HashAggregate (cost=21.20..21.23 rows=3 width=20) |
| Group Key: c |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=21.00..21.16 rows=6 width=20) |
| -> Partial HashAggregate (cost=21.00..21.05 rows=2 width=20) |
| Group Key: c |
| -> HashAggregate (cost=20.86..20.92 rows=3 width=12) |
| Group Key: (AggExprId), a, b, c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.77 rows=3 width=12) |
| Hash Key: c, a, b, (AggExprId) |
| -> Streaming HashAggregate (cost=20.50..20.59 rows=3 width=12) |
| Group Key: AggExprId, a, b, c |
| -> TupleSplit (cost=13.00..15.50 rows=667 width=12) |
| Split by Col: (a) FILTER (WHERE (a > 0)), (b) FILTER (WHERE (a > 0)) |
| Group Key: c |
| -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=12) |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------------------------ |
| Finalize Aggregate (cost=96.41..96.42 rows=1 width=16) |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=96.37..96.40 rows=1 width=16) |
| -> Partial Aggregate (cost=96.37..96.38 rows=1 width=16) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.59 rows=1 width=8) |
| Hash Key: dqa_f1.a, dqa_f1.b, (AggExprId) |
| -> Streaming HashAggregate (cost=20.50..20.53 rows=1 width=8) |
| Group Key: AggExprId, dqa_f1.a, dqa_f1.b |
| -> TupleSplit (cost=13.00..15.50 rows=667 width=8) |
| Split by Col: (dqa_f1.a) FILTER (WHERE (SubPlan 1)), (dqa_f1.b) FILTER (WHERE (dqa_f1.a > 0)) |
| -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=8) |
| SubPlan 1 |
| -> Result (cost=0.00..48.00 rows=334 width=4) |
| Filter: (dqa_f2.x = dqa_f1.a) |
| -> Materialize (cost=0.00..38.00 rows=334 width=4) |
| -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..33.00 rows=334 width=4) |
| -> Seq Scan on dqa_f2 (cost=0.00..13.00 rows=334 width=4) |
| Optimizer: Postgres query optimizer |
| (17 rows) |
| |
| explain select sum(distinct a) filter (where a in (select x from dqa_f2 where x = a)), sum(distinct b) filter (where a > 0) from dqa_f1 group by c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------------------------------ |
| Finalize HashAggregate (cost=181.11..181.14 rows=3 width=20) |
| Group Key: dqa_f1.c |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=180.92..181.07 rows=6 width=20) |
| -> Partial HashAggregate (cost=180.92..180.97 rows=2 width=20) |
| Group Key: dqa_f1.c |
| -> HashAggregate (cost=20.86..20.92 rows=3 width=12) |
| Group Key: (AggExprId), dqa_f1.a, dqa_f1.b, dqa_f1.c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.77 rows=3 width=12) |
| Hash Key: dqa_f1.c, dqa_f1.a, dqa_f1.b, (AggExprId) |
| -> Streaming HashAggregate (cost=20.50..20.59 rows=3 width=12) |
| Group Key: AggExprId, dqa_f1.a, dqa_f1.b, dqa_f1.c |
| -> TupleSplit (cost=13.00..15.50 rows=667 width=12) |
| Split by Col: (dqa_f1.a) FILTER (WHERE (SubPlan 1)), (dqa_f1.b) FILTER (WHERE (dqa_f1.a > 0)) |
| Group Key: dqa_f1.c |
| -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=12) |
| SubPlan 1 |
| -> Result (cost=0.00..48.00 rows=334 width=4) |
| Filter: (dqa_f2.x = dqa_f1.a) |
| -> Materialize (cost=0.00..38.00 rows=334 width=4) |
| -> Broadcast Motion 3:3 (slice3; segments: 3) (cost=0.00..33.00 rows=334 width=4) |
| -> Seq Scan on dqa_f2 (cost=0.00..13.00 rows=334 width=4) |
| Optimizer: Postgres query optimizer |
| (22 rows) |
| |
| explain select count(distinct a) filter (where a > 3),count( distinct b) filter (where a > 4), sum(distinct b) filter( where a > 4) from dqa_f1; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------ |
| Finalize Aggregate (cost=20.67..20.68 rows=1 width=24) |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=20.64..20.67 rows=1 width=24) |
| -> Partial Aggregate (cost=20.64..20.65 rows=1 width=24) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=20.50..20.59 rows=1 width=8) |
| Hash Key: a, b, (AggExprId) |
| -> Streaming HashAggregate (cost=20.50..20.53 rows=1 width=8) |
| Group Key: AggExprId, a, b |
| -> TupleSplit (cost=13.00..15.50 rows=667 width=8) |
| Split by Col: (a) FILTER (WHERE (a > 3)), (b) FILTER (WHERE (a > 4)) |
| -> Seq Scan on dqa_f1 (cost=0.00..13.00 rows=334 width=8) |
| Optimizer: Postgres query optimizer |
| (11 rows) |
| |
| -- single DQA with agg |
| -- the following SQL should use two stage agg |
| explain select count(distinct a), sum(b), sum(c) from dqa_f1; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------- |
| Finalize Aggregate (cost=0.00..431.00 rows=1 width=24) |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=24) |
| -> Partial Aggregate (cost=0.00..431.00 rows=1 width=24) |
| -> Seq Scan on dqa_f1 (cost=0.00..431.00 rows=1 width=12) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (5 rows) |
| |
| select count(distinct a), sum(b), sum(c) from dqa_f1; |
| count | sum | sum |
| -------+------+------ |
| 17 | 2000 | 1000 |
| (1 row) |
| |
| -- multi DQA with primary key |
| create table dqa_unique(a int, b int, c int, d int, primary key(a, b)); |
| insert into dqa_unique select i%3, i%5, i%7, i%9 from generate_series(1, 10) i; |
| explain(verbose on, costs off) select count(distinct a), count(distinct d), c from dqa_unique group by a, b; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Query-to-DXL Translation: No attribute entry found due to incorrect normalization of query |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------ |
| Finalize HashAggregate |
| Output: count(a), count(d), c, a, b |
| Group Key: dqa_unique.a, dqa_unique.b |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: a, b, (PARTIAL count(a)), (PARTIAL count(d)), c |
| -> Partial HashAggregate |
| Output: a, b, PARTIAL count(a), PARTIAL count(d), c |
| Group Key: dqa_unique.a, dqa_unique.b |
| -> HashAggregate |
| Output: a, b, d, c, (AggExprId) |
| Group Key: (AggExprId), dqa_unique.d, dqa_unique.a, dqa_unique.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: a, b, d, c, (AggExprId) |
| Hash Key: a, b, d, (AggExprId) |
| -> Streaming HashAggregate |
| Output: a, b, d, c, (AggExprId) |
| Group Key: AggExprId, dqa_unique.d, dqa_unique.a, dqa_unique.b |
| -> TupleSplit |
| Output: a, b, d, c, AggExprId |
| Split by Col: (dqa_unique.a), (dqa_unique.d) |
| Group Key: dqa_unique.a, dqa_unique.b |
| -> Seq Scan on public.dqa_unique |
| Output: a, b, c, d |
| Optimizer: Postgres query optimizer |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (25 rows) |
| |
| select count(distinct a), count(distinct d), c from dqa_unique group by a, b; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Query-to-DXL Translation: No attribute entry found due to incorrect normalization of query |
| count | count | c |
| -------+-------+--- |
| 1 | 1 | 5 |
| 1 | 1 | 6 |
| 1 | 1 | 0 |
| 1 | 1 | 2 |
| 1 | 1 | 1 |
| 1 | 1 | 1 |
| 1 | 1 | 2 |
| 1 | 1 | 4 |
| 1 | 1 | 3 |
| 1 | 1 | 3 |
| (10 rows) |
| |
| -- multi DQA with type conversions |
| create table dqa_f3(a character varying, b bigint) distributed by (a); |
| insert into dqa_f3 values ('123', 2), ('213', 0), ('231', 2), ('312', 0), ('321', 2), ('132', 1), ('4', 0); |
| -- Case 1: When converting the type of column 'a' from 'VARCHAR' to 'TEXT' in DQA expression, instead of generating a new column '(a)::text' |
| -- by TupleSplit, we can reference the column 'a' as part of hash-key in Redistribute-Motion directly, since the conversion is binary-compatible. |
| -- -> Redistribute Motion 3:3 (slice2; segments: 3) |
| -- Output: b, a, ((b)::text), (AggExprId) |
| -- Hash Key: ((b)::text), a, (AggExprId) |
| -- ... |
| -- -> TupleSplit |
| -- Output: b, a, ((b)::text), AggExprId |
| -- Split by Col: (((dqa_f3.b)::text)), (dqa_f3.a) |
| -- -> Seq Scan on public.dqa_f3 |
| -- Output: b, a, (b)::text |
| select count(distinct (b)::text) as b, count(distinct (a)::text) as a from dqa_f3; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| b | a |
| ---+--- |
| 3 | 7 |
| (1 row) |
| |
| explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::text) as a from dqa_f3; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------ |
| Finalize Aggregate |
| Output: count(DISTINCT ((b)::text)), count(DISTINCT (a)::text) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(DISTINCT ((b)::text))), (PARTIAL count(DISTINCT (a)::text)) |
| -> Partial Aggregate |
| Output: PARTIAL count(DISTINCT ((b)::text)), PARTIAL count(DISTINCT (a)::text) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, a, ((b)::text), (AggExprId) |
| Hash Key: ((b)::text), a, (AggExprId) |
| -> Streaming HashAggregate |
| Output: b, a, ((b)::text), (AggExprId) |
| Group Key: AggExprId, ((dqa_f3.b)::text), dqa_f3.a |
| -> TupleSplit |
| Output: b, a, ((b)::text), AggExprId |
| Split by Col: (((dqa_f3.b)::text)), (dqa_f3.a) |
| -> Seq Scan on public.dqa_f3 |
| Output: b, a, (b)::text |
| Optimizer: Postgres query optimizer |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (19 rows) |
| |
| -- Case 2: Same as the above one, but convert the type of column 'a' to 'varchar' via binary-compatible types. |
| select count(distinct (b)::text) as b, count(distinct (a)::text::varchar) as a from dqa_f3; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| b | a |
| ---+--- |
| 3 | 7 |
| (1 row) |
| |
| explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::text::varchar) as a from dqa_f3; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(DISTINCT ((b)::text)), count(DISTINCT a) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(DISTINCT ((b)::text))), (PARTIAL count(DISTINCT a)) |
| -> Partial Aggregate |
| Output: PARTIAL count(DISTINCT ((b)::text)), PARTIAL count(DISTINCT a) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, a, ((b)::text), (AggExprId) |
| Hash Key: ((b)::text), a, (AggExprId) |
| -> Streaming HashAggregate |
| Output: b, a, ((b)::text), (AggExprId) |
| Group Key: AggExprId, ((dqa_f3.b)::text), dqa_f3.a |
| -> TupleSplit |
| Output: b, a, ((b)::text), AggExprId |
| Split by Col: (((dqa_f3.b)::text)), (dqa_f3.a) |
| -> Seq Scan on public.dqa_f3 |
| Output: b, a, (b)::text |
| Optimizer: Postgres query optimizer |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (19 rows) |
| |
| -- Case 3: When converting the type of column 'a' from 'varchar' to 'int' in DQA expression, TupleSplit should generate an additional |
| -- column '(a)::integer' as part of hash-key in Redistribute-Motion, since the conversion is not binary-compatible. |
| -- -> Redistribute Motion 3:3 (slice2; segments: 3) |
| -- Output: b, a, ((b)::text), ((a)::integer), (AggExprId) |
| -- Hash Key: ((b)::text), ((a)::integer), (AggExprId) |
| -- ... |
| -- -> TupleSplit |
| -- Output: b, a, ((b)::text), ((a)::integer), AggExprId |
| -- Split by Col: (((dqa_f3.b)::text)), (((dqa_f3.a)::integer)) |
| -- -> Seq Scan on public.dqa_f3 |
| -- Output: b, a, (b)::text, (a)::integer |
| select count(distinct (b)::text) as b, count(distinct (a)::int) as a from dqa_f3; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| b | a |
| ---+--- |
| 3 | 7 |
| (1 row) |
| |
| explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::int) as a from dqa_f3; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ----------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(DISTINCT ((b)::text)), count(DISTINCT ((a)::integer)) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(DISTINCT ((b)::text))), (PARTIAL count(DISTINCT ((a)::integer))) |
| -> Partial Aggregate |
| Output: PARTIAL count(DISTINCT ((b)::text)), PARTIAL count(DISTINCT ((a)::integer)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, a, ((b)::text), ((a)::integer), (AggExprId) |
| Hash Key: ((b)::text), ((a)::integer), (AggExprId) |
| -> Streaming HashAggregate |
| Output: b, a, ((b)::text), ((a)::integer), (AggExprId) |
| Group Key: AggExprId, ((dqa_f3.b)::text), ((dqa_f3.a)::integer) |
| -> TupleSplit |
| Output: b, a, ((b)::text), ((a)::integer), AggExprId |
| Split by Col: (((dqa_f3.b)::text)), (((dqa_f3.a)::integer)) |
| -> Seq Scan on public.dqa_f3 |
| Output: b, a, (b)::text, (a)::integer |
| Optimizer: Postgres query optimizer |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (19 rows) |
| |
| -- Case 4: When converting the type of column 'a' from 'varchar' to 'int' to 'varchar', TupleSplit should generate an additional |
| -- column '(a)::integer::varchar' as part of hash-key in Redistribute-Motion. |
| select count(distinct (b)::text) as b, count(distinct (a)::int::varchar) as a from dqa_f3; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| b | a |
| ---+--- |
| 3 | 7 |
| (1 row) |
| |
| explain (verbose, costs off) select count(distinct (b)::text) as b, count(distinct (a)::int::varchar) as a from dqa_f3; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(DISTINCT ((b)::text)), count(DISTINCT (((a)::integer)::character varying)) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL count(DISTINCT ((b)::text))), (PARTIAL count(DISTINCT (((a)::integer)::character varying))) |
| -> Partial Aggregate |
| Output: PARTIAL count(DISTINCT ((b)::text)), PARTIAL count(DISTINCT (((a)::integer)::character varying)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, a, ((b)::text), (((a)::integer)::character varying), (AggExprId) |
| Hash Key: ((b)::text), (((a)::integer)::character varying), (AggExprId) |
| -> Streaming HashAggregate |
| Output: b, a, ((b)::text), (((a)::integer)::character varying), (AggExprId) |
| Group Key: AggExprId, ((dqa_f3.b)::text), (((dqa_f3.a)::integer)::character varying) |
| -> TupleSplit |
| Output: b, a, ((b)::text), (((a)::integer)::character varying), AggExprId |
| Split by Col: (((dqa_f3.b)::text)), ((((dqa_f3.a)::integer)::character varying)) |
| -> Seq Scan on public.dqa_f3 |
| Output: b, a, (b)::text, ((a)::integer)::character varying |
| Optimizer: Postgres query optimizer |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (19 rows) |
| |
| drop table dqa_f3; |
| -- Test 3-phase agg for DISTINCT on distribution keys |
| -- or DISTINCT when GROUP BY on distribution keys |
| create table t_issue_659(a int, b int) using ao_row; |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into t_issue_659 select i from generate_series(1, 1000) i; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| insert into t_issue_659 select * from t_issue_659; |
| analyze t_issue_659; |
| explain(costs off) |
| select count(distinct a) from t_issue_659; |
| QUERY PLAN |
| ------------------------------------------------ |
| Finalize Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> Partial Aggregate |
| -> Seq Scan on t_issue_659 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (5 rows) |
| |
| select count(distinct a) from t_issue_659; |
| count |
| ------- |
| 1000 |
| (1 row) |
| |
| set gp_eager_distinct_dedup = on; |
| -- for ORCA |
| set optimizer_force_three_stage_scalar_dqa to on; |
| set optimizer_force_multistage_agg to on; |
| set optimizer_enable_use_distribution_in_dqa to on; |
| explain(costs off) |
| select count(distinct a) from t_issue_659; |
| QUERY PLAN |
| ------------------------------------------------- |
| Aggregate |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| -> HashAggregate |
| Group Key: a |
| -> Streaming HashAggregate |
| Group Key: a |
| -> Seq Scan on t_issue_659 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (8 rows) |
| |
| select count(distinct a) from t_issue_659; |
| count |
| ------- |
| 1000 |
| (1 row) |
| |
| reset gp_eager_distinct_dedup; |
| reset optimizer_force_three_stage_scalar_dqa; |
| reset optimizer_enable_use_distribution_in_dqa; |
| drop table t_issue_659; |
| -- fix dqa bug when optimizer_force_multistage_agg is on |
| set optimizer_force_multistage_agg = on; |
| create table multiagg1(a int, b bigint, c int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table multiagg2(a int, b bigint, c numeric(8, 4)); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into multiagg1 values(generate_series(1, 10), generate_series(1, 10), generate_series(1, 10)); |
| insert into multiagg2 values(generate_series(1, 10), generate_series(1, 10), 555.55); |
| analyze multiagg1; |
| analyze multiagg2; |
| explain (verbose, costs off) select count(distinct b), sum(c) from multiagg1; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(b), sum(c) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: b, (PARTIAL sum(c)) |
| -> Partial HashAggregate |
| Output: b, PARTIAL sum(c) |
| Group Key: multiagg1.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, (PARTIAL sum(c)) |
| Hash Key: b |
| -> Streaming Partial HashAggregate |
| Output: b, PARTIAL sum(c) |
| Group Key: multiagg1.b |
| -> Seq Scan on public.multiagg1 |
| Output: b, c |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer_force_multistage_agg = 'on' |
| (17 rows) |
| |
| select count(distinct b), sum(c) from multiagg1; |
| count | sum |
| -------+----- |
| 10 | 55 |
| (1 row) |
| |
| explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: count(b), sum(c) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: b, (PARTIAL sum(c)) |
| -> Partial HashAggregate |
| Output: b, PARTIAL sum(c) |
| Group Key: multiagg2.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, (PARTIAL sum(c)) |
| Hash Key: b |
| -> Streaming Partial HashAggregate |
| Output: b, PARTIAL sum(c) |
| Group Key: multiagg2.b |
| -> Seq Scan on public.multiagg2 |
| Output: b, c |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer_force_multistage_agg = 'on' |
| (17 rows) |
| |
| select count(distinct b), sum(c) from multiagg2; |
| count | sum |
| -------+----------- |
| 10 | 5555.5000 |
| (1 row) |
| |
| drop table multiagg1; |
| drop table multiagg2; |
| -- Support Multi-stage DQA with ride along aggregation in ORCA |
| -- Historically, Agg aggsplit is identically equal to Aggref aggsplit |
| -- In ORCA's attempt to support intermediate aggregation |
| -- The two are allowed to differ |
| -- Now Agg aggsplit is derived as bitwise OR of its children Aggref aggsplit |
| -- The plan is to eventually make Agg aggsplit a dummy |
| -- And use Aggref aggsplit to build trans/combine functions |
| set optimizer_force_multistage_agg=on; |
| create table num_table(id int, a bigint, b int, c numeric); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into num_table values(1,1,1,1),(2,2,2,2),(3,3,3,3); |
| -- count(distinct a) is a simple aggregation |
| -- sum(b) is a split aggregation |
| -- Before the fix, in the final aggregation of sum(b) |
| -- the executor mistakenly built a trans func instead of a combine func |
| -- The trans func building process errored out due to mismatch between |
| -- the input type (int) and trans type (bigint), and caused missing plan |
| explain select count(distinct a), sum(b) from num_table; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate (cost=0.00..431.00 rows=1 width=16) |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=3 width=16) |
| -> Partial HashAggregate (cost=0.00..431.00 rows=1 width=16) |
| Group Key: a |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=16) |
| Hash Key: a |
| -> Streaming Partial HashAggregate (cost=0.00..431.00 rows=1 width=16) |
| Group Key: a |
| -> Seq Scan on num_table (cost=0.00..431.00 rows=1 width=12) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (10 rows) |
| |
| select count(distinct a), sum(b) from num_table; |
| count | sum |
| -------+----- |
| 3 | 6 |
| (1 row) |
| |
| explain select count(distinct a), sum(b) from num_table group by id; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=16) |
| -> Finalize GroupAggregate (cost=0.00..431.00 rows=1 width=16) |
| Group Key: id |
| -> Partial GroupAggregate (cost=0.00..431.00 rows=1 width=20) |
| Group Key: id, a |
| -> Sort (cost=0.00..431.00 rows=1 width=16) |
| Sort Key: id, a |
| -> Seq Scan on num_table (cost=0.00..431.00 rows=1 width=16) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (9 rows) |
| |
| select count(distinct a), sum(b) from num_table group by id; |
| count | sum |
| -------+----- |
| 1 | 1 |
| 1 | 2 |
| 1 | 3 |
| (3 rows) |
| |
| -- count(distinct a) is a simple aggregation |
| -- sum(c) is a split aggregation |
| -- Before the fix, the final aggregation of sum(c) was mistakenly |
| -- treated as simple aggregation, and led to the missing |
| -- deserialization step in the aggregation execution prep |
| -- Numeric aggregation serializes partial aggregation states |
| -- The executor then evaluated the aggregation state without deserializing it first |
| -- This led to the creation of garbage NaN count, and caused NaN output |
| explain select count(distinct a), sum(c) from num_table; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate (cost=0.00..431.00 rows=1 width=16) |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=3 width=16) |
| -> Partial HashAggregate (cost=0.00..431.00 rows=1 width=16) |
| Group Key: a |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=16) |
| Hash Key: a |
| -> Streaming Partial HashAggregate (cost=0.00..431.00 rows=1 width=16) |
| Group Key: a |
| -> Seq Scan on num_table (cost=0.00..431.00 rows=1 width=13) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (10 rows) |
| |
| select count(distinct a), sum(c) from num_table; |
| count | sum |
| -------+----- |
| 3 | 6 |
| (1 row) |
| |
| explain select id, count(distinct a), avg(b), sum(c) from num_table group by grouping sets ((id,c)); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=28) |
| -> Finalize GroupAggregate (cost=0.00..431.00 rows=1 width=28) |
| Group Key: id, c |
| -> Partial GroupAggregate (cost=0.00..431.00 rows=1 width=36) |
| Group Key: id, c, a |
| -> Sort (cost=0.00..431.00 rows=1 width=24) |
| Sort Key: id, c, a |
| -> Seq Scan on num_table (cost=0.00..431.00 rows=1 width=24) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (9 rows) |
| |
| select id, count(distinct a), avg(b), sum(c) from num_table group by grouping sets ((id,c)); |
| id | count | avg | sum |
| ----+-------+------------------------+----- |
| 1 | 1 | 1.00000000000000000000 | 1 |
| 2 | 1 | 2.0000000000000000 | 2 |
| 3 | 1 | 3.0000000000000000 | 3 |
| (3 rows) |
| |
| explain (verbose on, costs off) select count(distinct b) from num_table group by c; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (count(DISTINCT b)) |
| -> Finalize HashAggregate |
| Output: count(DISTINCT b) |
| Group Key: num_table.c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: c, (PARTIAL count(DISTINCT b)) |
| Hash Key: c |
| -> Partial GroupAggregate |
| Output: c, PARTIAL count(DISTINCT b) |
| Group Key: num_table.c |
| -> Sort |
| Output: b, c |
| Sort Key: num_table.c, num_table.b |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Output: b, c |
| Hash Key: c, b |
| -> Seq Scan on public.num_table |
| Output: b, c |
| Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'on' |
| Optimizer: GPORCA |
| (21 rows) |
| |
| select count(distinct b) from num_table group by c; |
| count |
| ------- |
| 1 |
| 1 |
| 1 |
| (3 rows) |
| |
| reset optimizer_force_multistage_agg; |
| -- DQA with Agg(Intermediate Agg) |
| -- In PG optimizer Intermediate Agg have not support yet |
| -- Current test cases will be changed after commit 971fa82(Support intermediate Agg in planner (#13959)) |
| set enable_hashagg=on; |
| set enable_groupagg=off; |
| create table dqa_f3(a int, b int, c int, d int, e int ) distributed by (a); |
| insert into dqa_f3 select i % 17, i % 5 , i % 3, i %10, i % 7 from generate_series(1,1000) i; |
| analyze dqa_f3; |
| /* |
| * Test distinct or group by column is distributed key |
| * |
| * 1. If the input's locus matches the DISTINCT, but not GROUP BY: |
| * |
| * HashAggregate |
| * -> Redistribute (according to GROUP BY) |
| * -> HashAggregate (to eliminate duplicates) |
| * -> input (hashed by GROUP BY + DISTINCT) |
| * |
| * 2. If the input's locus matches the GROUP BY(don't care DISTINCT any more): |
| * |
| * HashAggregate (to aggregate) |
| * -> HashAggregate (to eliminate duplicates) |
| * -> input (hashed by GROUP BY) |
| * |
| */ |
| explain (verbose on, costs off)select sum(Distinct a), count(b), sum(c) from dqa_f3 group by e; |
| QUERY PLAN |
| -------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (sum(a)), (count(b)), (sum(c)) |
| -> Finalize HashAggregate |
| Output: sum(a), count(b), sum(c) |
| Group Key: dqa_f3.e |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: a, e, (PARTIAL count(b)), (PARTIAL sum(c)) |
| Hash Key: e |
| -> Partial HashAggregate |
| Output: a, e, PARTIAL count(b), PARTIAL sum(c) |
| Group Key: dqa_f3.e, dqa_f3.a |
| -> Seq Scan on public.dqa_f3 |
| Output: a, b, c, e |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (15 rows) |
| |
| select sum(Distinct a), count(b), sum(c) from dqa_f3 group by e; |
| sum | count | sum |
| -----+-------+----- |
| 136 | 142 | 142 |
| 136 | 143 | 144 |
| 136 | 143 | 142 |
| 136 | 143 | 143 |
| 136 | 143 | 144 |
| 136 | 143 | 142 |
| 136 | 143 | 143 |
| (7 rows) |
| |
| explain (verbose on, costs off) select sum(Distinct e), count(b), sum(c) from dqa_f3 group by a; |
| QUERY PLAN |
| ----------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (sum(e)), (count(b)), (sum(c)) |
| -> Finalize HashAggregate |
| Output: sum(e), count(b), sum(c) |
| Group Key: dqa_f3.a |
| -> Partial HashAggregate |
| Output: PARTIAL count(b), PARTIAL sum(c), a, e |
| Group Key: dqa_f3.a, dqa_f3.e |
| -> Seq Scan on public.dqa_f3 |
| Output: a, b, c, e |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (12 rows) |
| |
| select sum(Distinct e), count(b), sum(c) from dqa_f3 group by a; |
| sum | count | sum |
| -----+-------+----- |
| 21 | 58 | 57 |
| 21 | 58 | 59 |
| 21 | 59 | 58 |
| 21 | 59 | 59 |
| 21 | 59 | 59 |
| 21 | 59 | 58 |
| 21 | 59 | 60 |
| 21 | 59 | 60 |
| 21 | 58 | 58 |
| 21 | 59 | 58 |
| 21 | 59 | 60 |
| 21 | 59 | 58 |
| 21 | 59 | 58 |
| 21 | 59 | 59 |
| 21 | 59 | 60 |
| 21 | 59 | 59 |
| 21 | 59 | 60 |
| (17 rows) |
| |
| /* |
| * Test both distinct and group by column are not distributed key |
| * |
| * HashAgg (to aggregate) |
| * -> HashAgg (to eliminate duplicates) |
| * -> Redistribute (according to GROUP BY) |
| * -> Streaming HashAgg (to eliminate duplicates) |
| * -> input |
| * |
| */ |
| explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (sum(c)), (count(a)), (sum(d)) |
| -> Finalize HashAggregate |
| Output: sum(c), count(a), sum(d) |
| Group Key: dqa_f3.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Hash Key: b |
| -> Partial GroupAggregate |
| Output: b, c, PARTIAL count(a), PARTIAL sum(d) |
| Group Key: dqa_f3.b, dqa_f3.c |
| -> Sort |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Sort Key: dqa_f3.b, dqa_f3.c |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Hash Key: b, c |
| -> Streaming Partial HashAggregate |
| Output: b, c, PARTIAL count(a), PARTIAL sum(d) |
| Group Key: dqa_f3.b, dqa_f3.c |
| -> Seq Scan on public.dqa_f3 |
| Output: a, b, c, d |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (24 rows) |
| |
| select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; |
| sum | count | sum |
| -----+-------+------ |
| 3 | 200 | 1100 |
| 3 | 200 | 900 |
| 3 | 200 | 1300 |
| 3 | 200 | 500 |
| 3 | 200 | 700 |
| (5 rows) |
| |
| explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b order by b; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------- |
| Result |
| Output: (sum(c)), (count(a)), (sum(d)) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (sum(c)), (count(a)), (sum(d)), b |
| Merge Key: b |
| -> Finalize GroupAggregate |
| Output: sum(c), count(a), sum(d), b |
| Group Key: dqa_f3.b |
| -> Sort |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Sort Key: dqa_f3.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Hash Key: b |
| -> Partial GroupAggregate |
| Output: b, c, PARTIAL count(a), PARTIAL sum(d) |
| Group Key: dqa_f3.b, dqa_f3.c |
| -> Sort |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Sort Key: dqa_f3.b, dqa_f3.c |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Hash Key: b, c |
| -> Streaming Partial HashAggregate |
| Output: b, c, PARTIAL count(a), PARTIAL sum(d) |
| Group Key: dqa_f3.b, dqa_f3.c |
| -> Seq Scan on public.dqa_f3 |
| Output: a, b, c, d |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (30 rows) |
| |
| select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b order by b; |
| sum | count | sum |
| -----+-------+------ |
| 3 | 200 | 500 |
| 3 | 200 | 700 |
| 3 | 200 | 900 |
| 3 | 200 | 1100 |
| 3 | 200 | 1300 |
| (5 rows) |
| |
| explain (verbose on, costs off) select distinct sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (sum(c)), (count(a)), (sum(d)) |
| -> GroupAggregate |
| Output: (sum(c)), (count(a)), (sum(d)) |
| Group Key: (sum(dqa_f3.c)), (count(dqa_f3.a)), (sum(dqa_f3.d)) |
| -> Sort |
| Output: (sum(c)), (count(a)), (sum(d)) |
| Sort Key: (sum(dqa_f3.c)), (count(dqa_f3.a)), (sum(dqa_f3.d)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: (sum(c)), (count(a)), (sum(d)) |
| Hash Key: (sum(c)), (count(a)), (sum(d)) |
| -> Finalize HashAggregate |
| Output: sum(c), count(a), sum(d) |
| Group Key: dqa_f3.b |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Hash Key: b |
| -> Partial GroupAggregate |
| Output: b, c, PARTIAL count(a), PARTIAL sum(d) |
| Group Key: dqa_f3.b, dqa_f3.c |
| -> Sort |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Sort Key: dqa_f3.b, dqa_f3.c |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Hash Key: b, c |
| -> Streaming Partial HashAggregate |
| Output: b, c, PARTIAL count(a), PARTIAL sum(d) |
| Group Key: dqa_f3.b, dqa_f3.c |
| -> Seq Scan on public.dqa_f3 |
| Output: a, b, c, d |
| Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'on' |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (33 rows) |
| |
| select distinct sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; |
| sum | count | sum |
| -----+-------+------ |
| 3 | 200 | 700 |
| 3 | 200 | 900 |
| 3 | 200 | 500 |
| 3 | 200 | 1300 |
| 3 | 200 | 1100 |
| (5 rows) |
| |
| explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b having avg(e) > 3; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (sum(c)), (count(a)), (sum(d)) |
| -> Result |
| Output: (sum(c)), (count(a)), (sum(d)) |
| Filter: ((avg(dqa_f3.e)) > '3'::numeric) |
| -> Finalize HashAggregate |
| Output: sum(c), count(a), sum(d), avg(e), b |
| Group Key: dqa_f3.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)), (PARTIAL avg(e)) |
| Hash Key: b |
| -> Partial HashAggregate |
| Output: b, c, PARTIAL count(a), PARTIAL sum(d), PARTIAL avg(e) |
| Group Key: dqa_f3.b, dqa_f3.c |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)), (PARTIAL avg(e)) |
| Hash Key: b, c |
| -> Streaming Partial HashAggregate |
| Output: b, c, PARTIAL count(a), PARTIAL sum(d), PARTIAL avg(e) |
| Group Key: dqa_f3.b, dqa_f3.c |
| -> Seq Scan on public.dqa_f3 |
| Output: a, b, c, d, e |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (24 rows) |
| |
| select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b having avg(e) > 3; |
| sum | count | sum |
| -----+-------+------ |
| 3 | 200 | 500 |
| 3 | 200 | 700 |
| 3 | 200 | 1100 |
| (3 rows) |
| |
| -- flaky tests |
| -- explain (verbose on, costs off) |
| -- select sum(Distinct sub.c), count(a), sum(d) |
| -- from dqa_f3 left join(select x, coalesce(y, 5) as c from dqa_f2) as sub |
| -- on sub.x = dqa_f3.e group by b; |
| -- select sum(Distinct sub.c), count(a), sum(d) |
| -- from dqa_f3 left join(select x, coalesce(y, 5) as c from dqa_f2) as sub |
| -- on sub.x = dqa_f3.e group by b; |
| -- Test gp_enable_agg_distinct_pruning is off on this branch |
| set gp_enable_agg_distinct_pruning = off; |
| explain (verbose on, costs off) select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (sum(c)), (count(a)), (sum(d)) |
| -> Finalize HashAggregate |
| Output: sum(c), count(a), sum(d) |
| Group Key: dqa_f3.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Hash Key: b |
| -> Partial GroupAggregate |
| Output: b, c, PARTIAL count(a), PARTIAL sum(d) |
| Group Key: dqa_f3.b, dqa_f3.c |
| -> Sort |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Sort Key: dqa_f3.b, dqa_f3.c |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Output: b, c, (PARTIAL count(a)), (PARTIAL sum(d)) |
| Hash Key: b, c |
| -> Streaming Partial HashAggregate |
| Output: b, c, PARTIAL count(a), PARTIAL sum(d) |
| Group Key: dqa_f3.b, dqa_f3.c |
| -> Seq Scan on public.dqa_f3 |
| Output: a, b, c, d |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_groupagg = 'off', gp_enable_agg_distinct_pruning = 'off', gp_motion_cost_per_row = '1' |
| (24 rows) |
| |
| select sum(Distinct c), count(a), sum(d) from dqa_f3 group by b; |
| sum | count | sum |
| -----+-------+------ |
| 3 | 200 | 1100 |
| 3 | 200 | 900 |
| 3 | 200 | 1300 |
| 3 | 200 | 500 |
| 3 | 200 | 700 |
| (5 rows) |
| |
| reset gp_enable_agg_distinct_pruning; |
| /* |
| * Test multistage through Gather Motion(grouplocus cannot hashed or not exist) |
| * |
| * Finalize Aggregate |
| * -> Gather Motion |
| * -> Partial Aggregate |
| * -> HashAggregate, to remove duplicates |
| * -> Redistribute Motion (according to DISTINCT arg) |
| * -> Streaming HashAgg (to eliminate duplicates) |
| * -> input |
| */ |
| explain (verbose on, costs off) select sum(Distinct b), count(c), sum(a) from dqa_f3; |
| QUERY PLAN |
| ----------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: sum(b), count(c), sum(a) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: b, (PARTIAL count(c)), (PARTIAL sum(a)) |
| -> Partial HashAggregate |
| Output: b, PARTIAL count(c), PARTIAL sum(a) |
| Group Key: dqa_f3.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, (PARTIAL count(c)), (PARTIAL sum(a)) |
| Hash Key: b |
| -> Streaming Partial HashAggregate |
| Output: b, PARTIAL count(c), PARTIAL sum(a) |
| Group Key: dqa_f3.b |
| -> Seq Scan on public.dqa_f3 |
| Output: a, b, c |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (17 rows) |
| |
| select sum(Distinct b), count(c), sum(a) from dqa_f3; |
| sum | count | sum |
| -----+-------+------ |
| 10 | 1000 | 7993 |
| (1 row) |
| |
| explain (verbose on, costs off) select distinct sum(Distinct b), count(c), sum(a) from dqa_f3; |
| QUERY PLAN |
| ----------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: sum(b), count(c), sum(a) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: b, (PARTIAL count(c)), (PARTIAL sum(a)) |
| -> Partial HashAggregate |
| Output: b, PARTIAL count(c), PARTIAL sum(a) |
| Group Key: dqa_f3.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, (PARTIAL count(c)), (PARTIAL sum(a)) |
| Hash Key: b |
| -> Streaming Partial HashAggregate |
| Output: b, PARTIAL count(c), PARTIAL sum(a) |
| Group Key: dqa_f3.b |
| -> Seq Scan on public.dqa_f3 |
| Output: a, b, c |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' |
| (17 rows) |
| |
| select distinct sum(Distinct b), count(c), sum(a) from dqa_f3; |
| sum | count | sum |
| -----+-------+------ |
| 10 | 1000 | 7993 |
| (1 row) |
| |
| explain (verbose on, costs off) select sum(Distinct b), count(c) filter(where c > 1), sum(a) from dqa_f3; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| QUERY PLAN |
| ---------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: sum(DISTINCT b), count(c) FILTER (WHERE (c > 1)), sum(a) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL sum(DISTINCT b)), (PARTIAL count(c) FILTER (WHERE (c > 1))), (PARTIAL sum(a)) |
| -> Partial Aggregate |
| Output: PARTIAL sum(DISTINCT b), PARTIAL count(c) FILTER (WHERE (c > 1)), PARTIAL sum(a) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, c, a |
| Hash Key: b |
| -> Seq Scan on public.dqa_f3 |
| Output: b, c, a |
| Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'on' |
| Optimizer: Postgres query optimizer |
| (13 rows) |
| |
| select sum(Distinct b), count(c) filter(where c > 1), sum(a) from dqa_f3; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Aggregate functions with FILTER |
| sum | count | sum |
| -----+-------+------ |
| 10 | 333 | 7993 |
| (1 row) |
| |
| drop table dqa_f3; |
| -- Test some corner case of dqa ex.NULL |
| create table dqa_f4(a int, b int, c int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into dqa_f4 values(null, null, null); |
| insert into dqa_f4 values(1, 1, 1); |
| insert into dqa_f4 values(2, 2, 2); |
| select count(distinct a), count(distinct b) from dqa_f4 group by c; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| count | count |
| -------+------- |
| 0 | 0 |
| 1 | 1 |
| 1 | 1 |
| (3 rows) |
| |
| set optimizer_enable_multiple_distinct_aggs=on; |
| explain (verbose on, costs off) select count(distinct a), count(distinct b) from dqa_f4 group by c; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (count(DISTINCT share0_ref3.a)), (count(DISTINCT share0_ref2.b)) |
| -> Sequence |
| Output: (count(DISTINCT share0_ref3.a)), (count(DISTINCT share0_ref2.b)) |
| -> Shared Scan (share slice:id 1:0) |
| Output: share0_ref1.a, share0_ref1.b, share0_ref1.c, share0_ref1.ctid, share0_ref1.xmin, share0_ref1.cmin, share0_ref1.xmax, share0_ref1.cmax, share0_ref1.tableoid, share0_ref1.gp_segment_id, share0_ref1.gp_foreign_server |
| -> Seq Scan on public.dqa_f4 |
| Output: dqa_f4.a, dqa_f4.b, dqa_f4.c, dqa_f4.ctid, dqa_f4.xmin, dqa_f4.cmin, dqa_f4.xmax, dqa_f4.cmax, dqa_f4.tableoid, dqa_f4.gp_segment_id, dqa_f4.gp_foreign_server |
| -> Hash Join |
| Output: (count(DISTINCT share0_ref3.a)), (count(DISTINCT share0_ref2.b)) |
| Hash Cond: (NOT (share0_ref3.c IS DISTINCT FROM share0_ref2.c)) |
| -> GroupAggregate |
| Output: count(DISTINCT share0_ref3.a), share0_ref3.c |
| Group Key: share0_ref3.c |
| -> Sort |
| Output: share0_ref3.a, share0_ref3.c |
| Sort Key: share0_ref3.c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: share0_ref3.a, share0_ref3.c |
| Hash Key: share0_ref3.c |
| -> Result |
| Output: share0_ref3.a, share0_ref3.c |
| -> Shared Scan (share slice:id 2:0) |
| Output: share0_ref3.a, share0_ref3.b, share0_ref3.c, share0_ref3.ctid, share0_ref3.xmin, share0_ref3.cmin, share0_ref3.xmax, share0_ref3.cmax, share0_ref3.tableoid, share0_ref3.gp_segment_id, share0_ref3.gp_foreign_server |
| -> Hash |
| Output: (count(DISTINCT share0_ref2.b)), share0_ref2.c |
| -> GroupAggregate |
| Output: count(DISTINCT share0_ref2.b), share0_ref2.c |
| Group Key: share0_ref2.c |
| -> Sort |
| Output: share0_ref2.b, share0_ref2.c |
| Sort Key: share0_ref2.c |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Output: share0_ref2.b, share0_ref2.c |
| Hash Key: share0_ref2.c |
| -> Result |
| Output: share0_ref2.b, share0_ref2.c |
| -> Shared Scan (share slice:id 3:0) |
| Output: share0_ref2.a, share0_ref2.b, share0_ref2.c, share0_ref2.ctid, share0_ref2.xmin, share0_ref2.cmin, share0_ref2.xmax, share0_ref2.cmax, share0_ref2.tableoid, share0_ref2.gp_segment_id, share0_ref2.gp_foreign_server |
| Settings: enable_groupagg = 'off', enable_hashagg = 'on', enable_parallel = 'off', gp_motion_cost_per_row = '2', optimizer = 'on' |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (41 rows) |
| |
| select count(distinct a), count(distinct b) from dqa_f4 group by c; |
| count | count |
| -------+------- |
| 1 | 1 |
| 1 | 1 |
| 0 | 0 |
| (3 rows) |
| |
| reset optimizer_enable_multiple_distinct_aggs; |
| drop table dqa_f4; |