| -- |
| -- PARTITION_AGGREGATE |
| -- Test partitionwise aggregation on partitioned tables |
| -- |
| -- Note: to ensure plan stability, it's a good idea to make the partitions of |
| -- any one partitioned table in this test all have different numbers of rows. |
| -- |
| -- Disable ORCA since it does support partition-wise aggregates |
| set optimizer to off; |
| -- Enable partitionwise aggregate, which by default is disabled. |
| SET enable_partitionwise_aggregate TO true; |
| -- Enable partitionwise join, which by default is disabled. |
| SET enable_partitionwise_join TO true; |
| -- Disable parallel plans. |
| SET max_parallel_workers_per_gather TO 0; |
| -- Disable incremental sort, which can influence selected plans due to fuzz factor. |
| SET enable_incremental_sort TO off; |
| -- |
| -- Tests for list partitioned tables. |
| -- |
| CREATE TABLE pagg_tab (a int, b int, c text, d int) PARTITION BY LIST(c); |
| CREATE TABLE pagg_tab_p1 PARTITION OF pagg_tab FOR VALUES IN ('0000', '0001', '0002', '0003', '0004'); |
| CREATE TABLE pagg_tab_p2 PARTITION OF pagg_tab FOR VALUES IN ('0005', '0006', '0007', '0008'); |
| CREATE TABLE pagg_tab_p3 PARTITION OF pagg_tab FOR VALUES IN ('0009', '0010', '0011'); |
| INSERT INTO pagg_tab SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i; |
| ANALYZE pagg_tab; |
| -- When GROUP BY clause matches; full aggregation is performed for each partition. |
| EXPLAIN (COSTS OFF) |
| SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ----------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) |
| -> Sort |
| Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) |
| -> Append |
| -> Finalize HashAggregate |
| Group Key: pagg_tab.c |
| Filter: (avg(pagg_tab.d) < '15'::numeric) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: pagg_tab.c |
| -> Partial HashAggregate |
| Group Key: pagg_tab.c |
| -> Seq Scan on pagg_tab_p1 pagg_tab |
| -> Finalize HashAggregate |
| Group Key: pagg_tab_1.c |
| Filter: (avg(pagg_tab_1.d) < '15'::numeric) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: pagg_tab_1.c |
| -> Partial HashAggregate |
| Group Key: pagg_tab_1.c |
| -> Seq Scan on pagg_tab_p2 pagg_tab_1 |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab_2.c |
| Filter: (avg(pagg_tab_2.d) < '15'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_2.c |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: pagg_tab_2.c |
| -> Partial HashAggregate |
| Group Key: pagg_tab_2.c |
| -> Seq Scan on pagg_tab_p3 pagg_tab_2 |
| Optimizer: Postgres query optimizer |
| (30 rows) |
| |
| SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; |
| c | sum | avg | count | min | max |
| ------+------+---------------------+-------+-----+----- |
| 0000 | 2000 | 12.0000000000000000 | 250 | 0 | 24 |
| 0001 | 2250 | 13.0000000000000000 | 250 | 1 | 25 |
| 0002 | 2500 | 14.0000000000000000 | 250 | 2 | 26 |
| 0006 | 2500 | 12.0000000000000000 | 250 | 2 | 24 |
| 0007 | 2750 | 13.0000000000000000 | 250 | 3 | 25 |
| 0008 | 2000 | 14.0000000000000000 | 250 | 0 | 26 |
| (6 rows) |
| |
| -- When GROUP BY clause does not match; partial aggregation is performed for each partition. |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ----------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab.a, (sum(pagg_tab.b)), (avg(pagg_tab.b)) |
| -> Sort |
| Sort Key: pagg_tab.a, (sum(pagg_tab.b)), (avg(pagg_tab.b)) |
| -> Finalize HashAggregate |
| Group Key: pagg_tab.a |
| Filter: (avg(pagg_tab.d) < '15'::numeric) |
| -> Append |
| -> Partial HashAggregate |
| Group Key: pagg_tab.a |
| -> Seq Scan on pagg_tab_p1 pagg_tab |
| -> Partial HashAggregate |
| Group Key: pagg_tab_1.a |
| -> Seq Scan on pagg_tab_p2 pagg_tab_1 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_2.a |
| -> Seq Scan on pagg_tab_p3 pagg_tab_2 |
| Optimizer: Postgres query optimizer |
| (18 rows) |
| |
| SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; |
| a | sum | avg | count | min | max |
| ----+------+---------------------+-------+-----+----- |
| 0 | 1500 | 10.0000000000000000 | 150 | 0 | 20 |
| 1 | 1650 | 11.0000000000000000 | 150 | 1 | 21 |
| 2 | 1800 | 12.0000000000000000 | 150 | 2 | 22 |
| 3 | 1950 | 13.0000000000000000 | 150 | 3 | 23 |
| 4 | 2100 | 14.0000000000000000 | 150 | 4 | 24 |
| 10 | 1500 | 10.0000000000000000 | 150 | 10 | 20 |
| 11 | 1650 | 11.0000000000000000 | 150 | 11 | 21 |
| 12 | 1800 | 12.0000000000000000 | 150 | 12 | 22 |
| 13 | 1950 | 13.0000000000000000 | 150 | 13 | 23 |
| 14 | 2100 | 14.0000000000000000 | 150 | 14 | 24 |
| (10 rows) |
| |
| -- Check with multiple columns in GROUP BY |
| EXPLAIN (COSTS OFF) |
| SELECT a, c, count(*) FROM pagg_tab GROUP BY a, c; |
| QUERY PLAN |
| ------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab.a, pagg_tab.c |
| -> Seq Scan on pagg_tab_p1 pagg_tab |
| -> HashAggregate |
| Group Key: pagg_tab_1.a, pagg_tab_1.c |
| -> Seq Scan on pagg_tab_p2 pagg_tab_1 |
| -> HashAggregate |
| Group Key: pagg_tab_2.a, pagg_tab_2.c |
| -> Seq Scan on pagg_tab_p3 pagg_tab_2 |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| -- Check with multiple columns in GROUP BY, order in GROUP BY is reversed |
| EXPLAIN (COSTS OFF) |
| SELECT a, c, count(*) FROM pagg_tab GROUP BY c, a; |
| QUERY PLAN |
| ------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab.c, pagg_tab.a |
| -> Seq Scan on pagg_tab_p1 pagg_tab |
| -> HashAggregate |
| Group Key: pagg_tab_1.c, pagg_tab_1.a |
| -> Seq Scan on pagg_tab_p2 pagg_tab_1 |
| -> HashAggregate |
| Group Key: pagg_tab_2.c, pagg_tab_2.a |
| -> Seq Scan on pagg_tab_p3 pagg_tab_2 |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| -- Check with multiple columns in GROUP BY, order in target-list is reversed |
| EXPLAIN (COSTS OFF) |
| SELECT c, a, count(*) FROM pagg_tab GROUP BY a, c; |
| QUERY PLAN |
| ------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab.a, pagg_tab.c |
| -> Seq Scan on pagg_tab_p1 pagg_tab |
| -> HashAggregate |
| Group Key: pagg_tab_1.a, pagg_tab_1.c |
| -> Seq Scan on pagg_tab_p2 pagg_tab_1 |
| -> HashAggregate |
| Group Key: pagg_tab_2.a, pagg_tab_2.c |
| -> Seq Scan on pagg_tab_p3 pagg_tab_2 |
| Optimizer: Postgres query optimizer |
| (12 rows) |
| |
| -- Test when input relation for grouping is dummy |
| EXPLAIN (COSTS OFF) |
| SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; |
| QUERY PLAN |
| -------------------------------- |
| HashAggregate |
| Group Key: c |
| -> Result |
| One-Time Filter: false |
| (4 rows) |
| |
| SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; |
| c | sum |
| ---+----- |
| (0 rows) |
| |
| EXPLAIN (COSTS OFF) |
| SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; |
| QUERY PLAN |
| -------------------------------- |
| GroupAggregate |
| -> Result |
| One-Time Filter: false |
| (3 rows) |
| |
| SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; |
| c | sum |
| ---+----- |
| (0 rows) |
| |
| -- Test GroupAggregate paths by disabling hash aggregates. |
| SET enable_hashagg TO false; |
| -- When GROUP BY clause matches full aggregation is performed for each partition. |
| EXPLAIN (COSTS OFF) |
| SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| -------------------------------------------------------------- |
| Sort |
| Sort Key: pagg_tab.c, (sum(pagg_tab.a)), (avg(pagg_tab.b)) |
| -> Append |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab.c |
| Filter: (avg(pagg_tab.d) < '15'::numeric) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab.c |
| -> Partial GroupAggregate |
| Group Key: pagg_tab.c |
| -> Sort |
| Sort Key: pagg_tab.c |
| -> Seq Scan on pagg_tab_p1 pagg_tab |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab_1.c |
| Filter: (avg(pagg_tab_1.d) < '15'::numeric) |
| -> Gather Motion 3:1 (slice2; segments: 3) |
| Merge Key: pagg_tab_1.c |
| -> Partial GroupAggregate |
| Group Key: pagg_tab_1.c |
| -> Sort |
| Sort Key: pagg_tab_1.c |
| -> Seq Scan on pagg_tab_p2 pagg_tab_1 |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab_2.c |
| Filter: (avg(pagg_tab_2.d) < '15'::numeric) |
| -> Gather Motion 3:1 (slice3; segments: 3) |
| Merge Key: pagg_tab_2.c |
| -> Partial GroupAggregate |
| Group Key: pagg_tab_2.c |
| -> Sort |
| Sort Key: pagg_tab_2.c |
| -> Seq Scan on pagg_tab_p3 pagg_tab_2 |
| Optimizer: Postgres query optimizer |
| (34 rows) |
| |
| SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; |
| c | sum | avg | count |
| ------+------+---------------------+------- |
| 0000 | 2000 | 12.0000000000000000 | 250 |
| 0001 | 2250 | 13.0000000000000000 | 250 |
| 0002 | 2500 | 14.0000000000000000 | 250 |
| 0006 | 2500 | 12.0000000000000000 | 250 |
| 0007 | 2750 | 13.0000000000000000 | 250 |
| 0008 | 2000 | 14.0000000000000000 | 250 |
| (6 rows) |
| |
| -- When GROUP BY clause does not match; partial aggregation is performed for each partition. |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab.a, (sum(pagg_tab.b)), (avg(pagg_tab.b)) |
| -> Sort |
| Sort Key: pagg_tab.a, (sum(pagg_tab.b)), (avg(pagg_tab.b)) |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab.a |
| Filter: (avg(pagg_tab.d) < '15'::numeric) |
| -> Merge Append |
| Sort Key: pagg_tab.a |
| -> Partial GroupAggregate |
| Group Key: pagg_tab.a |
| -> Sort |
| Sort Key: pagg_tab.a |
| -> Seq Scan on pagg_tab_p1 pagg_tab |
| -> Partial GroupAggregate |
| Group Key: pagg_tab_1.a |
| -> Sort |
| Sort Key: pagg_tab_1.a |
| -> Seq Scan on pagg_tab_p2 pagg_tab_1 |
| -> Partial GroupAggregate |
| Group Key: pagg_tab_2.a |
| -> Sort |
| Sort Key: pagg_tab_2.a |
| -> Seq Scan on pagg_tab_p3 pagg_tab_2 |
| Optimizer: Postgres query optimizer |
| (25 rows) |
| |
| SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; |
| a | sum | avg | count |
| ----+------+---------------------+------- |
| 0 | 1500 | 10.0000000000000000 | 150 |
| 1 | 1650 | 11.0000000000000000 | 150 |
| 2 | 1800 | 12.0000000000000000 | 150 |
| 3 | 1950 | 13.0000000000000000 | 150 |
| 4 | 2100 | 14.0000000000000000 | 150 |
| 10 | 1500 | 10.0000000000000000 | 150 |
| 11 | 1650 | 11.0000000000000000 | 150 |
| 12 | 1800 | 12.0000000000000000 | 150 |
| 13 | 1950 | 13.0000000000000000 | 150 |
| 14 | 2100 | 14.0000000000000000 | 150 |
| (10 rows) |
| |
| -- Test partitionwise grouping without any aggregates |
| EXPLAIN (COSTS OFF) |
| SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; |
| QUERY PLAN |
| ------------------------------------------------------- |
| Merge Append |
| Sort Key: pagg_tab.c |
| -> GroupAggregate |
| Group Key: pagg_tab.c |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab.c |
| -> GroupAggregate |
| Group Key: pagg_tab.c |
| -> Sort |
| Sort Key: pagg_tab.c |
| -> Seq Scan on pagg_tab_p1 pagg_tab |
| -> GroupAggregate |
| Group Key: pagg_tab_1.c |
| -> Gather Motion 3:1 (slice2; segments: 3) |
| Merge Key: pagg_tab_1.c |
| -> GroupAggregate |
| Group Key: pagg_tab_1.c |
| -> Sort |
| Sort Key: pagg_tab_1.c |
| -> Seq Scan on pagg_tab_p2 pagg_tab_1 |
| -> GroupAggregate |
| Group Key: pagg_tab_2.c |
| -> Gather Motion 3:1 (slice3; segments: 3) |
| Merge Key: pagg_tab_2.c |
| -> GroupAggregate |
| Group Key: pagg_tab_2.c |
| -> Sort |
| Sort Key: pagg_tab_2.c |
| -> Seq Scan on pagg_tab_p3 pagg_tab_2 |
| Optimizer: Postgres query optimizer |
| (30 rows) |
| |
| SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; |
| c |
| ------ |
| 0000 |
| 0001 |
| 0002 |
| 0003 |
| 0004 |
| 0005 |
| 0006 |
| 0007 |
| 0008 |
| 0009 |
| 0010 |
| 0011 |
| (12 rows) |
| |
| EXPLAIN (COSTS OFF) |
| SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab.a |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab.a |
| -> Merge Append |
| Sort Key: pagg_tab.a |
| -> Partial GroupAggregate |
| Group Key: pagg_tab.a |
| -> Sort |
| Sort Key: pagg_tab.a |
| -> Seq Scan on pagg_tab_p1 pagg_tab |
| Filter: (a < 3) |
| -> Partial GroupAggregate |
| Group Key: pagg_tab_1.a |
| -> Sort |
| Sort Key: pagg_tab_1.a |
| -> Seq Scan on pagg_tab_p2 pagg_tab_1 |
| Filter: (a < 3) |
| -> Partial GroupAggregate |
| Group Key: pagg_tab_2.a |
| -> Sort |
| Sort Key: pagg_tab_2.a |
| -> Seq Scan on pagg_tab_p3 pagg_tab_2 |
| Filter: (a < 3) |
| Optimizer: Postgres query optimizer |
| (25 rows) |
| |
| SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; |
| a |
| --- |
| 0 |
| 1 |
| 2 |
| (3 rows) |
| |
| RESET enable_hashagg; |
| -- ROLLUP, partitionwise aggregation does not apply |
| EXPLAIN (COSTS OFF) |
| SELECT c, sum(a) FROM pagg_tab GROUP BY rollup(c) ORDER BY 1, 2; |
| QUERY PLAN |
| ------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab.c, (sum(pagg_tab.a)) |
| -> Sort |
| Sort Key: pagg_tab.c, (sum(pagg_tab.a)) |
| -> Finalize HashAggregate |
| Group Key: pagg_tab.c, (GROUPINGSET_ID()) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: pagg_tab.c, (GROUPINGSET_ID()) |
| -> Partial MixedAggregate |
| Hash Key: pagg_tab.c |
| Group Key: () |
| -> Append |
| -> Seq Scan on pagg_tab_p1 pagg_tab_1 |
| -> Seq Scan on pagg_tab_p2 pagg_tab_2 |
| -> Seq Scan on pagg_tab_p3 pagg_tab_3 |
| Optimizer: Postgres query optimizer |
| (16 rows) |
| |
| -- ORDERED SET within the aggregate. |
| -- Full aggregation; since all the rows that belong to the same group come |
| -- from the same partition, having an ORDER BY within the aggregate doesn't |
| -- make any difference. |
| EXPLAIN (COSTS OFF) |
| SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2; |
| QUERY PLAN |
| --------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab.c, (sum(pagg_tab.b ORDER BY pagg_tab.a)) |
| -> Sort |
| Sort Key: pagg_tab.c, (sum(pagg_tab.b ORDER BY pagg_tab.a)) |
| -> Append |
| -> GroupAggregate |
| Group Key: pagg_tab.c |
| -> Sort |
| Sort Key: pagg_tab.c, pagg_tab.a |
| -> Seq Scan on pagg_tab_p1 pagg_tab |
| -> GroupAggregate |
| Group Key: pagg_tab_1.c |
| -> Sort |
| Sort Key: pagg_tab_1.c, pagg_tab_1.a |
| -> Seq Scan on pagg_tab_p2 pagg_tab_1 |
| -> GroupAggregate |
| Group Key: pagg_tab_2.c |
| -> Sort |
| Sort Key: pagg_tab_2.c, pagg_tab_2.a |
| -> Seq Scan on pagg_tab_p3 pagg_tab_2 |
| Optimizer: Postgres query optimizer |
| (21 rows) |
| |
| -- Since GROUP BY clause does not match with PARTITION KEY; we need to do |
| -- partial aggregation. However, ORDERED SET are not partial safe and thus |
| -- partitionwise aggregation plan is not generated. |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b order by a) FROM pagg_tab GROUP BY a ORDER BY 1, 2; |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab.a, (sum(pagg_tab.b ORDER BY pagg_tab.a)) |
| -> Sort |
| Sort Key: pagg_tab.a, (sum(pagg_tab.b ORDER BY pagg_tab.a)) |
| -> GroupAggregate |
| Group Key: pagg_tab.a |
| -> Sort |
| Sort Key: pagg_tab.a |
| -> Append |
| -> Seq Scan on pagg_tab_p1 pagg_tab_1 |
| -> Seq Scan on pagg_tab_p2 pagg_tab_2 |
| -> Seq Scan on pagg_tab_p3 pagg_tab_3 |
| Optimizer: Postgres query optimizer |
| (13 rows) |
| |
| -- JOIN query |
| CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x); |
| CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10); |
| CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20); |
| CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30); |
| CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y); |
| CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10); |
| CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20); |
| CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30); |
| INSERT INTO pagg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; |
| INSERT INTO pagg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; |
| ANALYZE pagg_tab1; |
| ANALYZE pagg_tab2; |
| -- When GROUP BY clause matches; full aggregation is performed for each partition. |
| EXPLAIN (COSTS OFF) |
| SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: t1.x, (sum(t1.y)), (count(*)) |
| -> Sort |
| Sort Key: t1.x, (sum(t1.y)), (count(*)) |
| -> Append |
| -> HashAggregate |
| Group Key: t1.x |
| -> Hash Join |
| Hash Cond: (t2.y = t1.x) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: t2.y |
| -> Seq Scan on pagg_tab2_p1 t2 |
| -> Hash |
| -> Seq Scan on pagg_tab1_p1 t1 |
| -> HashAggregate |
| Group Key: t1_1.x |
| -> Hash Join |
| Hash Cond: (t1_1.x = t2_1.y) |
| -> Seq Scan on pagg_tab1_p2 t1_1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: t2_1.y |
| -> Seq Scan on pagg_tab2_p2 t2_1 |
| -> HashAggregate |
| Group Key: t1_2.x |
| -> Hash Join |
| Hash Cond: (t1_2.x = t2_2.y) |
| -> Seq Scan on pagg_tab1_p3 t1_2 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: t2_2.y |
| -> Seq Scan on pagg_tab2_p3 t2_2 |
| Optimizer: Postgres query optimizer |
| (33 rows) |
| |
| SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; |
| x | sum | count |
| ----+------+------- |
| 0 | 500 | 100 |
| 6 | 1100 | 100 |
| 12 | 700 | 100 |
| 18 | 1300 | 100 |
| 24 | 900 | 100 |
| (5 rows) |
| |
| -- Check with whole-row reference; partitionwise aggregation does not apply |
| EXPLAIN (COSTS OFF) |
| SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: t1.x, (sum(t1.y)), (count(((t1.*)::pagg_tab1))) |
| -> Sort |
| Sort Key: t1.x, (sum(t1.y)), (count(((t1.*)::pagg_tab1))) |
| -> HashAggregate |
| Group Key: t1.x |
| -> Hash Join |
| Hash Cond: (t2.y = t1.x) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: t2.y |
| -> Append |
| -> Seq Scan on pagg_tab2_p1 t2_1 |
| -> Seq Scan on pagg_tab2_p2 t2_2 |
| -> Seq Scan on pagg_tab2_p3 t2_3 |
| -> Hash |
| -> Append |
| -> Seq Scan on pagg_tab1_p1 t1_1 |
| -> Seq Scan on pagg_tab1_p2 t1_2 |
| -> Seq Scan on pagg_tab1_p3 t1_3 |
| Optimizer: Postgres query optimizer |
| (20 rows) |
| |
| SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; |
| x | sum | count |
| ----+------+------- |
| 0 | 500 | 100 |
| 6 | 1100 | 100 |
| 12 | 700 | 100 |
| 18 | 1300 | 100 |
| 24 | 900 | 100 |
| (5 rows) |
| |
| -- GROUP BY having other matching key |
| EXPLAIN (COSTS OFF) |
| SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: t2.y, (sum(t1.y)), (count(*)) |
| -> Sort |
| Sort Key: t2.y, (sum(t1.y)), (count(*)) |
| -> Append |
| -> HashAggregate |
| Group Key: t2.y |
| -> Hash Join |
| Hash Cond: (t2.y = t1.x) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: t2.y |
| -> Seq Scan on pagg_tab2_p1 t2 |
| -> Hash |
| -> Seq Scan on pagg_tab1_p1 t1 |
| -> HashAggregate |
| Group Key: t2_1.y |
| -> Hash Join |
| Hash Cond: (t1_1.x = t2_1.y) |
| -> Seq Scan on pagg_tab1_p2 t1_1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: t2_1.y |
| -> Seq Scan on pagg_tab2_p2 t2_1 |
| -> HashAggregate |
| Group Key: t2_2.y |
| -> Hash Join |
| Hash Cond: (t1_2.x = t2_2.y) |
| -> Seq Scan on pagg_tab1_p3 t1_2 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: t2_2.y |
| -> Seq Scan on pagg_tab2_p3 t2_2 |
| Optimizer: Postgres query optimizer |
| (33 rows) |
| |
| -- When GROUP BY clause does not match; partial aggregation is performed for each partition. |
| -- Also test GroupAggregate paths by disabling hash aggregates. |
| SET enable_hashagg TO false; |
| EXPLAIN (COSTS OFF) |
| SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: t1.y, (sum(t1.x)), (count(*)) |
| -> Sort |
| Sort Key: t1.y, (sum(t1.x)), (count(*)) |
| -> Finalize GroupAggregate |
| Group Key: t1.y |
| Filter: (avg(t1.x) > '10'::numeric) |
| -> Sort |
| Sort Key: t1.y |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: t1.y |
| -> Append |
| -> Partial GroupAggregate |
| Group Key: t1.y |
| -> Sort |
| Sort Key: t1.y |
| -> Hash Join |
| Hash Cond: (t2.y = t1.x) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: t2.y |
| -> Seq Scan on pagg_tab2_p1 t2 |
| -> Hash |
| -> Seq Scan on pagg_tab1_p1 t1 |
| -> Partial GroupAggregate |
| Group Key: t1_1.y |
| -> Sort |
| Sort Key: t1_1.y |
| -> Hash Join |
| Hash Cond: (t1_1.x = t2_1.y) |
| -> Seq Scan on pagg_tab1_p2 t1_1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: t2_1.y |
| -> Seq Scan on pagg_tab2_p2 t2_1 |
| -> Partial GroupAggregate |
| Group Key: t1_2.y |
| -> Sort |
| Sort Key: t1_2.y |
| -> Hash Join |
| Hash Cond: (t1_2.x = t2_2.y) |
| -> Seq Scan on pagg_tab1_p3 t1_2 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice5; segments: 3) |
| Hash Key: t2_2.y |
| -> Seq Scan on pagg_tab2_p3 t2_2 |
| Optimizer: Postgres query optimizer |
| (46 rows) |
| |
| SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; |
| y | sum | count |
| ----+------+------- |
| 2 | 600 | 50 |
| 4 | 1200 | 50 |
| 8 | 900 | 50 |
| 12 | 600 | 50 |
| 14 | 1200 | 50 |
| 18 | 900 | 50 |
| (6 rows) |
| |
| RESET enable_hashagg; |
| -- Check with LEFT/RIGHT/FULL OUTER JOINs which produces NULL values for |
| -- aggregation |
| -- LEFT JOIN, should produce partial partitionwise aggregation plan as |
| -- GROUP BY is on nullable column |
| EXPLAIN (COSTS OFF) |
| SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: b.y |
| -> Sort |
| Sort Key: b.y |
| -> Finalize HashAggregate |
| Group Key: b.y |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: b.y |
| -> Append |
| -> Partial HashAggregate |
| Group Key: b.y |
| -> Hash Right Join |
| Hash Cond: (b.y = a.x) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: b.y |
| -> Seq Scan on pagg_tab2_p1 b |
| -> Hash |
| -> Seq Scan on pagg_tab1_p1 a |
| -> Partial HashAggregate |
| Group Key: b_1.y |
| -> Hash Left Join |
| Hash Cond: (a_1.x = b_1.y) |
| -> Seq Scan on pagg_tab1_p2 a_1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: b_1.y |
| -> Seq Scan on pagg_tab2_p2 b_1 |
| -> Partial HashAggregate |
| Group Key: b_2.y |
| -> Hash Left Join |
| Hash Cond: (a_2.x = b_2.y) |
| -> Seq Scan on pagg_tab1_p3 a_2 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice5; segments: 3) |
| Hash Key: b_2.y |
| -> Seq Scan on pagg_tab2_p3 b_2 |
| Optimizer: Postgres query optimizer |
| (37 rows) |
| |
| SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; |
| y | sum |
| ----+------ |
| 0 | 500 |
| 6 | 1100 |
| 12 | 700 |
| 18 | 1300 |
| 24 | 900 |
| | 900 |
| (6 rows) |
| |
| -- RIGHT JOIN, should produce full partitionwise aggregation plan as |
| -- GROUP BY is on non-nullable column |
| EXPLAIN (COSTS OFF) |
| SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: b.y |
| -> Sort |
| Sort Key: b.y |
| -> Append |
| -> HashAggregate |
| Group Key: b.y |
| -> Hash Left Join |
| Hash Cond: (b.y = a.x) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: b.y |
| -> Seq Scan on pagg_tab2_p1 b |
| -> Hash |
| -> Seq Scan on pagg_tab1_p1 a |
| -> HashAggregate |
| Group Key: b_1.y |
| -> Hash Right Join |
| Hash Cond: (a_1.x = b_1.y) |
| -> Seq Scan on pagg_tab1_p2 a_1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: b_1.y |
| -> Seq Scan on pagg_tab2_p2 b_1 |
| -> HashAggregate |
| Group Key: b_2.y |
| -> Hash Right Join |
| Hash Cond: (a_2.x = b_2.y) |
| -> Seq Scan on pagg_tab1_p3 a_2 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: b_2.y |
| -> Seq Scan on pagg_tab2_p3 b_2 |
| Optimizer: Postgres query optimizer |
| (33 rows) |
| |
| SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; |
| y | sum |
| ----+------ |
| 0 | 500 |
| 3 | |
| 6 | 1100 |
| 9 | |
| 12 | 700 |
| 15 | |
| 18 | 1300 |
| 21 | |
| 24 | 900 |
| 27 | |
| (10 rows) |
| |
| -- FULL JOIN, should produce partial partitionwise aggregation plan as |
| -- GROUP BY is on nullable column |
| EXPLAIN (COSTS OFF) |
| SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: a.x |
| -> Sort |
| Sort Key: a.x |
| -> Finalize HashAggregate |
| Group Key: a.x |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: a.x |
| -> Append |
| -> Partial HashAggregate |
| Group Key: a.x |
| -> Hash Full Join |
| Hash Cond: (b.y = a.x) |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: b.y |
| -> Seq Scan on pagg_tab2_p1 b |
| -> Hash |
| -> Seq Scan on pagg_tab1_p1 a |
| -> Partial HashAggregate |
| Group Key: a_1.x |
| -> Hash Full Join |
| Hash Cond: (a_1.x = b_1.y) |
| -> Seq Scan on pagg_tab1_p2 a_1 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice4; segments: 3) |
| Hash Key: b_1.y |
| -> Seq Scan on pagg_tab2_p2 b_1 |
| -> Partial HashAggregate |
| Group Key: a_2.x |
| -> Hash Full Join |
| Hash Cond: (a_2.x = b_2.y) |
| -> Seq Scan on pagg_tab1_p3 a_2 |
| -> Hash |
| -> Redistribute Motion 3:3 (slice5; segments: 3) |
| Hash Key: b_2.y |
| -> Seq Scan on pagg_tab2_p3 b_2 |
| Optimizer: Postgres query optimizer |
| (37 rows) |
| |
| SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; |
| x | sum |
| ----+------ |
| 0 | 500 |
| 2 | |
| 4 | |
| 6 | 1100 |
| 8 | |
| 10 | |
| 12 | 700 |
| 14 | |
| 16 | |
| 18 | 1300 |
| 20 | |
| 22 | |
| 24 | 900 |
| 26 | |
| 28 | |
| | 500 |
| (16 rows) |
| |
| -- LEFT JOIN, with dummy relation on right side, ideally |
| -- should produce full partitionwise aggregation plan as GROUP BY is on |
| -- non-nullable columns. |
| -- But right now we are unable to do partitionwise join in this case. |
| EXPLAIN (COSTS OFF) |
| SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; |
| QUERY PLAN |
| ------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab1.x, pagg_tab2.y |
| -> Sort |
| Sort Key: pagg_tab1.x, pagg_tab2.y |
| -> HashAggregate |
| Group Key: pagg_tab1.x, pagg_tab2.y |
| -> Hash Left Join |
| Hash Cond: (pagg_tab1.x = pagg_tab2.y) |
| Filter: ((pagg_tab1.x > 5) OR (pagg_tab2.y < 20)) |
| -> Append |
| -> Seq Scan on pagg_tab1_p1 pagg_tab1_1 |
| Filter: (x < 20) |
| -> Seq Scan on pagg_tab1_p2 pagg_tab1_2 |
| Filter: (x < 20) |
| -> Hash |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: pagg_tab2.y |
| -> Append |
| -> Seq Scan on pagg_tab2_p2 pagg_tab2_1 |
| Filter: (y > 10) |
| -> Seq Scan on pagg_tab2_p3 pagg_tab2_2 |
| Filter: (y > 10) |
| Optimizer: Postgres query optimizer |
| (23 rows) |
| |
| SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; |
| x | y | count |
| ----+----+------- |
| 6 | | 10 |
| 8 | | 10 |
| 10 | | 10 |
| 12 | 12 | 100 |
| 14 | | 10 |
| 16 | | 10 |
| 18 | 18 | 100 |
| (7 rows) |
| |
| -- FULL JOIN, with dummy relations on both sides, ideally |
| -- should produce partial partitionwise aggregation plan as GROUP BY is on |
| -- nullable columns. |
| -- But right now we are unable to do partitionwise join in this case. |
| EXPLAIN (COSTS OFF) |
| SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab1.x, pagg_tab2.y |
| -> Sort |
| Sort Key: pagg_tab1.x, pagg_tab2.y |
| -> HashAggregate |
| Group Key: pagg_tab1.x, pagg_tab2.y |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: pagg_tab1.x, pagg_tab2.y |
| -> Hash Full Join |
| Hash Cond: (pagg_tab1.x = pagg_tab2.y) |
| Filter: ((pagg_tab1.x > 5) OR (pagg_tab2.y < 20)) |
| -> Append |
| -> Seq Scan on pagg_tab1_p1 pagg_tab1_1 |
| Filter: (x < 20) |
| -> Seq Scan on pagg_tab1_p2 pagg_tab1_2 |
| Filter: (x < 20) |
| -> Hash |
| -> Redistribute Motion 3:3 (slice3; segments: 3) |
| Hash Key: pagg_tab2.y |
| -> Append |
| -> Seq Scan on pagg_tab2_p2 pagg_tab2_1 |
| Filter: (y > 10) |
| -> Seq Scan on pagg_tab2_p3 pagg_tab2_2 |
| Filter: (y > 10) |
| Optimizer: Postgres query optimizer |
| (25 rows) |
| |
| SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; |
| x | y | count |
| ----+----+------- |
| 6 | | 10 |
| 8 | | 10 |
| 10 | | 10 |
| 12 | 12 | 100 |
| 14 | | 10 |
| 16 | | 10 |
| 18 | 18 | 100 |
| | 15 | 10 |
| (8 rows) |
| |
| -- Empty join relation because of empty outer side, no partitionwise agg plan |
| EXPLAIN (COSTS OFF) |
| SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; |
| QUERY PLAN |
| -------------------------------------- |
| GroupAggregate |
| Group Key: pagg_tab1.y |
| -> Sort |
| Sort Key: pagg_tab1.y |
| -> Result |
| One-Time Filter: false |
| (6 rows) |
| |
| SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; |
| x | y | count |
| ---+---+------- |
| (0 rows) |
| |
| -- Partition by multiple columns |
| CREATE TABLE pagg_tab_m (a int, b int, c int) PARTITION BY RANGE(a, ((a+b)/2)); |
| CREATE TABLE pagg_tab_m_p1 PARTITION OF pagg_tab_m FOR VALUES FROM (0, 0) TO (12, 12); |
| CREATE TABLE pagg_tab_m_p2 PARTITION OF pagg_tab_m FOR VALUES FROM (12, 12) TO (22, 22); |
| CREATE TABLE pagg_tab_m_p3 PARTITION OF pagg_tab_m FOR VALUES FROM (22, 22) TO (30, 30); |
| INSERT INTO pagg_tab_m SELECT i % 30, i % 40, i % 50 FROM generate_series(0, 2999) i; |
| ANALYZE pagg_tab_m; |
| -- Partial aggregation as GROUP BY clause does not match with PARTITION KEY |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_m.a, (sum(pagg_tab_m.b)), (avg(pagg_tab_m.c)) |
| -> Sort |
| Sort Key: pagg_tab_m.a, (sum(pagg_tab_m.b)), (avg(pagg_tab_m.c)) |
| -> Finalize HashAggregate |
| Group Key: pagg_tab_m.a |
| Filter: (avg(pagg_tab_m.c) < '22'::numeric) |
| -> Append |
| -> Partial HashAggregate |
| Group Key: pagg_tab_m.a |
| -> Seq Scan on pagg_tab_m_p1 pagg_tab_m |
| -> Partial HashAggregate |
| Group Key: pagg_tab_m_1.a |
| -> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_m_2.a |
| -> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2 |
| Optimizer: Postgres query optimizer |
| (18 rows) |
| |
| SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; |
| a | sum | avg | count |
| ----+------+---------------------+------- |
| 0 | 1500 | 20.0000000000000000 | 100 |
| 1 | 1600 | 21.0000000000000000 | 100 |
| 10 | 1500 | 20.0000000000000000 | 100 |
| 11 | 1600 | 21.0000000000000000 | 100 |
| 20 | 1500 | 20.0000000000000000 | 100 |
| 21 | 1600 | 21.0000000000000000 | 100 |
| (6 rows) |
| |
| -- Full aggregation as GROUP BY clause matches with PARTITION KEY |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_m.a, (sum(pagg_tab_m.b)), (avg(pagg_tab_m.c)) |
| -> Sort |
| Sort Key: pagg_tab_m.a, (sum(pagg_tab_m.b)), (avg(pagg_tab_m.c)) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab_m.a, ((pagg_tab_m.a + pagg_tab_m.b) / 2) |
| Filter: (sum(pagg_tab_m.b) < 50) |
| -> Seq Scan on pagg_tab_m_p1 pagg_tab_m |
| -> HashAggregate |
| Group Key: pagg_tab_m_1.a, ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2) |
| Filter: (sum(pagg_tab_m_1.b) < 50) |
| -> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1 |
| -> HashAggregate |
| Group Key: pagg_tab_m_2.a, ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2) |
| Filter: (sum(pagg_tab_m_2.b) < 50) |
| -> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2 |
| Optimizer: Postgres query optimizer |
| (18 rows) |
| |
| SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; |
| a | sum | avg | count |
| ----+-----+---------------------+------- |
| 0 | 0 | 20.0000000000000000 | 25 |
| 1 | 25 | 21.0000000000000000 | 25 |
| 10 | 0 | 20.0000000000000000 | 25 |
| 11 | 25 | 21.0000000000000000 | 25 |
| 20 | 0 | 20.0000000000000000 | 25 |
| 21 | 25 | 21.0000000000000000 | 25 |
| (6 rows) |
| |
| -- Full aggregation as PARTITION KEY is part of GROUP BY clause |
| EXPLAIN (COSTS OFF) |
| SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b)) |
| -> Sort |
| Sort Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b)) |
| -> Append |
| -> HashAggregate |
| Group Key: ((pagg_tab_m.a + pagg_tab_m.b) / 2), pagg_tab_m.c, pagg_tab_m.a |
| Filter: ((sum(pagg_tab_m.b) = 50) AND (avg(pagg_tab_m.c) > '25'::numeric)) |
| -> Seq Scan on pagg_tab_m_p1 pagg_tab_m |
| -> HashAggregate |
| Group Key: ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2), pagg_tab_m_1.c, pagg_tab_m_1.a |
| Filter: ((sum(pagg_tab_m_1.b) = 50) AND (avg(pagg_tab_m_1.c) > '25'::numeric)) |
| -> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1 |
| -> HashAggregate |
| Group Key: ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2), pagg_tab_m_2.c, pagg_tab_m_2.a |
| Filter: ((sum(pagg_tab_m_2.b) = 50) AND (avg(pagg_tab_m_2.c) > '25'::numeric)) |
| -> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2 |
| Optimizer: Postgres query optimizer |
| (18 rows) |
| |
| SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; |
| a | c | sum | avg | count |
| ----+----+-----+---------------------+------- |
| 0 | 30 | 50 | 30.0000000000000000 | 5 |
| 0 | 40 | 50 | 40.0000000000000000 | 5 |
| 10 | 30 | 50 | 30.0000000000000000 | 5 |
| 10 | 40 | 50 | 40.0000000000000000 | 5 |
| 20 | 30 | 50 | 30.0000000000000000 | 5 |
| 20 | 40 | 50 | 40.0000000000000000 | 5 |
| (6 rows) |
| |
| -- Test with multi-level partitioning scheme |
| CREATE TABLE pagg_tab_ml (a int, b int, c text) PARTITION BY RANGE(a); |
| CREATE TABLE pagg_tab_ml_p1 PARTITION OF pagg_tab_ml FOR VALUES FROM (0) TO (12); |
| CREATE TABLE pagg_tab_ml_p2 PARTITION OF pagg_tab_ml FOR VALUES FROM (12) TO (20) PARTITION BY LIST (c); |
| CREATE TABLE pagg_tab_ml_p2_s1 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0000', '0001', '0002'); |
| CREATE TABLE pagg_tab_ml_p2_s2 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0003'); |
| -- This level of partitioning has different column positions than the parent |
| CREATE TABLE pagg_tab_ml_p3(b int, c text, a int) PARTITION BY RANGE (b); |
| ALTER TABLE pagg_tab_ml_p3 SET DISTRIBUTED BY (a); |
| CREATE TABLE pagg_tab_ml_p3_s1(c text, a int, b int); |
| ALTER TABLE pagg_tab_ml_p3_s1 SET DISTRIBUTED BY (a); |
| CREATE TABLE pagg_tab_ml_p3_s2 PARTITION OF pagg_tab_ml_p3 FOR VALUES FROM (7) TO (10); |
| ALTER TABLE pagg_tab_ml_p3 ATTACH PARTITION pagg_tab_ml_p3_s1 FOR VALUES FROM (0) TO (7); |
| ALTER TABLE pagg_tab_ml ATTACH PARTITION pagg_tab_ml_p3 FOR VALUES FROM (20) TO (30); |
| INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 'FM0000') FROM generate_series(0, 29999) i; |
| ANALYZE pagg_tab_ml; |
| -- For Parallel Append |
| SET max_parallel_workers_per_gather TO 2; |
| SET parallel_setup_cost = 0; |
| -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY |
| -- for level 1 only. For subpartitions, GROUP BY clause does not match with |
| -- PARTITION KEY, but still we do not see a partial aggregation as array_agg() |
| -- is not partial agg safe. |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) |
| -> Sort |
| Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) |
| -> Append |
| -> GroupAggregate |
| Group Key: pagg_tab_ml.a |
| Filter: (avg(pagg_tab_ml.b) < '3'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_ml.a, pagg_tab_ml.c |
| -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml |
| -> GroupAggregate |
| Group Key: pagg_tab_ml_2.a |
| Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c |
| -> Append |
| -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 |
| -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 |
| -> GroupAggregate |
| Group Key: pagg_tab_ml_5.a |
| Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c |
| -> Append |
| -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 |
| -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 |
| Optimizer: Postgres query optimizer |
| (28 rows) |
| |
| SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; |
| a | sum | array_agg | count |
| ----+------+-------------+------- |
| 0 | 0 | {0000,0002} | 1000 |
| 1 | 1000 | {0001,0003} | 1000 |
| 2 | 2000 | {0000,0002} | 1000 |
| 10 | 0 | {0000,0002} | 1000 |
| 11 | 1000 | {0001,0003} | 1000 |
| 12 | 2000 | {0000,0002} | 1000 |
| 20 | 0 | {0000,0002} | 1000 |
| 21 | 1000 | {0001,0003} | 1000 |
| 22 | 2000 | {0000,0002} | 1000 |
| (9 rows) |
| |
| -- Without ORDER BY clause, to test Gather at top-most path |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; |
| QUERY PLAN |
| --------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| -> Append |
| -> GroupAggregate |
| Group Key: pagg_tab_ml.a |
| Filter: (avg(pagg_tab_ml.b) < '3'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_ml.a, pagg_tab_ml.c |
| -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml |
| -> GroupAggregate |
| Group Key: pagg_tab_ml_2.a |
| Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c |
| -> Append |
| -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 |
| -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 |
| -> GroupAggregate |
| Group Key: pagg_tab_ml_5.a |
| Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c |
| -> Append |
| -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 |
| -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 |
| Optimizer: Postgres query optimizer |
| (25 rows) |
| |
| RESET parallel_setup_cost; |
| -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY |
| -- for level 1 only. For subpartitions, GROUP BY clause does not match with |
| -- PARTITION KEY, thus we will have a partial aggregation for them. |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) |
| -> Sort |
| Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab_ml.a |
| Filter: (avg(pagg_tab_ml.b) < '3'::numeric) |
| -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab_ml_2.a |
| Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_ml_2.a |
| -> Append |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_2.a |
| -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_3.a |
| -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab_ml_5.a |
| Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_ml_5.a |
| -> Append |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_5.a |
| -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_6.a |
| -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 |
| Optimizer: Postgres query optimizer |
| (34 rows) |
| |
| SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; |
| a | sum | count |
| ----+------+------- |
| 0 | 0 | 1000 |
| 1 | 1000 | 1000 |
| 2 | 2000 | 1000 |
| 10 | 0 | 1000 |
| 11 | 1000 | 1000 |
| 12 | 2000 | 1000 |
| 20 | 0 | 1000 |
| 21 | 1000 | 1000 |
| 22 | 2000 | 1000 |
| (9 rows) |
| |
| -- Partial aggregation at all levels as GROUP BY clause does not match with |
| -- PARTITION KEY |
| EXPLAIN (COSTS OFF) |
| SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_ml.b, (sum(pagg_tab_ml.a)), (count(*)) |
| -> Sort |
| Sort Key: pagg_tab_ml.b, (sum(pagg_tab_ml.a)), (count(*)) |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab_ml.b |
| -> Sort |
| Sort Key: pagg_tab_ml.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: pagg_tab_ml.b |
| -> Append |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml.b |
| -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_1.b |
| -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_2.b |
| -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_3.b |
| -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_4.b |
| -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 |
| Optimizer: Postgres query optimizer |
| (27 rows) |
| |
| SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; |
| b | sum | count |
| ---+-------+------- |
| 0 | 30000 | 3000 |
| 1 | 33000 | 3000 |
| 2 | 36000 | 3000 |
| 3 | 39000 | 3000 |
| 4 | 42000 | 3000 |
| (5 rows) |
| |
| -- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ---------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) |
| -> Sort |
| Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab_ml.a, pagg_tab_ml.b, pagg_tab_ml.c |
| Filter: (avg(pagg_tab_ml.b) > '7'::numeric) |
| -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml |
| -> HashAggregate |
| Group Key: pagg_tab_ml_1.a, pagg_tab_ml_1.b, pagg_tab_ml_1.c |
| Filter: (avg(pagg_tab_ml_1.b) > '7'::numeric) |
| -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 |
| -> HashAggregate |
| Group Key: pagg_tab_ml_2.a, pagg_tab_ml_2.b, pagg_tab_ml_2.c |
| Filter: (avg(pagg_tab_ml_2.b) > '7'::numeric) |
| -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 |
| -> HashAggregate |
| Group Key: pagg_tab_ml_3.a, pagg_tab_ml_3.b, pagg_tab_ml_3.c |
| Filter: (avg(pagg_tab_ml_3.b) > '7'::numeric) |
| -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 |
| -> HashAggregate |
| Group Key: pagg_tab_ml_4.a, pagg_tab_ml_4.b, pagg_tab_ml_4.c |
| Filter: (avg(pagg_tab_ml_4.b) > '7'::numeric) |
| -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 |
| Optimizer: Postgres query optimizer |
| (26 rows) |
| |
| SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; |
| a | sum | count |
| ----+------+------- |
| 8 | 4000 | 500 |
| 8 | 4000 | 500 |
| 9 | 4500 | 500 |
| 9 | 4500 | 500 |
| 18 | 4000 | 500 |
| 18 | 4000 | 500 |
| 19 | 4500 | 500 |
| 19 | 4500 | 500 |
| 28 | 4000 | 500 |
| 28 | 4000 | 500 |
| 29 | 4500 | 500 |
| 29 | 4500 | 500 |
| (12 rows) |
| |
| -- Parallelism within partitionwise aggregates |
| SET min_parallel_table_scan_size TO '8kB'; |
| SET parallel_setup_cost TO 0; |
| -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY |
| -- for level 1 only. For subpartitions, GROUP BY clause does not match with |
| -- PARTITION KEY, thus we will have a partial aggregation for them. |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) |
| -> Sort |
| Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab_ml.a |
| Filter: (avg(pagg_tab_ml.b) < '3'::numeric) |
| -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab_ml_2.a |
| Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_ml_2.a |
| -> Append |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_2.a |
| -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_3.a |
| -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab_ml_5.a |
| Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_ml_5.a |
| -> Append |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_5.a |
| -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_6.a |
| -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 |
| Optimizer: Postgres query optimizer |
| (34 rows) |
| |
| SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; |
| a | sum | count |
| ----+------+------- |
| 0 | 0 | 1000 |
| 1 | 1000 | 1000 |
| 2 | 2000 | 1000 |
| 10 | 0 | 1000 |
| 11 | 1000 | 1000 |
| 12 | 2000 | 1000 |
| 20 | 0 | 1000 |
| 21 | 1000 | 1000 |
| 22 | 2000 | 1000 |
| (9 rows) |
| |
| -- Partial aggregation at all levels as GROUP BY clause does not match with |
| -- PARTITION KEY |
| EXPLAIN (COSTS OFF) |
| SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_ml.b, (sum(pagg_tab_ml.a)), (count(*)) |
| -> Sort |
| Sort Key: pagg_tab_ml.b, (sum(pagg_tab_ml.a)), (count(*)) |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab_ml.b |
| -> Sort |
| Sort Key: pagg_tab_ml.b |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: pagg_tab_ml.b |
| -> Append |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml.b |
| -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_1.b |
| -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_2.b |
| -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_3.b |
| -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_ml_4.b |
| -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 |
| Optimizer: Postgres query optimizer |
| (27 rows) |
| |
| SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; |
| b | sum | count |
| ---+-------+------- |
| 0 | 30000 | 3000 |
| 1 | 33000 | 3000 |
| 2 | 36000 | 3000 |
| 3 | 39000 | 3000 |
| 4 | 42000 | 3000 |
| (5 rows) |
| |
| -- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY |
| EXPLAIN (COSTS OFF) |
| SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ---------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) |
| -> Sort |
| Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (count(*)) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab_ml.a, pagg_tab_ml.b, pagg_tab_ml.c |
| Filter: (avg(pagg_tab_ml.b) > '7'::numeric) |
| -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml |
| -> HashAggregate |
| Group Key: pagg_tab_ml_1.a, pagg_tab_ml_1.b, pagg_tab_ml_1.c |
| Filter: (avg(pagg_tab_ml_1.b) > '7'::numeric) |
| -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_1 |
| -> HashAggregate |
| Group Key: pagg_tab_ml_2.a, pagg_tab_ml_2.b, pagg_tab_ml_2.c |
| Filter: (avg(pagg_tab_ml_2.b) > '7'::numeric) |
| -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_2 |
| -> HashAggregate |
| Group Key: pagg_tab_ml_3.a, pagg_tab_ml_3.b, pagg_tab_ml_3.c |
| Filter: (avg(pagg_tab_ml_3.b) > '7'::numeric) |
| -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_3 |
| -> HashAggregate |
| Group Key: pagg_tab_ml_4.a, pagg_tab_ml_4.b, pagg_tab_ml_4.c |
| Filter: (avg(pagg_tab_ml_4.b) > '7'::numeric) |
| -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_4 |
| Optimizer: Postgres query optimizer |
| (26 rows) |
| |
| SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; |
| a | sum | count |
| ----+------+------- |
| 8 | 4000 | 500 |
| 8 | 4000 | 500 |
| 9 | 4500 | 500 |
| 9 | 4500 | 500 |
| 18 | 4000 | 500 |
| 18 | 4000 | 500 |
| 19 | 4500 | 500 |
| 19 | 4500 | 500 |
| 28 | 4000 | 500 |
| 28 | 4000 | 500 |
| 29 | 4500 | 500 |
| 29 | 4500 | 500 |
| (12 rows) |
| |
| -- Parallelism within partitionwise aggregates (single level) |
| -- Add few parallel setup cost, so that we will see a plan which gathers |
| -- partially created paths even for full aggregation and sticks a single Gather |
| -- followed by finalization step. |
| -- Without this, the cost of doing partial aggregation + Gather + finalization |
| -- for each partition and then Append over it turns out to be same and this |
| -- wins as we add it first. This parallel_setup_cost plays a vital role in |
| -- costing such plans. |
| SET parallel_setup_cost TO 10; |
| CREATE TABLE pagg_tab_para(x int, y int) PARTITION BY RANGE(x); |
| CREATE TABLE pagg_tab_para_p1 PARTITION OF pagg_tab_para FOR VALUES FROM (0) TO (12); |
| CREATE TABLE pagg_tab_para_p2 PARTITION OF pagg_tab_para FOR VALUES FROM (12) TO (22); |
| CREATE TABLE pagg_tab_para_p3 PARTITION OF pagg_tab_para FOR VALUES FROM (22) TO (30); |
| INSERT INTO pagg_tab_para SELECT i % 30, i % 20 FROM generate_series(0, 29999) i; |
| ANALYZE pagg_tab_para; |
| -- When GROUP BY clause matches; full aggregation is performed for each partition. |
| EXPLAIN (COSTS OFF) |
| SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) |
| -> Sort |
| Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab_para.x |
| Filter: (avg(pagg_tab_para.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p1 pagg_tab_para |
| -> HashAggregate |
| Group Key: pagg_tab_para_1.x |
| Filter: (avg(pagg_tab_para_1.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 |
| -> HashAggregate |
| Group Key: pagg_tab_para_2.x |
| Filter: (avg(pagg_tab_para_2.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 |
| Optimizer: Postgres query optimizer |
| (18 rows) |
| |
| SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; |
| x | sum | avg | count |
| ----+------+--------------------+------- |
| 0 | 5000 | 5.0000000000000000 | 1000 |
| 1 | 6000 | 6.0000000000000000 | 1000 |
| 10 | 5000 | 5.0000000000000000 | 1000 |
| 11 | 6000 | 6.0000000000000000 | 1000 |
| 20 | 5000 | 5.0000000000000000 | 1000 |
| 21 | 6000 | 6.0000000000000000 | 1000 |
| (6 rows) |
| |
| -- When GROUP BY clause does not match; partial aggregation is performed for each partition. |
| EXPLAIN (COSTS OFF) |
| SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ---------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x)) |
| -> Sort |
| Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x)) |
| -> Finalize GroupAggregate |
| Group Key: pagg_tab_para.y |
| Filter: (avg(pagg_tab_para.x) < '12'::numeric) |
| -> Sort |
| Sort Key: pagg_tab_para.y |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: pagg_tab_para.y |
| -> Append |
| -> Partial HashAggregate |
| Group Key: pagg_tab_para.y |
| -> Seq Scan on pagg_tab_para_p1 pagg_tab_para |
| -> Partial HashAggregate |
| Group Key: pagg_tab_para_1.y |
| -> Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 |
| -> Partial HashAggregate |
| Group Key: pagg_tab_para_2.y |
| -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 |
| Optimizer: Postgres query optimizer |
| (22 rows) |
| |
| SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; |
| y | sum | avg | count |
| ----+-------+---------------------+------- |
| 0 | 15000 | 10.0000000000000000 | 1500 |
| 1 | 16500 | 11.0000000000000000 | 1500 |
| 10 | 15000 | 10.0000000000000000 | 1500 |
| 11 | 16500 | 11.0000000000000000 | 1500 |
| (4 rows) |
| |
| -- Test when parent can produce parallel paths but not any (or some) of its children |
| -- (Use one more aggregate to tilt the cost estimates for the plan we want) |
| ALTER TABLE pagg_tab_para_p1 SET (parallel_workers = 0); |
| ALTER TABLE pagg_tab_para_p3 SET (parallel_workers = 0); |
| ANALYZE pagg_tab_para; |
| EXPLAIN (COSTS OFF) |
| SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) |
| -> Sort |
| Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab_para.x |
| Filter: (avg(pagg_tab_para.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p1 pagg_tab_para |
| -> HashAggregate |
| Group Key: pagg_tab_para_1.x |
| Filter: (avg(pagg_tab_para_1.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 |
| -> HashAggregate |
| Group Key: pagg_tab_para_2.x |
| Filter: (avg(pagg_tab_para_2.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 |
| Optimizer: Postgres query optimizer |
| (18 rows) |
| |
| SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; |
| x | sum | avg | sum | count |
| ----+------+--------------------+-------+------- |
| 0 | 5000 | 5.0000000000000000 | 5000 | 1000 |
| 1 | 6000 | 6.0000000000000000 | 7000 | 1000 |
| 10 | 5000 | 5.0000000000000000 | 15000 | 1000 |
| 11 | 6000 | 6.0000000000000000 | 17000 | 1000 |
| 20 | 5000 | 5.0000000000000000 | 25000 | 1000 |
| 21 | 6000 | 6.0000000000000000 | 27000 | 1000 |
| (6 rows) |
| |
| ALTER TABLE pagg_tab_para_p2 SET (parallel_workers = 0); |
| ANALYZE pagg_tab_para; |
| EXPLAIN (COSTS OFF) |
| SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) |
| -> Sort |
| Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab_para.x |
| Filter: (avg(pagg_tab_para.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p1 pagg_tab_para |
| -> HashAggregate |
| Group Key: pagg_tab_para_1.x |
| Filter: (avg(pagg_tab_para_1.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 |
| -> HashAggregate |
| Group Key: pagg_tab_para_2.x |
| Filter: (avg(pagg_tab_para_2.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 |
| Optimizer: Postgres query optimizer |
| (18 rows) |
| |
| SELECT x, sum(y), avg(y), sum(x+y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; |
| x | sum | avg | sum | count |
| ----+------+--------------------+-------+------- |
| 0 | 5000 | 5.0000000000000000 | 5000 | 1000 |
| 1 | 6000 | 6.0000000000000000 | 7000 | 1000 |
| 10 | 5000 | 5.0000000000000000 | 15000 | 1000 |
| 11 | 6000 | 6.0000000000000000 | 17000 | 1000 |
| 20 | 5000 | 5.0000000000000000 | 25000 | 1000 |
| 21 | 6000 | 6.0000000000000000 | 27000 | 1000 |
| (6 rows) |
| |
| -- Reset parallelism parameters to get partitionwise aggregation plan. |
| RESET min_parallel_table_scan_size; |
| RESET parallel_setup_cost; |
| EXPLAIN (COSTS OFF) |
| SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) |
| -> Sort |
| Sort Key: pagg_tab_para.x, (sum(pagg_tab_para.y)), (avg(pagg_tab_para.y)) |
| -> Append |
| -> HashAggregate |
| Group Key: pagg_tab_para.x |
| Filter: (avg(pagg_tab_para.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p1 pagg_tab_para |
| -> HashAggregate |
| Group Key: pagg_tab_para_1.x |
| Filter: (avg(pagg_tab_para_1.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 |
| -> HashAggregate |
| Group Key: pagg_tab_para_2.x |
| Filter: (avg(pagg_tab_para_2.y) < '7'::numeric) |
| -> Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 |
| Optimizer: Postgres query optimizer |
| (18 rows) |
| |
| SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; |
| x | sum | avg | count |
| ----+------+--------------------+------- |
| 0 | 5000 | 5.0000000000000000 | 1000 |
| 1 | 6000 | 6.0000000000000000 | 1000 |
| 10 | 5000 | 5.0000000000000000 | 1000 |
| 11 | 6000 | 6.0000000000000000 | 1000 |
| 20 | 5000 | 5.0000000000000000 | 1000 |
| 21 | 6000 | 6.0000000000000000 | 1000 |
| (6 rows) |
| |