| -- disable ORCA |
| SET optimizer TO off; |
| -- Test case group 1: basic functions |
| CREATE TABLE agg_pushdown_parent ( |
| i int primary key, |
| x int); |
| CREATE TABLE agg_pushdown_child1 ( |
| j int, |
| parent int, |
| v double precision, |
| PRIMARY KEY (j, parent)); |
| CREATE INDEX ON agg_pushdown_child1(parent); |
| CREATE TABLE agg_pushdown_child2 ( |
| k int, |
| parent int, |
| v double precision, |
| PRIMARY KEY (k, parent));; |
| INSERT INTO agg_pushdown_parent(i, x) |
| SELECT n, n |
| FROM generate_series(0, 7) AS s(n); |
| INSERT INTO agg_pushdown_child1(j, parent, v) |
| SELECT 128 * i + n, i, random() |
| FROM generate_series(0, 127) AS s(n), agg_pushdown_parent; |
| INSERT INTO agg_pushdown_child2(k, parent, v) |
| SELECT 128 * i + n, i, random() |
| FROM generate_series(0, 127) AS s(n), agg_pushdown_parent; |
| ANALYZE agg_pushdown_parent; |
| ANALYZE agg_pushdown_child1; |
| ANALYZE agg_pushdown_child2; |
| SET enable_nestloop TO on; |
| SET enable_hashjoin TO off; |
| SET enable_mergejoin TO off; |
| -- Perform scan of a table, aggregate the result, join it to the other table |
| -- and finalize the aggregation. |
| -- |
| -- In addition, check that functionally dependent column "p.x" can be |
| -- referenced by SELECT although GROUP BY references "p.i". |
| SET gp_enable_agg_pushdown TO off; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.x, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 |
| AS c1 ON c1.parent = p.i GROUP BY p.i; |
| QUERY PLAN |
| ---------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.x, (avg(c1.v)), p.i |
| -> HashAggregate |
| Output: p.x, avg(c1.v), p.i |
| Group Key: p.i |
| -> Nested Loop |
| Output: p.i, p.x, c1.v |
| Inner Unique: true |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: c1.v, c1.parent |
| Hash Key: c1.parent |
| -> Seq Scan on public.agg_pushdown_child1 c1 |
| Output: c1.v, c1.parent |
| -> Memoize |
| Output: p.x, p.i |
| Cache Key: c1.parent |
| Cache Mode: logical |
| -> Index Scan using agg_pushdown_parent_pkey on public.agg_pushdown_parent p |
| Output: p.x, p.i |
| Index Cond: (p.i = c1.parent) |
| Settings: enable_hashjoin = 'off', enable_mergejoin = 'off', enable_nestloop = 'on', gp_enable_agg_pushdown = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (22 rows) |
| |
| SET gp_enable_agg_pushdown TO on; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.x, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 |
| AS c1 ON c1.parent = p.i GROUP BY p.i; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.x, (avg(c1.v)), p.i |
| -> Finalize GroupAggregate |
| Output: p.x, avg(c1.v), p.i |
| Group Key: p.i |
| -> Sort |
| Output: p.i, p.x, (PARTIAL avg(c1.v)) |
| Sort Key: p.i |
| -> Nested Loop |
| Output: p.i, p.x, (PARTIAL avg(c1.v)) |
| Inner Unique: true |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: c1.parent, (PARTIAL avg(c1.v)) |
| Hash Key: c1.parent |
| -> Partial HashAggregate |
| Output: c1.parent, PARTIAL avg(c1.v) |
| Group Key: c1.parent |
| -> Seq Scan on public.agg_pushdown_child1 c1 |
| Output: c1.j, c1.parent, c1.v |
| -> Index Scan using agg_pushdown_parent_pkey on public.agg_pushdown_parent p |
| Output: p.i, p.x |
| Index Cond: (p.i = c1.parent) |
| Settings: enable_hashjoin = 'off', enable_mergejoin = 'off', enable_nestloop = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (24 rows) |
| |
| -- The same for hash join. |
| SET enable_nestloop TO off; |
| SET enable_hashjoin TO on; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 |
| AS c1 ON c1.parent = p.i GROUP BY p.i; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.i, (avg(c1.v)) |
| -> Finalize GroupAggregate |
| Output: p.i, avg(c1.v) |
| Group Key: p.i |
| -> Sort |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Sort Key: p.i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Hash Key: p.i |
| -> Hash Join |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Hash Cond: (p.i = c1.parent) |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: p.i |
| -> Seq Scan on public.agg_pushdown_parent p |
| Output: p.i |
| -> Hash |
| Output: c1.parent, (PARTIAL avg(c1.v)) |
| -> Partial HashAggregate |
| Output: c1.parent, PARTIAL avg(c1.v) |
| Group Key: c1.parent |
| -> Seq Scan on public.agg_pushdown_child1 c1 |
| Output: c1.j, c1.parent, c1.v |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (27 rows) |
| |
| -- The same for merge join. |
| SET enable_hashjoin TO off; |
| SET enable_mergejoin TO on; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 |
| AS c1 ON c1.parent = p.i GROUP BY p.i; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.i, (avg(c1.v)) |
| -> Finalize HashAggregate |
| Output: p.i, avg(c1.v) |
| Group Key: p.i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Hash Key: p.i |
| -> Merge Join |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Merge Cond: (p.i = c1.parent) |
| -> Sort |
| Output: p.i |
| Sort Key: p.i |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: p.i |
| -> Seq Scan on public.agg_pushdown_parent p |
| Output: p.i |
| -> Sort |
| Output: c1.parent, (PARTIAL avg(c1.v)) |
| Sort Key: c1.parent |
| -> Partial HashAggregate |
| Output: c1.parent, PARTIAL avg(c1.v) |
| Group Key: c1.parent |
| -> Seq Scan on public.agg_pushdown_child1 c1 |
| Output: c1.j, c1.parent, c1.v |
| Settings: enable_hashjoin = 'off', enable_mergejoin = 'on', enable_nestloop = 'off', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (28 rows) |
| |
| -- Restore the default values. |
| SET enable_nestloop TO on; |
| SET enable_hashjoin TO on; |
| -- Scan index on agg_pushdown_child1(parent) column and aggregate the result |
| -- using AGG_SORTED strategy. |
| SET gp_enable_agg_pushdown TO off; |
| SET enable_seqscan TO off; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 |
| AS c1 ON c1.parent = p.i GROUP BY p.i; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.i, (avg(c1.v)) |
| -> HashAggregate |
| Output: p.i, avg(c1.v) |
| Group Key: p.i |
| -> Nested Loop |
| Output: p.i, c1.v |
| Inner Unique: true |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: c1.v, c1.parent |
| Hash Key: c1.parent |
| -> Index Scan using agg_pushdown_child1_parent_idx on public.agg_pushdown_child1 c1 |
| Output: c1.v, c1.parent |
| -> Memoize |
| Output: p.i |
| Cache Key: c1.parent |
| Cache Mode: logical |
| -> Index Only Scan using agg_pushdown_parent_pkey on public.agg_pushdown_parent p |
| Output: p.i |
| Index Cond: (p.i = c1.parent) |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'on', enable_nestloop = 'on', enable_seqscan = 'off', gp_enable_agg_pushdown = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (22 rows) |
| |
| SET gp_enable_agg_pushdown TO on; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 |
| AS c1 ON c1.parent = p.i GROUP BY p.i; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.i, (avg(c1.v)) |
| -> Finalize GroupAggregate |
| Output: p.i, avg(c1.v) |
| Group Key: p.i |
| -> Sort |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Sort Key: p.i |
| -> Nested Loop |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Inner Unique: true |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: c1.parent, (PARTIAL avg(c1.v)) |
| Hash Key: c1.parent |
| -> Partial GroupAggregate |
| Output: c1.parent, PARTIAL avg(c1.v) |
| Group Key: c1.parent |
| -> Index Scan using agg_pushdown_child1_parent_idx on public.agg_pushdown_child1 c1 |
| Output: c1.j, c1.parent, c1.v |
| -> Index Only Scan using agg_pushdown_parent_pkey on public.agg_pushdown_parent p |
| Output: p.i |
| Index Cond: (p.i = c1.parent) |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'on', enable_nestloop = 'on', enable_seqscan = 'off', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (24 rows) |
| |
| SET enable_seqscan TO on; |
| -- Join "c1" to "p.x" column, i.e. one that is not in the GROUP BY clause. The |
| -- planner should still use "c1.parent" as grouping expression for partial |
| -- aggregation, although it's not in the same equivalence class as the GROUP |
| -- BY expression ("p.i"). The reason to use "c1.parent" for partial |
| -- aggregation is that this is the only way for "c1" to provide the join |
| -- expression with input data. |
| SET gp_enable_agg_pushdown TO off; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 |
| AS c1 ON c1.parent = p.x GROUP BY p.i; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.i, (avg(c1.v)) |
| -> Finalize HashAggregate |
| Output: p.i, avg(c1.v) |
| Group Key: p.i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Hash Key: p.i |
| -> Partial HashAggregate |
| Output: p.i, PARTIAL avg(c1.v) |
| Group Key: p.i |
| -> Hash Join |
| Output: p.i, c1.v |
| Hash Cond: (c1.parent = p.x) |
| -> Seq Scan on public.agg_pushdown_child1 c1 |
| Output: c1.j, c1.parent, c1.v |
| -> Hash |
| Output: p.i, p.x |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: p.i, p.x |
| -> Seq Scan on public.agg_pushdown_parent p |
| Output: p.i, p.x |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'on', enable_nestloop = 'on', enable_seqscan = 'on', gp_enable_agg_pushdown = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (24 rows) |
| |
| SET gp_enable_agg_pushdown TO on; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 |
| AS c1 ON c1.parent = p.x GROUP BY p.i; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.i, (avg(c1.v)) |
| -> Finalize GroupAggregate |
| Output: p.i, avg(c1.v) |
| Group Key: p.i |
| -> Sort |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Sort Key: p.i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Hash Key: p.i |
| -> Hash Join |
| Output: p.i, (PARTIAL avg(c1.v)) |
| Hash Cond: (p.x = c1.parent) |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: p.i, p.x |
| -> Seq Scan on public.agg_pushdown_parent p |
| Output: p.i, p.x |
| -> Hash |
| Output: c1.parent, (PARTIAL avg(c1.v)) |
| -> Partial HashAggregate |
| Output: c1.parent, PARTIAL avg(c1.v) |
| Group Key: c1.parent |
| -> Seq Scan on public.agg_pushdown_child1 c1 |
| Output: c1.j, c1.parent, c1.v |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'on', enable_nestloop = 'on', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (27 rows) |
| |
| -- Perform nestloop join between agg_pushdown_child1 and agg_pushdown_child2 |
| -- and aggregate the result. |
| SET enable_nestloop TO on; |
| SET enable_hashjoin TO off; |
| SET enable_mergejoin TO off; |
| SET gp_enable_agg_pushdown TO off; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN |
| agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON |
| c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.i, (avg((c1.v + c2.v))) |
| -> HashAggregate |
| Output: p.i, avg((c1.v + c2.v)) |
| Group Key: p.i |
| -> Nested Loop |
| Output: p.i, c1.v, c2.v |
| Inner Unique: true |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: c1.v, c1.parent, c2.v, c2.parent |
| Hash Key: c1.parent |
| -> Nested Loop |
| Output: c1.v, c1.parent, c2.v, c2.parent |
| Inner Unique: true |
| -> Seq Scan on public.agg_pushdown_child1 c1 |
| Output: c1.j, c1.parent, c1.v |
| -> Index Scan using agg_pushdown_child2_pkey on public.agg_pushdown_child2 c2 |
| Output: c2.k, c2.parent, c2.v |
| Index Cond: ((c2.k = c1.j) AND (c2.parent = c1.parent)) |
| -> Memoize |
| Output: p.i |
| Cache Key: c1.parent |
| Cache Mode: logical |
| -> Index Only Scan using agg_pushdown_parent_pkey on public.agg_pushdown_parent p |
| Output: p.i |
| Index Cond: (p.i = c1.parent) |
| Settings: enable_hashjoin = 'off', enable_mergejoin = 'off', enable_nestloop = 'on', enable_seqscan = 'on', gp_enable_agg_pushdown = 'off', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (28 rows) |
| |
| SET gp_enable_agg_pushdown TO on; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN |
| agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON |
| c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.i, (avg((c1.v + c2.v))) |
| -> Finalize GroupAggregate |
| Output: p.i, avg((c1.v + c2.v)) |
| Group Key: p.i |
| -> Sort |
| Output: p.i, (PARTIAL avg((c1.v + c2.v))) |
| Sort Key: p.i |
| -> Nested Loop |
| Output: p.i, (PARTIAL avg((c1.v + c2.v))) |
| Inner Unique: true |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: c1.parent, c2.parent, (PARTIAL avg((c1.v + c2.v))) |
| Hash Key: c1.parent |
| -> Partial HashAggregate |
| Output: c1.parent, c2.parent, PARTIAL avg((c1.v + c2.v)) |
| Group Key: c1.parent |
| -> Nested Loop |
| Output: c1.v, c1.parent, c2.v, c2.parent |
| Inner Unique: true |
| -> Seq Scan on public.agg_pushdown_child1 c1 |
| Output: c1.j, c1.parent, c1.v |
| -> Index Scan using agg_pushdown_child2_pkey on public.agg_pushdown_child2 c2 |
| Output: c2.k, c2.parent, c2.v |
| Index Cond: ((c2.k = c1.j) AND (c2.parent = c1.parent)) |
| -> Index Only Scan using agg_pushdown_parent_pkey on public.agg_pushdown_parent p |
| Output: p.i |
| Index Cond: (p.i = c1.parent) |
| Settings: enable_hashjoin = 'off', enable_mergejoin = 'off', enable_nestloop = 'on', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (30 rows) |
| |
| -- The same for hash join. |
| SET enable_nestloop TO off; |
| SET enable_hashjoin TO on; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN |
| agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON |
| c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.i, (avg((c1.v + c2.v))) |
| -> Finalize GroupAggregate |
| Output: p.i, avg((c1.v + c2.v)) |
| Group Key: p.i |
| -> Sort |
| Output: p.i, (PARTIAL avg((c1.v + c2.v))) |
| Sort Key: p.i |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: p.i, (PARTIAL avg((c1.v + c2.v))) |
| Hash Key: p.i |
| -> Hash Join |
| Output: p.i, (PARTIAL avg((c1.v + c2.v))) |
| Hash Cond: (p.i = c1.parent) |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: p.i |
| -> Seq Scan on public.agg_pushdown_parent p |
| Output: p.i |
| -> Hash |
| Output: c1.parent, c2.parent, (PARTIAL avg((c1.v + c2.v))) |
| -> Partial HashAggregate |
| Output: c1.parent, c2.parent, PARTIAL avg((c1.v + c2.v)) |
| Group Key: c1.parent |
| -> Hash Join |
| Output: c1.v, c1.parent, c2.v, c2.parent |
| Inner Unique: true |
| Hash Cond: ((c1.parent = c2.parent) AND (c1.j = c2.k)) |
| -> Seq Scan on public.agg_pushdown_child1 c1 |
| Output: c1.j, c1.parent, c1.v |
| -> Hash |
| Output: c2.v, c2.parent, c2.k |
| -> Seq Scan on public.agg_pushdown_child2 c2 |
| Output: c2.v, c2.parent, c2.k |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (35 rows) |
| |
| -- The same for merge join. |
| SET enable_hashjoin TO off; |
| SET enable_mergejoin TO on; |
| SET enable_seqscan TO off; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN |
| agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON |
| c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: p.i, (avg((c1.v + c2.v))) |
| -> Finalize GroupAggregate |
| Output: p.i, avg((c1.v + c2.v)) |
| Group Key: p.i |
| -> Merge Join |
| Output: p.i, (PARTIAL avg((c1.v + c2.v))) |
| Inner Unique: true |
| Merge Cond: (c1.parent = p.i) |
| -> Sort |
| Output: c1.parent, c2.parent, (PARTIAL avg((c1.v + c2.v))) |
| Sort Key: c1.parent |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: c1.parent, c2.parent, (PARTIAL avg((c1.v + c2.v))) |
| Hash Key: c1.parent |
| -> Partial HashAggregate |
| Output: c1.parent, c2.parent, PARTIAL avg((c1.v + c2.v)) |
| Group Key: c1.parent |
| -> Merge Join |
| Output: c1.v, c1.parent, c2.v, c2.parent |
| Inner Unique: true |
| Merge Cond: ((c1.j = c2.k) AND (c1.parent = c2.parent)) |
| -> Index Scan using agg_pushdown_child1_pkey on public.agg_pushdown_child1 c1 |
| Output: c1.j, c1.parent, c1.v |
| -> Index Scan using agg_pushdown_child2_pkey on public.agg_pushdown_child2 c2 |
| Output: c2.k, c2.parent, c2.v |
| -> Index Only Scan using agg_pushdown_parent_pkey on public.agg_pushdown_parent p |
| Output: p.i |
| Settings: enable_hashjoin = 'off', enable_mergejoin = 'on', enable_nestloop = 'off', enable_seqscan = 'off', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (30 rows) |
| |
| SET enable_seqscan TO on; |
| -- Clear tables |
| DROP TABLE agg_pushdown_child1; |
| DROP TABLE agg_pushdown_child2; |
| DROP TABLE agg_pushdown_parent; |
| -- Test case group 2: Pushdown with different join keys and group keys. |
| DROP TABLE IF EXISTS t1, t2; |
| NOTICE: table "t1" does not exist, skipping |
| NOTICE: table "t2" does not exist, skipping |
| CREATE TABLE t1 (id int, val int, comment VARCHAR(20)); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE t2 (id int, val int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| SET enable_nestloop TO off; |
| SET enable_hashjoin TO on; |
| SET enable_mergejoin TO off; |
| SET gp_enable_agg_pushdown TO ON; |
| -- Join key and group key are the same. |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT t1.id, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id GROUP BY t1.id; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t1.id, (sum(t1.val)) |
| -> Finalize HashAggregate |
| Output: t1.id, sum(t1.val) |
| Group Key: t1.id |
| -> Hash Join |
| Output: t1.id, (PARTIAL sum(t1.val)) |
| Hash Cond: (t2.id = t1.id) |
| -> Seq Scan on public.t2 |
| Output: t2.id, t2.val |
| -> Hash |
| Output: t1.id, (PARTIAL sum(t1.val)) |
| -> Partial HashAggregate |
| Output: t1.id, PARTIAL sum(t1.val) |
| Group Key: t1.id |
| -> Seq Scan on public.t1 |
| Output: t1.id, t1.val, t1.comment |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (19 rows) |
| |
| -- Join key and group key are different. |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT t1.val, SUM(t1.id) FROM t1, t2 WHERE t1.id = t2.id GROUP BY t1.val; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t1.val, (sum(t1.id)) |
| -> Finalize HashAggregate |
| Output: t1.val, sum(t1.id) |
| Group Key: t1.val |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: t1.val, (PARTIAL sum(t1.id)) |
| Hash Key: t1.val |
| -> Hash Join |
| Output: t1.val, (PARTIAL sum(t1.id)) |
| Hash Cond: (t2.id = t1.id) |
| -> Seq Scan on public.t2 |
| Output: t2.id, t2.val |
| -> Hash |
| Output: t1.val, t1.id, (PARTIAL sum(t1.id)) |
| -> Partial HashAggregate |
| Output: t1.val, t1.id, PARTIAL sum(t1.id) |
| Group Key: t1.val, t1.id |
| -> Seq Scan on public.t1 |
| Output: t1.id, t1.val, t1.comment |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (22 rows) |
| |
| -- Pushdown with equivclass. |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT t2.id, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id GROUP BY t2.id; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t2.id, (sum(t1.val)) |
| -> Finalize HashAggregate |
| Output: t2.id, sum(t1.val) |
| Group Key: t2.id |
| -> Hash Join |
| Output: t2.id, (PARTIAL sum(t1.val)) |
| Hash Cond: (t2.id = t1.id) |
| -> Seq Scan on public.t2 |
| Output: t2.id, t2.val |
| -> Hash |
| Output: t1.id, (PARTIAL sum(t1.val)) |
| -> Partial HashAggregate |
| Output: t1.id, PARTIAL sum(t1.val) |
| Group Key: t1.id |
| -> Seq Scan on public.t1 |
| Output: t1.id, t1.val, t1.comment |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (19 rows) |
| |
| -- Group by column from t2 and aggregate column from t1. |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT t2.val, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id GROUP BY t2.val; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t2.val, (sum(t1.val)) |
| -> Finalize HashAggregate |
| Output: t2.val, sum(t1.val) |
| Group Key: t2.val |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: t2.val, (PARTIAL sum(t1.val)) |
| Hash Key: t2.val |
| -> Hash Join |
| Output: t2.val, (PARTIAL sum(t1.val)) |
| Hash Cond: (t2.id = t1.id) |
| -> Seq Scan on public.t2 |
| Output: t2.id, t2.val |
| -> Hash |
| Output: t1.id, (PARTIAL sum(t1.val)) |
| -> Partial HashAggregate |
| Output: t1.id, PARTIAL sum(t1.val) |
| Group Key: t1.id |
| -> Seq Scan on public.t1 |
| Output: t1.id, t1.val, t1.comment |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (22 rows) |
| |
| -- Pushdown with multiply group keys. |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT t1.id, t1.comment, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id GROUP BY t1.id, t1.comment; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t1.id, t1.comment, (sum(t1.val)) |
| -> Finalize HashAggregate |
| Output: t1.id, t1.comment, sum(t1.val) |
| Group Key: t1.id, t1.comment |
| -> Hash Join |
| Output: t1.id, t1.comment, (PARTIAL sum(t1.val)) |
| Hash Cond: (t2.id = t1.id) |
| -> Seq Scan on public.t2 |
| Output: t2.id, t2.val |
| -> Hash |
| Output: t1.id, t1.comment, (PARTIAL sum(t1.val)) |
| -> Partial HashAggregate |
| Output: t1.id, t1.comment, PARTIAL sum(t1.val) |
| Group Key: t1.id, t1.comment |
| -> Seq Scan on public.t1 |
| Output: t1.id, t1.val, t1.comment |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (19 rows) |
| |
| -- Pushdown with multiply join keys. |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT t1.id, t1.comment, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id and t1.val = t2.val GROUP BY t1.id, t1.comment; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t1.id, t1.comment, (sum(t1.val)) |
| -> Finalize GroupAggregate |
| Output: t1.id, t1.comment, sum(t1.val) |
| Group Key: t1.id, t1.comment |
| -> Sort |
| Output: t1.id, t1.comment, (PARTIAL sum(t1.val)) |
| Sort Key: t1.id, t1.comment |
| -> Hash Join |
| Output: t1.id, t1.comment, (PARTIAL sum(t1.val)) |
| Hash Cond: ((t2.id = t1.id) AND (t2.val = t1.val)) |
| -> Seq Scan on public.t2 |
| Output: t2.id, t2.val |
| -> Hash |
| Output: t1.id, t1.comment, t1.val, (PARTIAL sum(t1.val)) |
| -> Partial HashAggregate |
| Output: t1.id, t1.comment, t1.val, PARTIAL sum(t1.val) |
| Group Key: t1.id, t1.comment, t1.val |
| -> Seq Scan on public.t1 |
| Output: t1.id, t1.val, t1.comment |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (22 rows) |
| |
| -- Test above case with different data distributions |
| INSERT INTO t1 SELECT i, i, 'asd' FROM generate_series(1, 10000) s(i); |
| ANALYZE t1; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT t1.id, t1.comment, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id and t1.val = t2.val GROUP BY t1.id, t1.comment; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t1.id, t1.comment, (sum(t1.val)) |
| -> GroupAggregate |
| Output: t1.id, t1.comment, sum(t1.val) |
| Group Key: t1.id, t1.comment |
| -> Sort |
| Output: t1.id, t1.comment, t1.val |
| Sort Key: t1.id, t1.comment |
| -> Hash Join |
| Output: t1.id, t1.comment, t1.val |
| Hash Cond: ((t2.id = t1.id) AND (t2.val = t1.val)) |
| -> Seq Scan on public.t2 |
| Output: t2.id, t2.val |
| -> Hash |
| Output: t1.id, t1.comment, t1.val |
| -> Seq Scan on public.t1 |
| Output: t1.id, t1.comment, t1.val |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (19 rows) |
| |
| DELETE FROM t1; |
| INSERT INTO t1 SELECT i % 10, 1, 'asd' FROM generate_series(1, 10000) s(i); |
| ANALYZE t1; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT t1.id, t1.comment, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id and t1.val = t2.val GROUP BY t1.id, t1.comment; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t1.id, t1.comment, (sum(t1.val)) |
| -> Finalize GroupAggregate |
| Output: t1.id, t1.comment, sum(t1.val) |
| Group Key: t1.id, t1.comment |
| -> Sort |
| Output: t1.id, t1.comment, (PARTIAL sum(t1.val)) |
| Sort Key: t1.id, t1.comment |
| -> Hash Join |
| Output: t1.id, t1.comment, (PARTIAL sum(t1.val)) |
| Hash Cond: ((t2.id = t1.id) AND (t2.val = t1.val)) |
| -> Seq Scan on public.t2 |
| Output: t2.id, t2.val |
| -> Hash |
| Output: t1.id, t1.comment, t1.val, (PARTIAL sum(t1.val)) |
| -> Partial HashAggregate |
| Output: t1.id, t1.comment, t1.val, PARTIAL sum(t1.val) |
| Group Key: t1.id, t1.comment, t1.val |
| -> Seq Scan on public.t1 |
| Output: t1.id, t1.val, t1.comment |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (22 rows) |
| |
| -- Clear tables |
| DROP TABLE t1, t2; |
| -- Test case group 3: Pushdown in subquery and group from subquery. |
| DROP TABLE IF EXISTS part, lineitem; |
| NOTICE: table "part" does not exist, skipping |
| NOTICE: table "lineitem" does not exist, skipping |
| CREATE TABLE part (p_partkey int, p_size int, p_price int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'p_partkey' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE lineitem (l_orderkey int, l_partkey int, l_amount int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'l_orderkey' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| SET enable_nestloop TO off; |
| SET enable_hashjoin TO on; |
| SET enable_mergejoin TO off; |
| SET gp_enable_agg_pushdown TO ON; |
| -- Pushdown within subquery. |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT SUM(slp) FROM |
| (SELECT l_partkey, SUM(p_price) from lineitem, part |
| WHERE l_partkey = p_partkey AND p_size < 40 |
| GROUP BY l_partkey |
| ORDER BY l_partkey |
| LIMIT 100) temp(lp, slp) |
| WHERE slp > 10; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Aggregate |
| Output: sum(temp.slp) |
| -> Subquery Scan on temp |
| Output: temp.lp, temp.slp |
| Filter: (temp.slp > 10) |
| -> Limit |
| Output: lineitem.l_partkey, (sum(part.p_price)) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: lineitem.l_partkey, (sum(part.p_price)) |
| Merge Key: lineitem.l_partkey |
| -> Limit |
| Output: lineitem.l_partkey, (sum(part.p_price)) |
| -> Sort |
| Output: lineitem.l_partkey, (sum(part.p_price)) |
| Sort Key: lineitem.l_partkey |
| -> Finalize HashAggregate |
| Output: lineitem.l_partkey, sum(part.p_price) |
| Group Key: lineitem.l_partkey |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: lineitem.l_partkey, (PARTIAL sum(part.p_price)) |
| Hash Key: lineitem.l_partkey |
| -> Hash Join |
| Output: lineitem.l_partkey, (PARTIAL sum(part.p_price)) |
| Hash Cond: (lineitem.l_partkey = part.p_partkey) |
| -> Seq Scan on public.lineitem |
| Output: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_amount |
| -> Hash |
| Output: part.p_partkey, (PARTIAL sum(part.p_price)) |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: part.p_partkey, (PARTIAL sum(part.p_price)) |
| -> Partial HashAggregate |
| Output: part.p_partkey, PARTIAL sum(part.p_price) |
| Group Key: part.p_partkey |
| -> Seq Scan on public.part |
| Output: part.p_partkey, part.p_size, part.p_price |
| Filter: (part.p_size < 40) |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (38 rows) |
| |
| -- Group base on subquery. |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT p_partkey, SUM(l_amount) FROM |
| part, (SELECT l_partkey, l_amount + 10 |
| FROM lineitem ORDER BY l_partkey LIMIT 10000) li(l_partkey, l_amount) |
| WHERE l_partkey = p_partkey |
| GROUP BY p_partkey; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: part.p_partkey, (sum(((lineitem.l_amount + 10)))) |
| -> Finalize HashAggregate |
| Output: part.p_partkey, sum(((lineitem.l_amount + 10))) |
| Group Key: part.p_partkey |
| -> Hash Join |
| Output: part.p_partkey, (PARTIAL sum(((lineitem.l_amount + 10)))) |
| Hash Cond: (part.p_partkey = lineitem.l_partkey) |
| -> Seq Scan on public.part |
| Output: part.p_partkey, part.p_size, part.p_price |
| -> Hash |
| Output: lineitem.l_partkey, (PARTIAL sum(((lineitem.l_amount + 10)))) |
| -> Redistribute Motion 1:3 (slice2; segments: 1) |
| Output: lineitem.l_partkey, (PARTIAL sum(((lineitem.l_amount + 10)))) |
| Hash Key: lineitem.l_partkey |
| -> Partial GroupAggregate |
| Output: lineitem.l_partkey, PARTIAL sum(((lineitem.l_amount + 10))) |
| Group Key: lineitem.l_partkey |
| -> Limit |
| Output: lineitem.l_partkey, ((lineitem.l_amount + 10)) |
| -> Gather Motion 3:1 (slice3; segments: 3) |
| Output: lineitem.l_partkey, ((lineitem.l_amount + 10)) |
| Merge Key: lineitem.l_partkey |
| -> Limit |
| Output: lineitem.l_partkey, ((lineitem.l_amount + 10)) |
| -> Sort |
| Output: lineitem.l_partkey, ((lineitem.l_amount + 10)) |
| Sort Key: lineitem.l_partkey |
| -> Seq Scan on public.lineitem |
| Output: lineitem.l_partkey, (lineitem.l_amount + 10) |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (32 rows) |
| |
| -- Clear tables |
| DROP TABLE part, lineitem; |
| -- Test case group 4: construct grouped join rel from 2 plain rels |
| DROP TABLE IF EXISTS vendor_pd, customer_pd, nation_pd; |
| NOTICE: table "vendor_pd" does not exist, skipping |
| NOTICE: table "customer_pd" does not exist, skipping |
| NOTICE: table "nation_pd" does not exist, skipping |
| CREATE TABLE vendor_pd (v_id int, v_name VARCHAR(20)) WITH (APPENDONLY=true, ORIENTATION=column); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'v_id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE customer_pd (c_id int primary key, c_v_id int, c_n_id int, c_type int, c_consumption int); |
| CREATE TABLE nation_pd (n_id int, n_name VARCHAR(20), n_type int, n_population int) WITH (APPENDONLY=true, ORIENTATION=column); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'n_id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| INSERT INTO nation_pd SELECT i, 'abc', 1, 1 from generate_series(1, 100) s(i); |
| INSERT INTO customer_pd SELECT i, i % 100, i % 100, 1, 100 from generate_series(1, 10000) s(i); |
| ANALYZE nation_pd, customer_pd; |
| -- For each vendor, calculate the total consumption of qualified customers |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT v_id, v_name, SUM(c_consumption) |
| FROM vendor_pd, customer_pd, nation_pd |
| WHERE v_id = c_v_id AND c_n_id = n_id AND c_id > n_population |
| GROUP BY v_id, v_name; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: vendor_pd.v_id, vendor_pd.v_name, (sum(customer_pd.c_consumption)) |
| -> Finalize HashAggregate |
| Output: vendor_pd.v_id, vendor_pd.v_name, sum(customer_pd.c_consumption) |
| Group Key: vendor_pd.v_id, vendor_pd.v_name |
| -> Hash Join |
| Output: vendor_pd.v_id, vendor_pd.v_name, (PARTIAL sum(customer_pd.c_consumption)) |
| Hash Cond: (vendor_pd.v_id = customer_pd.c_v_id) |
| -> Seq Scan on public.vendor_pd |
| Output: vendor_pd.v_id, vendor_pd.v_name |
| -> Hash |
| Output: customer_pd.c_v_id, (PARTIAL sum(customer_pd.c_consumption)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: customer_pd.c_v_id, (PARTIAL sum(customer_pd.c_consumption)) |
| Hash Key: customer_pd.c_v_id |
| -> Partial HashAggregate |
| Output: customer_pd.c_v_id, PARTIAL sum(customer_pd.c_consumption) |
| Group Key: customer_pd.c_v_id |
| -> Hash Join |
| Output: customer_pd.c_consumption, customer_pd.c_v_id |
| Hash Cond: (customer_pd.c_n_id = nation_pd.n_id) |
| Join Filter: (customer_pd.c_id > nation_pd.n_population) |
| -> Seq Scan on public.customer_pd |
| Output: customer_pd.c_id, customer_pd.c_v_id, customer_pd.c_n_id, customer_pd.c_type, customer_pd.c_consumption |
| -> Hash |
| Output: nation_pd.n_id, nation_pd.n_population |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: nation_pd.n_id, nation_pd.n_population |
| -> Seq Scan on public.nation_pd |
| Output: nation_pd.n_id, nation_pd.n_population |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (32 rows) |
| |
| -- For each vendor/c_type/n_type, calculate the total consumption of qualified customers |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT v_id, c_type, n_type, SUM(c_consumption) |
| FROM vendor_pd, customer_pd, nation_pd |
| WHERE v_id = c_v_id AND c_n_id = n_id AND c_id > n_population |
| GROUP BY v_id, c_type, n_type; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: vendor_pd.v_id, customer_pd.c_type, nation_pd.n_type, (sum(customer_pd.c_consumption)) |
| -> Finalize HashAggregate |
| Output: vendor_pd.v_id, customer_pd.c_type, nation_pd.n_type, sum(customer_pd.c_consumption) |
| Group Key: vendor_pd.v_id, customer_pd.c_type, nation_pd.n_type |
| -> Hash Join |
| Output: vendor_pd.v_id, customer_pd.c_type, nation_pd.n_type, (PARTIAL sum(customer_pd.c_consumption)) |
| Hash Cond: (vendor_pd.v_id = customer_pd.c_v_id) |
| -> Seq Scan on public.vendor_pd |
| Output: vendor_pd.v_id |
| -> Hash |
| Output: customer_pd.c_type, customer_pd.c_v_id, nation_pd.n_type, (PARTIAL sum(customer_pd.c_consumption)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: customer_pd.c_type, customer_pd.c_v_id, nation_pd.n_type, (PARTIAL sum(customer_pd.c_consumption)) |
| Hash Key: customer_pd.c_v_id |
| -> Partial HashAggregate |
| Output: customer_pd.c_type, customer_pd.c_v_id, nation_pd.n_type, PARTIAL sum(customer_pd.c_consumption) |
| Group Key: customer_pd.c_type, customer_pd.c_v_id, nation_pd.n_type |
| -> Hash Join |
| Output: customer_pd.c_type, customer_pd.c_consumption, customer_pd.c_v_id, nation_pd.n_type |
| Hash Cond: (customer_pd.c_n_id = nation_pd.n_id) |
| Join Filter: (customer_pd.c_id > nation_pd.n_population) |
| -> Seq Scan on public.customer_pd |
| Output: customer_pd.c_id, customer_pd.c_v_id, customer_pd.c_n_id, customer_pd.c_type, customer_pd.c_consumption |
| -> Hash |
| Output: nation_pd.n_type, nation_pd.n_id, nation_pd.n_population |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: nation_pd.n_type, nation_pd.n_id, nation_pd.n_population |
| -> Seq Scan on public.nation_pd |
| Output: nation_pd.n_type, nation_pd.n_id, nation_pd.n_population |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (32 rows) |
| |
| -- For each vendor/n_type, calculate the total consumption of customers from nation with condition. |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT v_id, v_name, n_type, SUM(c_consumption) |
| FROM vendor_pd, customer_pd, nation_pd |
| WHERE v_id = c_v_id AND c_n_id = n_id AND n_population > 100 |
| GROUP BY v_id, v_name, n_type; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: vendor_pd.v_id, vendor_pd.v_name, nation_pd.n_type, (sum(customer_pd.c_consumption)) |
| -> Finalize HashAggregate |
| Output: vendor_pd.v_id, vendor_pd.v_name, nation_pd.n_type, sum(customer_pd.c_consumption) |
| Group Key: vendor_pd.v_id, vendor_pd.v_name, nation_pd.n_type |
| -> Hash Join |
| Output: vendor_pd.v_id, vendor_pd.v_name, nation_pd.n_type, (PARTIAL sum(customer_pd.c_consumption)) |
| Hash Cond: (vendor_pd.v_id = customer_pd.c_v_id) |
| -> Seq Scan on public.vendor_pd |
| Output: vendor_pd.v_id, vendor_pd.v_name |
| -> Hash |
| Output: customer_pd.c_v_id, nation_pd.n_type, (PARTIAL sum(customer_pd.c_consumption)) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: customer_pd.c_v_id, nation_pd.n_type, (PARTIAL sum(customer_pd.c_consumption)) |
| Hash Key: customer_pd.c_v_id |
| -> Partial HashAggregate |
| Output: customer_pd.c_v_id, nation_pd.n_type, PARTIAL sum(customer_pd.c_consumption) |
| Group Key: customer_pd.c_v_id, nation_pd.n_type |
| -> Hash Join |
| Output: customer_pd.c_consumption, customer_pd.c_v_id, nation_pd.n_type |
| Hash Cond: (customer_pd.c_n_id = nation_pd.n_id) |
| -> Seq Scan on public.customer_pd |
| Output: customer_pd.c_id, customer_pd.c_v_id, customer_pd.c_n_id, customer_pd.c_type, customer_pd.c_consumption |
| -> Hash |
| Output: nation_pd.n_type, nation_pd.n_id |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: nation_pd.n_type, nation_pd.n_id |
| -> Seq Scan on public.nation_pd |
| Output: nation_pd.n_type, nation_pd.n_id |
| Filter: (nation_pd.n_population > 100) |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (32 rows) |
| |
| -- Clear tables |
| DROP TABLE vendor_pd, customer_pd, nation_pd; |
| -- Test case group 4: OLAP-like cases |
| DROP TABLE IF EXISTS fact, dim; |
| NOTICE: table "fact" does not exist, skipping |
| NOTICE: table "dim" does not exist, skipping |
| CREATE TABLE fact (id int, did int, fact_time int, val int) WITH (APPENDONLY=true, ORIENTATION=column); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE dim (did int, proj_name varchar(20), brand int, model int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'did' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| INSERT INTO dim SELECT i % 100, 1, 1 FROM generate_series(1, 100) s(i); |
| INSERT INTO fact SELECT i % 10, i % 100, 30, 1 FROM generate_series(1, 10000) s(i); |
| ANALYZE dim, fact; |
| -- Test sum fact vals group by dim column |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT dim.did, sum(val) |
| FROM fact JOIN dim ON fact.did = dim.did |
| WHERE fact_time > 10 AND fact_time < 2000 |
| GROUP BY dim.did; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: dim.did, (sum(fact.val)) |
| -> Finalize HashAggregate |
| Output: dim.did, sum(fact.val) |
| Group Key: dim.did |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: dim.did, (PARTIAL sum(fact.val)) |
| Hash Key: dim.did |
| -> Hash Join |
| Output: dim.did, (PARTIAL sum(fact.val)) |
| Hash Cond: (dim.did = fact.did) |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: dim.did |
| -> Seq Scan on public.dim |
| Output: dim.did |
| -> Hash |
| Output: fact.did, (PARTIAL sum(fact.val)) |
| -> Partial HashAggregate |
| Output: fact.did, PARTIAL sum(fact.val) |
| Group Key: fact.did |
| -> Seq Scan on public.fact |
| Output: fact.val, fact.did |
| Filter: ((fact.fact_time > 10) AND (fact.fact_time < 2000)) |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (25 rows) |
| |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT dim.proj_name, sum(val) |
| FROM fact JOIN dim ON fact.did = dim.did |
| WHERE fact_time > 10 AND fact_time < 2000 |
| GROUP BY dim.proj_name; |
| QUERY PLAN |
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: dim.proj_name, (sum(fact.val)) |
| -> Finalize HashAggregate |
| Output: dim.proj_name, sum(fact.val) |
| Group Key: dim.proj_name |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: dim.proj_name, (PARTIAL sum(fact.val)) |
| Hash Key: dim.proj_name |
| -> Hash Join |
| Output: dim.proj_name, (PARTIAL sum(fact.val)) |
| Hash Cond: (dim.did = fact.did) |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: dim.proj_name, dim.did |
| -> Seq Scan on public.dim |
| Output: dim.proj_name, dim.did |
| -> Hash |
| Output: fact.did, (PARTIAL sum(fact.val)) |
| -> Partial HashAggregate |
| Output: fact.did, PARTIAL sum(fact.val) |
| Group Key: fact.did |
| -> Seq Scan on public.fact |
| Output: fact.val, fact.did |
| Filter: ((fact.fact_time > 10) AND (fact.fact_time < 2000)) |
| Settings: enable_hashjoin = 'on', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (25 rows) |
| |
| -- Clear tables |
| DROP TABLE dim, fact; |
| -- Test case group 5: partition table and inherit table |
| SET enable_incremental_sort TO off; |
| DROP TABLE IF EXISTS pagg_pd; |
| NOTICE: table "pagg_pd" does not exist, skipping |
| CREATE TABLE pagg_pd (a int, b int, c text, d int) PARTITION BY LIST(c); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE pagg_pd_p1 PARTITION OF pagg_pd FOR VALUES IN ('0000', '0001', '0002', '0003', '0004'); |
| NOTICE: table has parent, setting distribution columns to match parent table |
| CREATE TABLE pagg_pd_p2 PARTITION OF pagg_pd FOR VALUES IN ('0005', '0006', '0007', '0008'); |
| NOTICE: table has parent, setting distribution columns to match parent table |
| CREATE TABLE pagg_pd_p3 PARTITION OF pagg_pd FOR VALUES IN ('0009', '0010', '0011'); |
| NOTICE: table has parent, setting distribution columns to match parent table |
| INSERT INTO pagg_pd SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i; |
| ANALYZE pagg_pd; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT t1.c, sum(t1.a) |
| FROM pagg_pd t1 JOIN pagg_pd t2 ON t1.c < t2.c |
| GROUP BY t1.c |
| ORDER BY 1, 2; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t1.c, (sum(t1.a)) |
| Merge Key: t1.c, (sum(t1.a)) |
| -> Sort |
| Output: t1.c, (sum(t1.a)) |
| Sort Key: t1.c, (sum(t1.a)) |
| -> Finalize GroupAggregate |
| Output: t1.c, sum(t1.a) |
| Group Key: t1.c |
| -> Sort |
| Output: t1.c, (PARTIAL sum(t1.a)) |
| Sort Key: t1.c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: t1.c, (PARTIAL sum(t1.a)) |
| Hash Key: t1.c |
| -> Nested Loop |
| Output: t1.c, (PARTIAL sum(t1.a)) |
| Join Filter: (t1.c < t2.c) |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: t1.c, (PARTIAL sum(t1.a)) |
| -> Partial HashAggregate |
| Output: t1.c, PARTIAL sum(t1.a) |
| Group Key: t1.c |
| -> Append |
| -> Seq Scan on public.pagg_pd_p1 t1_1 |
| Output: t1_1.c, t1_1.a |
| -> Seq Scan on public.pagg_pd_p2 t1_2 |
| Output: t1_2.c, t1_2.a |
| -> Seq Scan on public.pagg_pd_p3 t1_3 |
| Output: t1_3.c, t1_3.a |
| -> Materialize |
| Output: t2.c |
| -> Append |
| -> Seq Scan on public.pagg_pd_p1 t2_1 |
| Output: t2_1.c |
| -> Seq Scan on public.pagg_pd_p2 t2_2 |
| Output: t2_2.c |
| -> Seq Scan on public.pagg_pd_p3 t2_3 |
| Output: t2_3.c |
| Settings: enable_hashjoin = 'on', enable_incremental_sort = 'off', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (41 rows) |
| |
| SELECT t1.c, sum(t1.a) |
| FROM pagg_pd t1 JOIN pagg_pd t2 ON t1.c < t2.c |
| GROUP BY t1.c |
| ORDER BY 1, 2; |
| c | sum |
| ------+--------- |
| 0000 | 5500000 |
| 0001 | 5625000 |
| 0002 | 5625000 |
| 0003 | 5500000 |
| 0004 | 3500000 |
| 0005 | 3375000 |
| 0006 | 3125000 |
| 0007 | 2750000 |
| 0008 | 1500000 |
| 0009 | 1125000 |
| 0010 | 625000 |
| (11 rows) |
| |
| DROP TABLE pagg_pd; |
| CREATE TABLE pagg_pd_p (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE pagg_pd (c text, d int) inherits (pagg_pd_p) PARTITION BY LIST(c); |
| ERROR: cannot create partitioned table as inheritance child |
| DROP TABLE IF EXISTS pagg_pd, pagg_pd_p; |
| NOTICE: table "pagg_pd" does not exist, skipping |
| CREATE TABLE pagg_pd_p (a int, b int, c text) PARTITION BY LIST(c); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE pagg_pd (d int) inherits (pagg_pd_p); |
| NOTICE: table has parent, setting distribution columns to match parent table |
| ERROR: cannot inherit from partitioned table "pagg_pd_p" |
| DROP TABLE IF EXISTS pagg_pd, pagg_pd_p; |
| NOTICE: table "pagg_pd" does not exist, skipping |
| CREATE TABLE pagg_pd_p (a int, b int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| CREATE TABLE pagg_pd (c text, d int) inherits (pagg_pd_p); |
| NOTICE: table has parent, setting distribution columns to match parent table |
| INSERT INTO pagg_pd SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i; |
| ANALYZE pagg_pd; |
| EXPLAIN (VERBOSE on, COSTS off) |
| SELECT t1.c, sum(t1.a) |
| FROM pagg_pd t1 JOIN pagg_pd t2 ON t1.c < t2.c |
| GROUP BY t1.c |
| ORDER BY 1, 2; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: t1.c, (sum(t1.a)) |
| Merge Key: t1.c, (sum(t1.a)) |
| -> Sort |
| Output: t1.c, (sum(t1.a)) |
| Sort Key: t1.c, (sum(t1.a)) |
| -> Finalize GroupAggregate |
| Output: t1.c, sum(t1.a) |
| Group Key: t1.c |
| -> Sort |
| Output: t1.c, (PARTIAL sum(t1.a)) |
| Sort Key: t1.c |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: t1.c, (PARTIAL sum(t1.a)) |
| Hash Key: t1.c |
| -> Nested Loop |
| Output: t1.c, (PARTIAL sum(t1.a)) |
| Join Filter: (t1.c < t2.c) |
| -> Broadcast Motion 3:3 (slice3; segments: 3) |
| Output: t1.c, (PARTIAL sum(t1.a)) |
| -> Partial HashAggregate |
| Output: t1.c, PARTIAL sum(t1.a) |
| Group Key: t1.c |
| -> Seq Scan on public.pagg_pd t1 |
| Output: t1.a, t1.b, t1.c, t1.d |
| -> Materialize |
| Output: t2.c |
| -> Seq Scan on public.pagg_pd t2 |
| Output: t2.c |
| Settings: enable_hashjoin = 'on', enable_incremental_sort = 'off', enable_mergejoin = 'off', enable_nestloop = 'off', enable_seqscan = 'on', gp_enable_agg_pushdown = 'on', optimizer = 'off' |
| Optimizer: Postgres query optimizer |
| (31 rows) |
| |
| SELECT t1.c, sum(t1.a) |
| FROM pagg_pd t1 JOIN pagg_pd t2 ON t1.c < t2.c |
| GROUP BY t1.c |
| ORDER BY 1, 2; |
| c | sum |
| ------+--------- |
| 0000 | 5500000 |
| 0001 | 5625000 |
| 0002 | 5625000 |
| 0003 | 5500000 |
| 0004 | 3500000 |
| 0005 | 3375000 |
| 0006 | 3125000 |
| 0007 | 2750000 |
| 0008 | 1500000 |
| 0009 | 1125000 |
| 0010 | 625000 |
| (11 rows) |
| |
| DROP TABLE pagg_pd, pagg_pd_p; |
| RESET enable_incremental_sort; |
| -- Clear settings |
| SET optimizer TO default; |
| SET gp_enable_agg_pushdown TO off; |
| SET enable_seqscan TO on; |
| SET enable_nestloop TO on; |
| SET enable_hashjoin TO on; |
| SET enable_mergejoin TO on; |