blob: 3378f8b64e66205754413ed67009592932d815d7 [file] [log] [blame]
-- disable ORCA
SET optimizer TO off;
-- Test case group 1: basic functions
CREATE TABLE agg_pushdown_parent (
i int primary key,
x int);
CREATE TABLE agg_pushdown_child1 (
j int,
parent int,
v double precision,
PRIMARY KEY (j, parent));
CREATE INDEX ON agg_pushdown_child1(parent);
CREATE TABLE agg_pushdown_child2 (
k int,
parent int,
v double precision,
PRIMARY KEY (k, parent));;
INSERT INTO agg_pushdown_parent(i, x)
SELECT n, n
FROM generate_series(0, 7) AS s(n);
INSERT INTO agg_pushdown_child1(j, parent, v)
SELECT 128 * i + n, i, random()
FROM generate_series(0, 127) AS s(n), agg_pushdown_parent;
INSERT INTO agg_pushdown_child2(k, parent, v)
SELECT 128 * i + n, i, random()
FROM generate_series(0, 127) AS s(n), agg_pushdown_parent;
ANALYZE agg_pushdown_parent;
ANALYZE agg_pushdown_child1;
ANALYZE agg_pushdown_child2;
SET enable_nestloop TO on;
SET enable_hashjoin TO off;
SET enable_mergejoin TO off;
-- Perform scan of a table, aggregate the result, join it to the other table
-- and finalize the aggregation.
--
-- In addition, check that functionally dependent column "p.x" can be
-- referenced by SELECT although GROUP BY references "p.i".
SET gp_enable_agg_pushdown TO off;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.x, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
AS c1 ON c1.parent = p.i GROUP BY p.i;
SET gp_enable_agg_pushdown TO on;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.x, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
AS c1 ON c1.parent = p.i GROUP BY p.i;
-- The same for hash join.
SET enable_nestloop TO off;
SET enable_hashjoin TO on;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
AS c1 ON c1.parent = p.i GROUP BY p.i;
-- The same for merge join.
SET enable_hashjoin TO off;
SET enable_mergejoin TO on;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
AS c1 ON c1.parent = p.i GROUP BY p.i;
-- Restore the default values.
SET enable_nestloop TO on;
SET enable_hashjoin TO on;
-- Scan index on agg_pushdown_child1(parent) column and aggregate the result
-- using AGG_SORTED strategy.
SET gp_enable_agg_pushdown TO off;
SET enable_seqscan TO off;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
AS c1 ON c1.parent = p.i GROUP BY p.i;
SET gp_enable_agg_pushdown TO on;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
AS c1 ON c1.parent = p.i GROUP BY p.i;
SET enable_seqscan TO on;
-- Join "c1" to "p.x" column, i.e. one that is not in the GROUP BY clause. The
-- planner should still use "c1.parent" as grouping expression for partial
-- aggregation, although it's not in the same equivalence class as the GROUP
-- BY expression ("p.i"). The reason to use "c1.parent" for partial
-- aggregation is that this is the only way for "c1" to provide the join
-- expression with input data.
SET gp_enable_agg_pushdown TO off;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
AS c1 ON c1.parent = p.x GROUP BY p.i;
SET gp_enable_agg_pushdown TO on;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1
AS c1 ON c1.parent = p.x GROUP BY p.i;
-- Perform nestloop join between agg_pushdown_child1 and agg_pushdown_child2
-- and aggregate the result.
SET enable_nestloop TO on;
SET enable_hashjoin TO off;
SET enable_mergejoin TO off;
SET gp_enable_agg_pushdown TO off;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
SET gp_enable_agg_pushdown TO on;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
-- The same for hash join.
SET enable_nestloop TO off;
SET enable_hashjoin TO on;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
-- The same for merge join.
SET enable_hashjoin TO off;
SET enable_mergejoin TO on;
SET enable_seqscan TO off;
EXPLAIN (VERBOSE on, COSTS off)
SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN
agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON
c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i;
SET enable_seqscan TO on;
-- Clear tables
DROP TABLE agg_pushdown_child1;
DROP TABLE agg_pushdown_child2;
DROP TABLE agg_pushdown_parent;
-- Test case group 2: Pushdown with different join keys and group keys.
DROP TABLE IF EXISTS t1, t2;
CREATE TABLE t1 (id int, val int, comment VARCHAR(20));
CREATE TABLE t2 (id int, val int);
SET enable_nestloop TO off;
SET enable_hashjoin TO on;
SET enable_mergejoin TO off;
SET gp_enable_agg_pushdown TO ON;
-- Join key and group key are the same.
EXPLAIN (VERBOSE on, COSTS off)
SELECT t1.id, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id GROUP BY t1.id;
-- Join key and group key are different.
EXPLAIN (VERBOSE on, COSTS off)
SELECT t1.val, SUM(t1.id) FROM t1, t2 WHERE t1.id = t2.id GROUP BY t1.val;
-- Pushdown with equivclass.
EXPLAIN (VERBOSE on, COSTS off)
SELECT t2.id, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id GROUP BY t2.id;
-- Group by column from t2 and aggregate column from t1.
EXPLAIN (VERBOSE on, COSTS off)
SELECT t2.val, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id GROUP BY t2.val;
-- Pushdown with multiply group keys.
EXPLAIN (VERBOSE on, COSTS off)
SELECT t1.id, t1.comment, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id GROUP BY t1.id, t1.comment;
-- Pushdown with multiply join keys.
EXPLAIN (VERBOSE on, COSTS off)
SELECT t1.id, t1.comment, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id and t1.val = t2.val GROUP BY t1.id, t1.comment;
-- Test above case with different data distributions
INSERT INTO t1 SELECT i, i, 'asd' FROM generate_series(1, 10000) s(i);
ANALYZE t1;
EXPLAIN (VERBOSE on, COSTS off)
SELECT t1.id, t1.comment, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id and t1.val = t2.val GROUP BY t1.id, t1.comment;
DELETE FROM t1;
INSERT INTO t1 SELECT i % 10, 1, 'asd' FROM generate_series(1, 10000) s(i);
ANALYZE t1;
EXPLAIN (VERBOSE on, COSTS off)
SELECT t1.id, t1.comment, SUM(t1.val) FROM t1, t2 WHERE t1.id = t2.id and t1.val = t2.val GROUP BY t1.id, t1.comment;
-- Clear tables
DROP TABLE t1, t2;
-- Test case group 3: Pushdown in subquery and group from subquery.
DROP TABLE IF EXISTS part, lineitem;
CREATE TABLE part (p_partkey int, p_size int, p_price int);
CREATE TABLE lineitem (l_orderkey int, l_partkey int, l_amount int);
SET enable_nestloop TO off;
SET enable_hashjoin TO on;
SET enable_mergejoin TO off;
SET gp_enable_agg_pushdown TO ON;
-- Pushdown within subquery.
EXPLAIN (VERBOSE on, COSTS off)
SELECT SUM(slp) FROM
(SELECT l_partkey, SUM(p_price) from lineitem, part
WHERE l_partkey = p_partkey AND p_size < 40
GROUP BY l_partkey
ORDER BY l_partkey
LIMIT 100) temp(lp, slp)
WHERE slp > 10;
-- Group base on subquery.
EXPLAIN (VERBOSE on, COSTS off)
SELECT p_partkey, SUM(l_amount) FROM
part, (SELECT l_partkey, l_amount + 10
FROM lineitem ORDER BY l_partkey LIMIT 10000) li(l_partkey, l_amount)
WHERE l_partkey = p_partkey
GROUP BY p_partkey;
-- Clear tables
DROP TABLE part, lineitem;
-- Test case group 4: construct grouped join rel from 2 plain rels
DROP TABLE IF EXISTS vendor_pd, customer_pd, nation_pd;
CREATE TABLE vendor_pd (v_id int, v_name VARCHAR(20)) WITH (APPENDONLY=true, ORIENTATION=column);
CREATE TABLE customer_pd (c_id int primary key, c_v_id int, c_n_id int, c_type int, c_consumption int);
CREATE TABLE nation_pd (n_id int, n_name VARCHAR(20), n_type int, n_population int) WITH (APPENDONLY=true, ORIENTATION=column);
INSERT INTO nation_pd SELECT i, 'abc', 1, 1 from generate_series(1, 100) s(i);
INSERT INTO customer_pd SELECT i, i % 100, i % 100, 1, 100 from generate_series(1, 10000) s(i);
ANALYZE nation_pd, customer_pd;
-- For each vendor, calculate the total consumption of qualified customers
EXPLAIN (VERBOSE on, COSTS off)
SELECT v_id, v_name, SUM(c_consumption)
FROM vendor_pd, customer_pd, nation_pd
WHERE v_id = c_v_id AND c_n_id = n_id AND c_id > n_population
GROUP BY v_id, v_name;
-- For each vendor/c_type/n_type, calculate the total consumption of qualified customers
EXPLAIN (VERBOSE on, COSTS off)
SELECT v_id, c_type, n_type, SUM(c_consumption)
FROM vendor_pd, customer_pd, nation_pd
WHERE v_id = c_v_id AND c_n_id = n_id AND c_id > n_population
GROUP BY v_id, c_type, n_type;
-- For each vendor/n_type, calculate the total consumption of customers from nation with condition.
EXPLAIN (VERBOSE on, COSTS off)
SELECT v_id, v_name, n_type, SUM(c_consumption)
FROM vendor_pd, customer_pd, nation_pd
WHERE v_id = c_v_id AND c_n_id = n_id AND n_population > 100
GROUP BY v_id, v_name, n_type;
-- Clear tables
DROP TABLE vendor_pd, customer_pd, nation_pd;
-- Test case group 4: OLAP-like cases
DROP TABLE IF EXISTS fact, dim;
CREATE TABLE fact (id int, did int, fact_time int, val int) WITH (APPENDONLY=true, ORIENTATION=column);
CREATE TABLE dim (did int, proj_name varchar(20), brand int, model int);
INSERT INTO dim SELECT i % 100, 1, 1 FROM generate_series(1, 100) s(i);
INSERT INTO fact SELECT i % 10, i % 100, 30, 1 FROM generate_series(1, 10000) s(i);
ANALYZE dim, fact;
-- Test sum fact vals group by dim column
EXPLAIN (VERBOSE on, COSTS off)
SELECT dim.did, sum(val)
FROM fact JOIN dim ON fact.did = dim.did
WHERE fact_time > 10 AND fact_time < 2000
GROUP BY dim.did;
EXPLAIN (VERBOSE on, COSTS off)
SELECT dim.proj_name, sum(val)
FROM fact JOIN dim ON fact.did = dim.did
WHERE fact_time > 10 AND fact_time < 2000
GROUP BY dim.proj_name;
-- Clear tables
DROP TABLE dim, fact;
-- Test case group 5: partition table and inherit table
SET enable_incremental_sort TO off;
DROP TABLE IF EXISTS pagg_pd;
CREATE TABLE pagg_pd (a int, b int, c text, d int) PARTITION BY LIST(c);
CREATE TABLE pagg_pd_p1 PARTITION OF pagg_pd FOR VALUES IN ('0000', '0001', '0002', '0003', '0004');
CREATE TABLE pagg_pd_p2 PARTITION OF pagg_pd FOR VALUES IN ('0005', '0006', '0007', '0008');
CREATE TABLE pagg_pd_p3 PARTITION OF pagg_pd FOR VALUES IN ('0009', '0010', '0011');
INSERT INTO pagg_pd SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i;
ANALYZE pagg_pd;
EXPLAIN (VERBOSE on, COSTS off)
SELECT t1.c, sum(t1.a)
FROM pagg_pd t1 JOIN pagg_pd t2 ON t1.c < t2.c
GROUP BY t1.c
ORDER BY 1, 2;
SELECT t1.c, sum(t1.a)
FROM pagg_pd t1 JOIN pagg_pd t2 ON t1.c < t2.c
GROUP BY t1.c
ORDER BY 1, 2;
DROP TABLE pagg_pd;
CREATE TABLE pagg_pd_p (a int, b int);
CREATE TABLE pagg_pd (c text, d int) inherits (pagg_pd_p) PARTITION BY LIST(c);
DROP TABLE IF EXISTS pagg_pd, pagg_pd_p;
CREATE TABLE pagg_pd_p (a int, b int, c text) PARTITION BY LIST(c);
CREATE TABLE pagg_pd (d int) inherits (pagg_pd_p);
DROP TABLE IF EXISTS pagg_pd, pagg_pd_p;
CREATE TABLE pagg_pd_p (a int, b int);
CREATE TABLE pagg_pd (c text, d int) inherits (pagg_pd_p);
INSERT INTO pagg_pd SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i;
ANALYZE pagg_pd;
EXPLAIN (VERBOSE on, COSTS off)
SELECT t1.c, sum(t1.a)
FROM pagg_pd t1 JOIN pagg_pd t2 ON t1.c < t2.c
GROUP BY t1.c
ORDER BY 1, 2;
SELECT t1.c, sum(t1.a)
FROM pagg_pd t1 JOIN pagg_pd t2 ON t1.c < t2.c
GROUP BY t1.c
ORDER BY 1, 2;
DROP TABLE pagg_pd, pagg_pd_p;
RESET enable_incremental_sort;
-- Clear settings
SET optimizer TO default;
SET gp_enable_agg_pushdown TO off;
SET enable_seqscan TO on;
SET enable_nestloop TO on;
SET enable_hashjoin TO on;
SET enable_mergejoin TO on;