| -- tests MPP-2614 |
| -- two segments no mirrors |
| drop table if exists hashagg_test; |
| NOTICE: table "hashagg_test" does not exist, skipping |
| create table hashagg_test (id1 int4, id2 int4, day date, grp text, v int4); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id1' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create index hashagg_test_idx on hashagg_test (id1,id2,day,grp); |
| insert into hashagg_test values (1,1,'1/1/2006','there',1); |
| insert into hashagg_test values (1,1,'1/2/2006','there',2); |
| insert into hashagg_test values (1,1,'1/3/2006','there',3); |
| insert into hashagg_test values (1,1,'1/1/2006','hi',2); |
| insert into hashagg_test values (1,1,'1/2/2006','hi',3); |
| insert into hashagg_test values (1,1,'1/3/2006','hi',4); |
| -- this will get the wrong answer (right number of rows, wrong aggregates) |
| set enable_seqscan=off; |
| select grp,sum(v) from hashagg_test where id1 = 1 and id2 = 1 and day between '1/1/2006' and '1/31/2006' group by grp order by sum(v) desc; |
| grp | sum |
| -------+----- |
| hi | 9 |
| there | 6 |
| (2 rows) |
| |
| create index hashagg_test_idx_bm on hashagg_test using bitmap (id1,id2,day,grp); |
| set enable_indexscan=off; -- turn off b-tree index |
| select grp,sum(v) from hashagg_test where id1 = 1 and id2 = 1 and day between '1/1/2006' and '1/31/2006' group by grp order by sum(v) desc; |
| grp | sum |
| -------+----- |
| hi | 9 |
| there | 6 |
| (2 rows) |
| |
| -- correct answer |
| set enable_seqscan=on; |
| select grp,sum(v) from hashagg_test where id1 = 1 and id2 = 1 and day between '1/1/2006' and '1/31/2006' group by grp order by sum(v) desc; |
| grp | sum |
| -------+----- |
| hi | 9 |
| there | 6 |
| (2 rows) |
| |
| -- Test a window-aggregate (median) with a join where the join column |
| -- is further constrained by a constant. And the constant is of different |
| -- type (int4, while the column is bigint). We had a bug at one point |
| -- where this threw an error. |
| create table tbl_a (id bigint) distributed by (id); |
| create table tbl_b (id bigint, t numeric) distributed by (id); |
| insert into tbl_a values (1), (2), (3); |
| insert into tbl_b values (1, 50), (1, 100), (1, 150), (2, 200), (4, 400); |
| select tbl_a.id, median (t) from tbl_a, tbl_b |
| where tbl_a.id = tbl_b.id and tbl_a.id = 1::int4 |
| group by tbl_a.id ; |
| id | median |
| ----+-------- |
| 1 | 100 |
| (1 row) |
| |
| -- |
| -- Test that the planner doesn't try to use a hash agg for a type that is not hashable. |
| -- |
| -- First create a type to test with. |
| CREATE TYPE nohash_int; |
| CREATE FUNCTION nohash_int_in(cstring) RETURNS nohash_int IMMUTABLE STRICT LANGUAGE INTERNAL AS 'int4in'; |
| NOTICE: return type nohash_int is only a shell |
| CREATE FUNCTION nohash_int_out(nohash_int) RETURNS cstring IMMUTABLE STRICT LANGUAGE INTERNAL AS 'int4out'; |
| NOTICE: argument type nohash_int is only a shell |
| CREATE TYPE nohash_int (INPUT = nohash_int_in, OUTPUT=nohash_int_out, INTERNALLENGTH=4, PASSEDBYVALUE, ALIGNMENT=int4); |
| -- Create comparison operators and opclass. |
| CREATE FUNCTION nohash_int_cmp(nohash_int, nohash_int) RETURNS integer AS 'btint4cmp' LANGUAGE 'internal' IMMUTABLE STRICT; |
| CREATE FUNCTION nohash_int_lt(nohash_int, nohash_int) RETURNS bool IMMUTABLE STRICT LANGUAGE INTERNAL AS 'int4lt'; |
| CREATE FUNCTION nohash_int_le(nohash_int, nohash_int) RETURNS bool IMMUTABLE STRICT LANGUAGE INTERNAL AS 'int4le'; |
| CREATE FUNCTION nohash_int_eq(nohash_int, nohash_int) RETURNS bool IMMUTABLE STRICT LANGUAGE INTERNAL AS 'int4eq'; |
| CREATE FUNCTION nohash_int_ge(nohash_int, nohash_int) RETURNS bool IMMUTABLE STRICT LANGUAGE INTERNAL AS 'int4ge'; |
| CREATE FUNCTION nohash_int_gt(nohash_int, nohash_int) RETURNS bool IMMUTABLE STRICT LANGUAGE INTERNAL AS 'int4gt'; |
| CREATE OPERATOR < (PROCEDURE=nohash_int_lt, LEFTARG=nohash_int, RIGHTARG=nohash_int); |
| CREATE OPERATOR <= (PROCEDURE=nohash_int_le, LEFTARG=nohash_int, RIGHTARG=nohash_int); |
| CREATE OPERATOR = (PROCEDURE=nohash_int_eq, LEFTARG=nohash_int, RIGHTARG=nohash_int); |
| CREATE OPERATOR >= (PROCEDURE=nohash_int_ge, LEFTARG=nohash_int, RIGHTARG=nohash_int); |
| CREATE OPERATOR > (PROCEDURE=nohash_int_gt, LEFTARG=nohash_int, RIGHTARG=nohash_int); |
| CREATE OPERATOR CLASS nohash_int_bt_ops DEFAULT |
| FOR TYPE nohash_int USING btree AS |
| OPERATOR 1 <, |
| OPERATOR 2 <=, |
| OPERATOR 3 =, |
| OPERATOR 4 >=, |
| OPERATOR 5 >, |
| FUNCTION 1 nohash_int_cmp(nohash_int, nohash_int); |
| -- A test table |
| create table hashagg_test2(normal_int int4, nohash_int nohash_int) distributed randomly; |
| insert into hashagg_test2 select g%10, (g%10)::text::nohash_int from generate_series(1, 10000) g; |
| -- Prefer hash aggs |
| set enable_sort=off; |
| -- These should both work. The first is expected to use a hash agg. The second will |
| -- use a Sort + Group, because nohash_int type is not hashable. |
| select normal_int from hashagg_test2 group by normal_int; |
| normal_int |
| ------------ |
| 0 |
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
| 9 |
| (10 rows) |
| |
| select nohash_int from hashagg_test2 group by nohash_int; |
| nohash_int |
| ------------ |
| 0 |
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
| 9 |
| (10 rows) |
| |
| reset enable_sort; |
| -- We had a bug in the following combine functions where if the combine function |
| -- returned a NULL, it didn't set fcinfo->isnull = true. This led to a segfault |
| -- when we would spill in the final stage of a two-stage agg inside the serial |
| -- function. |
| CREATE TABLE test_combinefn_null (a int8, b int, c char(32000)); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| INSERT INTO test_combinefn_null SELECT i, (i | 15), i::text FROM generate_series(1, 1024) i; |
| ANALYZE test_combinefn_null; |
| SET statement_mem='2MB'; |
| set enable_sort=off; |
| -- Test int8_avg_combine() |
| SELECT $$ |
| SELECT |
| sum(a) FILTER (WHERE false) |
| FROM test_combinefn_null |
| GROUP BY b |
| HAVING max(c) = '31' |
| $$ AS qry \gset |
| EXPLAIN (COSTS OFF, VERBOSE) :qry; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (sum(a) FILTER (WHERE false)), b |
| -> Finalize HashAggregate |
| Output: sum(a) FILTER (WHERE false), b |
| Group Key: test_combinefn_null.b |
| Filter: (max(test_combinefn_null.c) = '31'::bpchar) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, (PARTIAL sum(a) FILTER (WHERE false)), (PARTIAL max(c)) |
| Hash Key: b |
| -> Partial HashAggregate |
| Output: b, PARTIAL sum(a) FILTER (WHERE false), PARTIAL max(c) |
| Group Key: test_combinefn_null.b |
| -> Seq Scan on public.test_combinefn_null |
| Output: a, b, c |
| Optimizer: Postgres query optimizer |
| Settings: enable_indexscan=off, enable_seqscan=on, enable_sort=off, optimizer=off |
| (13 rows) |
| |
| :qry; |
| sum |
| ----- |
| |
| (1 row) |
| |
| -- Test numeric_poly_combine() |
| SELECT $$ |
| SELECT |
| var_pop(a::int) FILTER (WHERE false) |
| FROM test_combinefn_null |
| GROUP BY b |
| HAVING max(c) = '31' |
| $$ AS qry \gset |
| EXPLAIN (COSTS OFF, VERBOSE) :qry; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (var_pop((a)::integer) FILTER (WHERE false)), b |
| -> Finalize HashAggregate |
| Output: var_pop((a)::integer) FILTER (WHERE false), b |
| Group Key: test_combinefn_null.b |
| Filter: (max(test_combinefn_null.c) = '31'::bpchar) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, (PARTIAL var_pop((a)::integer) FILTER (WHERE false)), (PARTIAL max(c)) |
| Hash Key: b |
| -> Partial HashAggregate |
| Output: b, PARTIAL var_pop((a)::integer) FILTER (WHERE false), PARTIAL max(c) |
| Group Key: test_combinefn_null.b |
| -> Seq Scan on public.test_combinefn_null |
| Output: a, b, c |
| Optimizer: Postgres query optimizer |
| Settings: enable_indexscan=off, enable_seqscan=on, enable_sort=off, optimizer=off |
| (13 rows) |
| |
| :qry; |
| var_pop |
| --------- |
| |
| (1 row) |
| |
| -- Test numeric_avg_combine() |
| SELECT $$ |
| SELECT |
| sum(a::numeric) FILTER (WHERE false) |
| FROM test_combinefn_null |
| GROUP BY b |
| HAVING max(c) = '31' |
| $$ AS qry \gset |
| EXPLAIN (COSTS OFF, VERBOSE) :qry; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (sum((a)::numeric) FILTER (WHERE false)), b |
| -> Finalize HashAggregate |
| Output: sum((a)::numeric) FILTER (WHERE false), b |
| Group Key: test_combinefn_null.b |
| Filter: (max(test_combinefn_null.c) = '31'::bpchar) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, (PARTIAL sum((a)::numeric) FILTER (WHERE false)), (PARTIAL max(c)) |
| Hash Key: b |
| -> Partial HashAggregate |
| Output: b, PARTIAL sum((a)::numeric) FILTER (WHERE false), PARTIAL max(c) |
| Group Key: test_combinefn_null.b |
| -> Seq Scan on public.test_combinefn_null |
| Output: a, b, c |
| Optimizer: Postgres query optimizer |
| Settings: enable_indexscan=off, enable_seqscan=on, enable_sort=off, optimizer=off |
| (13 rows) |
| |
| :qry; |
| sum |
| ----- |
| |
| (1 row) |
| |
| -- Test numeric_combine() |
| SELECT $$ |
| SELECT |
| var_pop(a::numeric) FILTER (WHERE false) |
| FROM test_combinefn_null |
| GROUP BY b |
| HAVING max(c) = '31' |
| $$ AS qry \gset |
| EXPLAIN (COSTS OFF, VERBOSE) :qry; |
| QUERY PLAN |
| --------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Output: (var_pop((a)::numeric) FILTER (WHERE false)), b |
| -> Finalize HashAggregate |
| Output: var_pop((a)::numeric) FILTER (WHERE false), b |
| Group Key: test_combinefn_null.b |
| Filter: (max(test_combinefn_null.c) = '31'::bpchar) |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Output: b, (PARTIAL var_pop((a)::numeric) FILTER (WHERE false)), (PARTIAL max(c)) |
| Hash Key: b |
| -> Partial HashAggregate |
| Output: b, PARTIAL var_pop((a)::numeric) FILTER (WHERE false), PARTIAL max(c) |
| Group Key: test_combinefn_null.b |
| -> Seq Scan on public.test_combinefn_null |
| Output: a, b, c |
| Optimizer: Postgres query optimizer |
| Settings: enable_indexscan=off, enable_seqscan=on, enable_sort=off, optimizer=off |
| (13 rows) |
| |
| :qry; |
| var_pop |
| --------- |
| |
| (1 row) |
| |