blob: 94af2d0d82f3aa1e06b718aabdc20a00e81d9565 [file] [log] [blame]
--
-- Test parallel array_agg(anynonarray) and array_agg(anyarray)
--
create schema test_gp_array_agg;
set search_path=test_gp_array_agg;
set optimizer_trace_fallback = on;
-- Test array_agg(anynonarray)
create table perct as select a, a / 10 as b from generate_series(1, 100)a distributed by (a);
drop table if exists t1;
NOTICE: table "t1" does not exist, skipping
create table t1 (a varchar, b character varying) distributed randomly;
insert into t1 values ('aaaaaaa', 'cccccccccc');
insert into t1 values ('aaaaaaa', 'ddddd');
insert into t1 values ('bbbbbbb', 'eeee');
insert into t1 values ('bbbbbbb', 'eeef');
insert into t1 values ('bbbbb', 'dfafa');
create temporary table aggordertest (a int4, b int4) distributed by (a);
insert into aggordertest values (1,1), (2,2), (1,3), (3,4), (null,5), (2,null);
create table mergeappend_test ( a int, b int, x int ) distributed by (a,b);
insert into mergeappend_test select g/100, g/100, g from generate_series(1, 500) g;
analyze mergeappend_test;
create table pagg_test (x int, y int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into pagg_test
select (case x % 4 when 1 then null else x end), x % 10
from generate_series(1,5000) x;
select (select gp_array_agg(a order by a) from perct where median(t.a) = 50.5) from (select * from perct t order by a offset 0) as t;
gp_array_agg
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100}
(1 row)
select gp_array_agg(f order by f) from (select b::text as f from t1 group by b order by b) q;
gp_array_agg
------------------------------------
{cccccccccc,ddddd,dfafa,eeee,eeef}
(1 row)
select gp_array_agg(a order by a nulls first) from aggordertest;
gp_array_agg
------------------
{NULL,1,1,2,2,3}
(1 row)
select gp_array_agg(a order by a nulls last) from aggordertest;
gp_array_agg
------------------
{1,1,2,2,3,NULL}
(1 row)
select gp_array_agg(a order by a desc nulls first) from aggordertest;
gp_array_agg
------------------
{NULL,3,2,2,1,1}
(1 row)
select gp_array_agg(a order by a desc nulls last) from aggordertest;
gp_array_agg
------------------
{3,2,2,1,1,NULL}
(1 row)
select gp_array_agg(a order by b nulls first) from aggordertest;
gp_array_agg
------------------
{2,1,2,1,3,NULL}
(1 row)
select gp_array_agg(a order by b nulls last) from aggordertest;
gp_array_agg
------------------
{1,2,1,3,NULL,2}
(1 row)
select gp_array_agg(a order by b desc nulls first) from aggordertest;
gp_array_agg
------------------
{2,NULL,3,1,2,1}
(1 row)
select gp_array_agg(a order by b desc nulls last) from aggordertest;
gp_array_agg
------------------
{NULL,3,1,2,1,2}
(1 row)
select a, b, array_dims(gp_array_agg(x)) from mergeappend_test r group by a, b
union all
select null, null, array_dims(gp_array_agg(x)) from mergeappend_test r
order by 1,2;
a | b | array_dims
---+---+------------
0 | 0 | [1:99]
1 | 1 | [1:100]
2 | 2 | [1:100]
3 | 3 | [1:100]
4 | 4 | [1:100]
5 | 5 | [1:1]
| | [1:500]
(7 rows)
explain (costs off)
select a, b, array_dims(gp_array_agg(x)) from mergeappend_test r group by a, b
union all
select null, null, array_dims(gp_array_agg(x)) from mergeappend_test r
order by 1,2;
QUERY PLAN
-------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Merge Key: r.a, r.b
-> Sort
Sort Key: r.a, r.b
-> Append
-> HashAggregate
Group Key: r.a, r.b
-> Seq Scan on mergeappend_test r
-> Result
-> Redistribute Motion 1:3 (slice2)
-> Aggregate
-> Gather Motion 3:1 (slice3; segments: 3)
-> Seq Scan on mergeappend_test r_1
Optimizer: Pivotal Optimizer (GPORCA)
(14 rows)
select a, b, array_dims(gp_array_agg(x)) from mergeappend_test r group by a, b
union all
select null, null, array_dims(gp_array_agg(x)) from mergeappend_test r, pg_sleep(0)
order by 1,2;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions
a | b | array_dims
---+---+------------
0 | 0 | [1:99]
1 | 1 | [1:100]
2 | 2 | [1:100]
3 | 3 | [1:100]
4 | 4 | [1:100]
5 | 5 | [1:1]
| | [1:500]
(7 rows)
explain analyze select a, b, array_dims(gp_array_agg(x)) from mergeappend_test r group by a, b
union all
select null, null, array_dims(gp_array_agg(x)) from mergeappend_test r
order by 1,2;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.06 rows=22 width=16) (actual time=2.204..2.206 rows=7 loops=1)
Merge Key: r.a, r.b
-> Sort (cost=0.00..862.06 rows=8 width=16) (actual time=1.839..1.855 rows=5 loops=1)
Sort Key: r.a, r.b
Sort Method: quicksort Memory: 150kB
-> Append (cost=0.00..862.06 rows=8 width=16) (actual time=0.333..1.826 rows=5 loops=1)
-> HashAggregate (cost=0.00..431.05 rows=7 width=16) (actual time=0.332..0.341 rows=4 loops=1)
Group Key: r.a, r.b
Peak Memory Usage: 0 kB
-> Seq Scan on mergeappend_test r (cost=0.00..431.00 rows=167 width=12) (actual time=0.103..0.144 rows=301 loops=1)
-> Result (cost=0.00..431.01 rows=1 width=16) (actual time=1.481..1.483 rows=1 loops=1)
-> Redistribute Motion 1:3 (slice2) (cost=0.00..431.01 rows=1 width=8) (actual time=1.477..1.477 rows=1 loops=1)
-> Aggregate (cost=0.00..431.01 rows=1 width=8) (actual time=1.579..1.579 rows=1 loops=1)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.01 rows=500 width=4) (actual time=1.284..1.451 rows=500 loops=1)
-> Seq Scan on mergeappend_test r_1 (cost=0.00..431.00 rows=167 width=4) (actual time=0.087..0.152 rows=301 loops=1)
Planning Time: 57.856 ms
(slice0) Executor memory: 56K bytes.
(slice1) Executor memory: 41K bytes avg x 3 workers, 42K bytes max (seg0). Work_mem: 26K bytes max.
(slice2) Executor memory: 58K bytes (entry db).
(slice3) Executor memory: 37K bytes avg x 3 workers, 37K bytes max (seg0).
Memory used: 128000kB
Optimizer: Pivotal Optimizer (GPORCA)
Execution Time: 2.943 ms
(23 rows)
-- create a view as we otherwise have to repeat this query a few times.
create view v_pagg_test as
select
y,
min(t) as tmin,max(t) as tmax,count(distinct t) as tndistinct,
min(a) as amin,max(a) as amax,count(distinct a) as andistinct
from (
select
y,
unnest(regexp_split_to_array(a1.t, ','))::int as t,
unnest(a1.a) as a
from (
select
y,
string_agg(x::text, ',') as t,
string_agg(x::text::bytea, ',') as b,
gp_array_agg(x) as a
from pagg_test
group by y
) a1
) a2
group by y;
-- ensure results are correct.
select * from v_pagg_test order by y;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer
y | tmin | tmax | tndistinct | amin | amax | andistinct
---+------+------+------------+------+------+------------
0 | 10 | 5000 | 500 | 10 | 5000 | 500
1 | 11 | 4991 | 250 | 11 | 4991 | 250
2 | 2 | 4992 | 500 | 2 | 4992 | 500
3 | 3 | 4983 | 250 | 3 | 4983 | 250
4 | 4 | 4994 | 500 | 4 | 4994 | 500
5 | 15 | 4995 | 250 | 15 | 4995 | 250
6 | 6 | 4996 | 500 | 6 | 4996 | 500
7 | 7 | 4987 | 250 | 7 | 4987 | 250
8 | 8 | 4998 | 500 | 8 | 4998 | 500
9 | 19 | 4999 | 250 | 19 | 4999 | 250
(10 rows)
explain (costs off) select * from v_pagg_test order by y;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer
QUERY PLAN
------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Merge Key: pagg_test.y
-> GroupAggregate
Group Key: pagg_test.y
-> Sort
Sort Key: pagg_test.y
-> Result
-> ProjectSet
-> HashAggregate
Group Key: pagg_test.y
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: pagg_test.y
-> Seq Scan on pagg_test
Optimizer: Postgres query optimizer
(14 rows)
-- Test array_agg(anyarray)
create table int_array_table (a int, arr int[]);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into int_array_table select i, array[i, i] from generate_series(1, 5)i;
insert into int_array_table select 6, '{6, NULL}'::int[];
insert into int_array_table select 7, '{6, NULL}'::int[];
insert into int_array_table select 8, '{NULL, 7}'::int[];
insert into int_array_table select 9, '{NULL, 7}'::int[];
insert into int_array_table select NULL, '{NULL, NULL}'::int[];
select gp_array_agg(arr order by arr) from int_array_table;
gp_array_agg
---------------------------------------------------------------------------------
{{1,1},{2,2},{3,3},{4,4},{5,5},{6,NULL},{6,NULL},{NULL,7},{NULL,7},{NULL,NULL}}
(1 row)
select gp_array_agg(arr order by arr desc) from int_array_table;
gp_array_agg
---------------------------------------------------------------------------------
{{NULL,NULL},{NULL,7},{NULL,7},{6,NULL},{6,NULL},{5,5},{4,4},{3,3},{2,2},{1,1}}
(1 row)
select gp_array_agg(arr order by a nulls first) from int_array_table;
gp_array_agg
---------------------------------------------------------------------------------
{{NULL,NULL},{1,1},{2,2},{3,3},{4,4},{5,5},{6,NULL},{6,NULL},{NULL,7},{NULL,7}}
(1 row)
select gp_array_agg(arr order by a nulls last) from int_array_table;
gp_array_agg
---------------------------------------------------------------------------------
{{1,1},{2,2},{3,3},{4,4},{5,5},{6,NULL},{6,NULL},{NULL,7},{NULL,7},{NULL,NULL}}
(1 row)
select gp_array_agg(arr order by a desc nulls first) from int_array_table;
gp_array_agg
---------------------------------------------------------------------------------
{{NULL,NULL},{NULL,7},{NULL,7},{6,NULL},{6,NULL},{5,5},{4,4},{3,3},{2,2},{1,1}}
(1 row)
select gp_array_agg(arr order by a desc nulls last) from int_array_table;
gp_array_agg
---------------------------------------------------------------------------------
{{NULL,7},{NULL,7},{6,NULL},{6,NULL},{5,5},{4,4},{3,3},{2,2},{1,1},{NULL,NULL}}
(1 row)
select a, gp_array_agg(arr order by arr) from int_array_table group by a order by a;
a | gp_array_agg
---+---------------
1 | {{1,1}}
2 | {{2,2}}
3 | {{3,3}}
4 | {{4,4}}
5 | {{5,5}}
6 | {{6,NULL}}
7 | {{6,NULL}}
8 | {{NULL,7}}
9 | {{NULL,7}}
| {{NULL,NULL}}
(10 rows)
select array_dims(gp_array_agg(distinct arr)) from int_array_table;
array_dims
------------
[1:8][1:2]
(1 row)
explain (verbose, costs off) select array_dims(gp_array_agg(distinct arr)) from int_array_table;
QUERY PLAN
-----------------------------------------------------------
Aggregate
Output: array_dims(gp_array_agg(DISTINCT arr))
-> Gather Motion 3:1 (slice1; segments: 3)
Output: arr
-> Seq Scan on test_gp_array_agg.int_array_table
Output: arr
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)
CREATE TABLE arrtest (
a int2[],
b int4[][][],
c name[],
d text[][],
e float8[],
f char(5)[],
g varchar(5)[]
) DISTRIBUTED RANDOMLY;
INSERT INTO arrtest (a, b[1:2][1:2][1:2], c, d, e, f, g)
VALUES ('{1,2}', '{{{0,0},{1,2}},{{3,4},{5,6}}}', '{"foo"}',
'{{"elt1", "elt2"}}', '{1.1, 2.2}',
'{"abc","abcde"}', '{"abc","abcde"}');
INSERT INTO arrtest (a, b[1:2][1:2][1:2], c, d, e, f, g)
VALUES ('{1,2}', '{{{7,8},{9,10}},{{11,12},{13,14}}}', '{"bar"}',
'{{"elt1", "elt2"}}', '{"3.3", "4.4"}',
'{"abc","abcde"}', '{"abc","abcde"}');
SELECT $query$
select
array_agg(a) agg_a,
array_dims(array_agg(b)) dims_b,
array_dims(array_agg(c)) dims_c,
array_agg(d) agg_d,
array_dims(array_agg(e)) dims_e,
array_agg(f) agg_f,
array_agg(g) agg_g
from arrtest;
$query$ AS qry \gset
EXPLAIN (COSTS OFF, VERBOSE)
:qry ;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize Aggregate
Output: array_agg(a), array_dims(array_agg(b)), array_dims(array_agg(c)), array_agg(d), array_dims(array_agg(e)), array_agg(f), array_agg(g)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL array_agg(a)), (PARTIAL array_agg(b)), (PARTIAL array_agg(c)), (PARTIAL array_agg(d)), (PARTIAL array_agg(e)), (PARTIAL array_agg(f)), (PARTIAL array_agg(g))
-> Partial Aggregate
Output: PARTIAL array_agg(a), PARTIAL array_agg(b), PARTIAL array_agg(c), PARTIAL array_agg(d), PARTIAL array_agg(e), PARTIAL array_agg(f), PARTIAL array_agg(g)
-> Seq Scan on test_gp_array_agg.arrtest
Output: a, b, c, d, e, f, g
Optimizer: Postgres query optimizer
(9 rows)
:qry ;
INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner
DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation
agg_a | dims_b | dims_c | agg_d | dims_e | agg_f | agg_g
---------------+----------------------+------------+-------------------------------+------------+-----------------------------------+---------------------------
{{1,2},{1,2}} | [1:2][1:2][1:2][1:2] | [1:2][1:1] | {{{elt1,elt2}},{{elt1,elt2}}} | [1:2][1:2] | {{"abc ",abcde},{"abc ",abcde}} | {{abc,abcde},{abc,abcde}}
(1 row)
-- CLEANUP
drop schema test_gp_array_agg cascade;
NOTICE: drop cascades to 7 other objects
DETAIL: drop cascades to table perct
drop cascades to table t1
drop cascades to table mergeappend_test
drop cascades to table pagg_test
drop cascades to view v_pagg_test
drop cascades to table int_array_table
drop cascades to table arrtest
reset optimizer_trace_fallback;