| -- |
| -- Test parallel array_agg(anynonarray) and array_agg(anyarray) |
| -- |
| create schema test_gp_array_agg; |
| set search_path=test_gp_array_agg; |
| set optimizer_trace_fallback = on; |
| -- Test array_agg(anynonarray) |
| create table perct as select a, a / 10 as b from generate_series(1, 100)a distributed by (a); |
| drop table if exists t1; |
| NOTICE: table "t1" does not exist, skipping |
| create table t1 (a varchar, b character varying) distributed randomly; |
| insert into t1 values ('aaaaaaa', 'cccccccccc'); |
| insert into t1 values ('aaaaaaa', 'ddddd'); |
| insert into t1 values ('bbbbbbb', 'eeee'); |
| insert into t1 values ('bbbbbbb', 'eeef'); |
| insert into t1 values ('bbbbb', 'dfafa'); |
| create temporary table aggordertest (a int4, b int4) distributed by (a); |
| insert into aggordertest values (1,1), (2,2), (1,3), (3,4), (null,5), (2,null); |
| create table mergeappend_test ( a int, b int, x int ) distributed by (a,b); |
| insert into mergeappend_test select g/100, g/100, g from generate_series(1, 500) g; |
| analyze mergeappend_test; |
| create table pagg_test (x int, y int); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into pagg_test |
| select (case x % 4 when 1 then null else x end), x % 10 |
| from generate_series(1,5000) x; |
| select (select gp_array_agg(a order by a) from perct where median(t.a) = 50.5) from (select * from perct t order by a offset 0) as t; |
| gp_array_agg |
| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100} |
| (1 row) |
| |
| select gp_array_agg(f order by f) from (select b::text as f from t1 group by b order by b) q; |
| gp_array_agg |
| ------------------------------------ |
| {cccccccccc,ddddd,dfafa,eeee,eeef} |
| (1 row) |
| |
| select gp_array_agg(a order by a nulls first) from aggordertest; |
| gp_array_agg |
| ------------------ |
| {NULL,1,1,2,2,3} |
| (1 row) |
| |
| select gp_array_agg(a order by a nulls last) from aggordertest; |
| gp_array_agg |
| ------------------ |
| {1,1,2,2,3,NULL} |
| (1 row) |
| |
| select gp_array_agg(a order by a desc nulls first) from aggordertest; |
| gp_array_agg |
| ------------------ |
| {NULL,3,2,2,1,1} |
| (1 row) |
| |
| select gp_array_agg(a order by a desc nulls last) from aggordertest; |
| gp_array_agg |
| ------------------ |
| {3,2,2,1,1,NULL} |
| (1 row) |
| |
| select gp_array_agg(a order by b nulls first) from aggordertest; |
| gp_array_agg |
| ------------------ |
| {2,1,2,1,3,NULL} |
| (1 row) |
| |
| select gp_array_agg(a order by b nulls last) from aggordertest; |
| gp_array_agg |
| ------------------ |
| {1,2,1,3,NULL,2} |
| (1 row) |
| |
| select gp_array_agg(a order by b desc nulls first) from aggordertest; |
| gp_array_agg |
| ------------------ |
| {2,NULL,3,1,2,1} |
| (1 row) |
| |
| select gp_array_agg(a order by b desc nulls last) from aggordertest; |
| gp_array_agg |
| ------------------ |
| {NULL,3,1,2,1,2} |
| (1 row) |
| |
| select a, b, array_dims(gp_array_agg(x)) from mergeappend_test r group by a, b |
| union all |
| select null, null, array_dims(gp_array_agg(x)) from mergeappend_test r |
| order by 1,2; |
| a | b | array_dims |
| ---+---+------------ |
| 0 | 0 | [1:99] |
| 1 | 1 | [1:100] |
| 2 | 2 | [1:100] |
| 3 | 3 | [1:100] |
| 4 | 4 | [1:100] |
| 5 | 5 | [1:1] |
| | | [1:500] |
| (7 rows) |
| |
| explain (costs off) |
| select a, b, array_dims(gp_array_agg(x)) from mergeappend_test r group by a, b |
| union all |
| select null, null, array_dims(gp_array_agg(x)) from mergeappend_test r |
| order by 1,2; |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: r.a, r.b |
| -> Sort |
| Sort Key: r.a, r.b |
| -> Append |
| -> HashAggregate |
| Group Key: r.a, r.b |
| -> Seq Scan on mergeappend_test r |
| -> Result |
| -> Redistribute Motion 1:3 (slice2) |
| -> Aggregate |
| -> Gather Motion 3:1 (slice3; segments: 3) |
| -> Seq Scan on mergeappend_test r_1 |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (14 rows) |
| |
| select a, b, array_dims(gp_array_agg(x)) from mergeappend_test r group by a, b |
| union all |
| select null, null, array_dims(gp_array_agg(x)) from mergeappend_test r, pg_sleep(0) |
| order by 1,2; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions |
| a | b | array_dims |
| ---+---+------------ |
| 0 | 0 | [1:99] |
| 1 | 1 | [1:100] |
| 2 | 2 | [1:100] |
| 3 | 3 | [1:100] |
| 4 | 4 | [1:100] |
| 5 | 5 | [1:1] |
| | | [1:500] |
| (7 rows) |
| |
| explain analyze select a, b, array_dims(gp_array_agg(x)) from mergeappend_test r group by a, b |
| union all |
| select null, null, array_dims(gp_array_agg(x)) from mergeappend_test r |
| order by 1,2; |
| QUERY PLAN |
| ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.06 rows=22 width=16) (actual time=2.204..2.206 rows=7 loops=1) |
| Merge Key: r.a, r.b |
| -> Sort (cost=0.00..862.06 rows=8 width=16) (actual time=1.839..1.855 rows=5 loops=1) |
| Sort Key: r.a, r.b |
| Sort Method: quicksort Memory: 150kB |
| -> Append (cost=0.00..862.06 rows=8 width=16) (actual time=0.333..1.826 rows=5 loops=1) |
| -> HashAggregate (cost=0.00..431.05 rows=7 width=16) (actual time=0.332..0.341 rows=4 loops=1) |
| Group Key: r.a, r.b |
| Peak Memory Usage: 0 kB |
| -> Seq Scan on mergeappend_test r (cost=0.00..431.00 rows=167 width=12) (actual time=0.103..0.144 rows=301 loops=1) |
| -> Result (cost=0.00..431.01 rows=1 width=16) (actual time=1.481..1.483 rows=1 loops=1) |
| -> Redistribute Motion 1:3 (slice2) (cost=0.00..431.01 rows=1 width=8) (actual time=1.477..1.477 rows=1 loops=1) |
| -> Aggregate (cost=0.00..431.01 rows=1 width=8) (actual time=1.579..1.579 rows=1 loops=1) |
| -> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.01 rows=500 width=4) (actual time=1.284..1.451 rows=500 loops=1) |
| -> Seq Scan on mergeappend_test r_1 (cost=0.00..431.00 rows=167 width=4) (actual time=0.087..0.152 rows=301 loops=1) |
| Planning Time: 57.856 ms |
| (slice0) Executor memory: 56K bytes. |
| (slice1) Executor memory: 41K bytes avg x 3 workers, 42K bytes max (seg0). Work_mem: 26K bytes max. |
| (slice2) Executor memory: 58K bytes (entry db). |
| (slice3) Executor memory: 37K bytes avg x 3 workers, 37K bytes max (seg0). |
| Memory used: 128000kB |
| Optimizer: Pivotal Optimizer (GPORCA) |
| Execution Time: 2.943 ms |
| (23 rows) |
| |
| -- create a view as we otherwise have to repeat this query a few times. |
| create view v_pagg_test as |
| select |
| y, |
| min(t) as tmin,max(t) as tmax,count(distinct t) as tndistinct, |
| min(a) as amin,max(a) as amax,count(distinct a) as andistinct |
| from ( |
| select |
| y, |
| unnest(regexp_split_to_array(a1.t, ','))::int as t, |
| unnest(a1.a) as a |
| from ( |
| select |
| y, |
| string_agg(x::text, ',') as t, |
| string_agg(x::text::bytea, ',') as b, |
| gp_array_agg(x) as a |
| from pagg_test |
| group by y |
| ) a1 |
| ) a2 |
| group by y; |
| -- ensure results are correct. |
| select * from v_pagg_test order by y; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| y | tmin | tmax | tndistinct | amin | amax | andistinct |
| ---+------+------+------------+------+------+------------ |
| 0 | 10 | 5000 | 500 | 10 | 5000 | 500 |
| 1 | 11 | 4991 | 250 | 11 | 4991 | 250 |
| 2 | 2 | 4992 | 500 | 2 | 4992 | 500 |
| 3 | 3 | 4983 | 250 | 3 | 4983 | 250 |
| 4 | 4 | 4994 | 500 | 4 | 4994 | 500 |
| 5 | 15 | 4995 | 250 | 15 | 4995 | 250 |
| 6 | 6 | 4996 | 500 | 6 | 4996 | 500 |
| 7 | 7 | 4987 | 250 | 7 | 4987 | 250 |
| 8 | 8 | 4998 | 500 | 8 | 4998 | 500 |
| 9 | 19 | 4999 | 250 | 19 | 4999 | 250 |
| (10 rows) |
| |
| explain (costs off) select * from v_pagg_test order by y; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Multiple Distinct Qualified Aggregates are disabled in the optimizer |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Gather Motion 3:1 (slice1; segments: 3) |
| Merge Key: pagg_test.y |
| -> GroupAggregate |
| Group Key: pagg_test.y |
| -> Sort |
| Sort Key: pagg_test.y |
| -> Result |
| -> ProjectSet |
| -> HashAggregate |
| Group Key: pagg_test.y |
| -> Redistribute Motion 3:3 (slice2; segments: 3) |
| Hash Key: pagg_test.y |
| -> Seq Scan on pagg_test |
| Optimizer: Postgres query optimizer |
| (14 rows) |
| |
| -- Test array_agg(anyarray) |
| create table int_array_table (a int, arr int[]); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| insert into int_array_table select i, array[i, i] from generate_series(1, 5)i; |
| insert into int_array_table select 6, '{6, NULL}'::int[]; |
| insert into int_array_table select 7, '{6, NULL}'::int[]; |
| insert into int_array_table select 8, '{NULL, 7}'::int[]; |
| insert into int_array_table select 9, '{NULL, 7}'::int[]; |
| insert into int_array_table select NULL, '{NULL, NULL}'::int[]; |
| select gp_array_agg(arr order by arr) from int_array_table; |
| gp_array_agg |
| --------------------------------------------------------------------------------- |
| {{1,1},{2,2},{3,3},{4,4},{5,5},{6,NULL},{6,NULL},{NULL,7},{NULL,7},{NULL,NULL}} |
| (1 row) |
| |
| select gp_array_agg(arr order by arr desc) from int_array_table; |
| gp_array_agg |
| --------------------------------------------------------------------------------- |
| {{NULL,NULL},{NULL,7},{NULL,7},{6,NULL},{6,NULL},{5,5},{4,4},{3,3},{2,2},{1,1}} |
| (1 row) |
| |
| select gp_array_agg(arr order by a nulls first) from int_array_table; |
| gp_array_agg |
| --------------------------------------------------------------------------------- |
| {{NULL,NULL},{1,1},{2,2},{3,3},{4,4},{5,5},{6,NULL},{6,NULL},{NULL,7},{NULL,7}} |
| (1 row) |
| |
| select gp_array_agg(arr order by a nulls last) from int_array_table; |
| gp_array_agg |
| --------------------------------------------------------------------------------- |
| {{1,1},{2,2},{3,3},{4,4},{5,5},{6,NULL},{6,NULL},{NULL,7},{NULL,7},{NULL,NULL}} |
| (1 row) |
| |
| select gp_array_agg(arr order by a desc nulls first) from int_array_table; |
| gp_array_agg |
| --------------------------------------------------------------------------------- |
| {{NULL,NULL},{NULL,7},{NULL,7},{6,NULL},{6,NULL},{5,5},{4,4},{3,3},{2,2},{1,1}} |
| (1 row) |
| |
| select gp_array_agg(arr order by a desc nulls last) from int_array_table; |
| gp_array_agg |
| --------------------------------------------------------------------------------- |
| {{NULL,7},{NULL,7},{6,NULL},{6,NULL},{5,5},{4,4},{3,3},{2,2},{1,1},{NULL,NULL}} |
| (1 row) |
| |
| select a, gp_array_agg(arr order by arr) from int_array_table group by a order by a; |
| a | gp_array_agg |
| ---+--------------- |
| 1 | {{1,1}} |
| 2 | {{2,2}} |
| 3 | {{3,3}} |
| 4 | {{4,4}} |
| 5 | {{5,5}} |
| 6 | {{6,NULL}} |
| 7 | {{6,NULL}} |
| 8 | {{NULL,7}} |
| 9 | {{NULL,7}} |
| | {{NULL,NULL}} |
| (10 rows) |
| |
| select array_dims(gp_array_agg(distinct arr)) from int_array_table; |
| array_dims |
| ------------ |
| [1:8][1:2] |
| (1 row) |
| |
| explain (verbose, costs off) select array_dims(gp_array_agg(distinct arr)) from int_array_table; |
| QUERY PLAN |
| ----------------------------------------------------------- |
| Aggregate |
| Output: array_dims(gp_array_agg(DISTINCT arr)) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: arr |
| -> Seq Scan on test_gp_array_agg.int_array_table |
| Output: arr |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (7 rows) |
| |
| CREATE TABLE arrtest ( |
| a int2[], |
| b int4[][][], |
| c name[], |
| d text[][], |
| e float8[], |
| f char(5)[], |
| g varchar(5)[] |
| ) DISTRIBUTED RANDOMLY; |
| INSERT INTO arrtest (a, b[1:2][1:2][1:2], c, d, e, f, g) |
| VALUES ('{1,2}', '{{{0,0},{1,2}},{{3,4},{5,6}}}', '{"foo"}', |
| '{{"elt1", "elt2"}}', '{1.1, 2.2}', |
| '{"abc","abcde"}', '{"abc","abcde"}'); |
| INSERT INTO arrtest (a, b[1:2][1:2][1:2], c, d, e, f, g) |
| VALUES ('{1,2}', '{{{7,8},{9,10}},{{11,12},{13,14}}}', '{"bar"}', |
| '{{"elt1", "elt2"}}', '{"3.3", "4.4"}', |
| '{"abc","abcde"}', '{"abc","abcde"}'); |
| SELECT $query$ |
| select |
| array_agg(a) agg_a, |
| array_dims(array_agg(b)) dims_b, |
| array_dims(array_agg(c)) dims_c, |
| array_agg(d) agg_d, |
| array_dims(array_agg(e)) dims_e, |
| array_agg(f) agg_f, |
| array_agg(g) agg_g |
| from arrtest; |
| $query$ AS qry \gset |
| EXPLAIN (COSTS OFF, VERBOSE) |
| :qry ; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation |
| QUERY PLAN |
| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Finalize Aggregate |
| Output: array_agg(a), array_dims(array_agg(b)), array_dims(array_agg(c)), array_agg(d), array_dims(array_agg(e)), array_agg(f), array_agg(g) |
| -> Gather Motion 3:1 (slice1; segments: 3) |
| Output: (PARTIAL array_agg(a)), (PARTIAL array_agg(b)), (PARTIAL array_agg(c)), (PARTIAL array_agg(d)), (PARTIAL array_agg(e)), (PARTIAL array_agg(f)), (PARTIAL array_agg(g)) |
| -> Partial Aggregate |
| Output: PARTIAL array_agg(a), PARTIAL array_agg(b), PARTIAL array_agg(c), PARTIAL array_agg(d), PARTIAL array_agg(e), PARTIAL array_agg(f), PARTIAL array_agg(g) |
| -> Seq Scan on test_gp_array_agg.arrtest |
| Output: a, b, c, d, e, f, g |
| Optimizer: Postgres query optimizer |
| (9 rows) |
| |
| :qry ; |
| INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner |
| DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: Non-default collation |
| agg_a | dims_b | dims_c | agg_d | dims_e | agg_f | agg_g |
| ---------------+----------------------+------------+-------------------------------+------------+-----------------------------------+--------------------------- |
| {{1,2},{1,2}} | [1:2][1:2][1:2][1:2] | [1:2][1:1] | {{{elt1,elt2}},{{elt1,elt2}}} | [1:2][1:2] | {{"abc ",abcde},{"abc ",abcde}} | {{abc,abcde},{abc,abcde}} |
| (1 row) |
| |
| -- CLEANUP |
| drop schema test_gp_array_agg cascade; |
| NOTICE: drop cascades to 7 other objects |
| DETAIL: drop cascades to table perct |
| drop cascades to table t1 |
| drop cascades to table mergeappend_test |
| drop cascades to table pagg_test |
| drop cascades to view v_pagg_test |
| drop cascades to table int_array_table |
| drop cascades to table arrtest |
| reset optimizer_trace_fallback; |