create table cost_agg_t1(a int, b int, c int);
insert into cost_agg_t1 select i, random() * 99999, i % 2000 from generate_series(1, 1000000) i;
create table cost_agg_t2 as select * from cost_agg_t1 with no data;
insert into cost_agg_t2 select i, random() * 99999, i % 300000 from generate_series(1, 1000000) i;
analyze cost_agg_t1;
analyze cost_agg_t2;
--
-- Test the planner's decisions on aggregates when only a little memory is available.
--
set statement_mem = '1800 kB';
-- There are only 2000 distinct values of 'c' in the table, so the hash table
-- fits comfortably in memory.
explain select avg(b) from cost_agg_t1 group by c;
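-- (Illustrative sanity check, not part of the original test: since c is
-- computed as i % 2000 over one million rows, the table has exactly 2000
-- distinct groups.)
select count(distinct c) from cost_agg_t1;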
-- In the other table, there are 300000 distinct values of 'c', so the hash
-- table doesn't fit in statement_mem. The planner chooses to do a single-phase
-- agg for this.
--
-- In the single-phase plan, the aggregation is performed after redistributing
-- the data, which means that each node only has to process 1/(# of segments)
-- fraction of the data. That fits in memory, whereas an initial stage before
-- redistributing would not. And it would eliminate only a few rows, anyway.
explain select avg(b) from cost_agg_t2 group by c;
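-- (Illustrative, not part of the original test: the "1/(# of segments)"
-- fraction above depends on the cluster size, which can be read from the
-- standard gp_segment_configuration catalog.)
select count(*) as primary_segments
from gp_segment_configuration
where role = 'p' and content >= 0;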
-- But if there are a lot more duplicate values, the two-stage plan becomes
-- cheaper again, even though it doesn't fit in memory and has to spill.
insert into cost_agg_t2 select i, random() * 99999, 1 from generate_series(1, 200000) i;
analyze cost_agg_t2;
explain select avg(b) from cost_agg_t2 group by c;
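-- (Illustrative, not part of the original test: after the insert above, the
-- value c = 1 covers roughly 200000 of the 1.2 million rows, so a first-stage
-- agg now eliminates many rows before the redistribution.)
select count(*) from cost_agg_t2 where c = 1;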
drop table cost_agg_t1;
drop table cost_agg_t2;
reset statement_mem;
-- The following case tests the GUC gp_eager_two_phase_agg for the planner.
-- When it is set to true, the planner will choose a two-stage agg.
create table t_planner_force_multi_stage(a int, b int) distributed randomly;
analyze t_planner_force_multi_stage;
show gp_eager_two_phase_agg;
-- The GUC gp_eager_two_phase_agg defaults to false, and the table contains no
-- data, so the one-stage agg will win.
explain (costs off) select b, sum(a) from t_planner_force_multi_stage group by b;
set gp_eager_two_phase_agg = on;
-- When forcing two-stage, the planner should generate a two-stage agg plan.
explain (costs off) select b, sum(a) from t_planner_force_multi_stage group by b;
reset gp_eager_two_phase_agg;
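-- (Illustrative sketch, not part of the original test: with data loaded and a
-- small number of groups, the planner can prefer a two-stage agg on cost alone,
-- without the GUC, by the same duplicate-elimination reasoning as above.)
insert into t_planner_force_multi_stage select i, i % 10 from generate_series(1, 10000) i;
analyze t_planner_force_multi_stage;
explain (costs off) select b, sum(a) from t_planner_force_multi_stage group by b;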
drop table t_planner_force_multi_stage;
-- Test operatorMem: print per-operator memory limits in EXPLAIN output.
begin;
create table test_operator_mem (i int, j int) distributed by (i);
insert into test_operator_mem select i, i+1 from generate_series(1, 100) i;
analyze test_operator_mem;
set local statement_mem=1024;
set local gp_resqueue_print_operator_memory_limits=on;
explain (costs off)
select count(*) from test_operator_mem;
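-- (Illustrative, not part of the original test: confirm the per-statement
-- limit, set locally above, that the operatorMem figures are derived from.)
show statement_mem;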
abort;
-- Test a user-defined aggregate marked safe (repsafe = true) to execute on
-- replicated slices without a motion, against an equivalent unmarked one.
CREATE AGGREGATE my_unsafe_avg (float8)
(
sfunc = float8_accum,
stype = float8[],
finalfunc = float8_avg,
initcond = '{0,0,0}'
);
CREATE AGGREGATE my_safe_avg (float8)
(
sfunc = float8_accum,
stype = float8[],
finalfunc = float8_avg,
initcond = '{0,0,0}',
repsafe = true
);
CREATE TABLE a_reptable (a int) DISTRIBUTED REPLICATED;
CREATE TABLE b_reptable (b int) DISTRIBUTED REPLICATED;
EXPLAIN INSERT INTO a_reptable(a) SELECT my_unsafe_avg(b) FROM b_reptable;
EXPLAIN INSERT INTO a_reptable(a) SELECT my_safe_avg(b) FROM b_reptable;
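-- (Illustrative usage, not part of the original test: both aggregates compute
-- an ordinary average and return the same value; they differ only in whether
-- the planner may run them directly on a replicated slice.)
INSERT INTO b_reptable SELECT generate_series(1, 10);
SELECT my_unsafe_avg(b), my_safe_avg(b) FROM b_reptable;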