blob: 363d90b6908afd60d196ef6859347f04ac7665fa [file] [log] [blame]
-- NOTE(review): this is a pg_regress expected-output (.out) file; the echoed
-- SQL, NOTICE/ERROR text, and result rows are diffed verbatim against actual
-- test output, so any edit here must be mirrored in the companion .sql input.
-- Check for MPP-19310 and MPP-19857 where mksort produces wrong result
-- on OPT build, and fails assertion on debug build if a "LIMIT" query
-- spills to disk.
-- tval defaults to a ~7800-byte string so each tuple is wide enough to
-- force a spill under a small statement_mem.
CREATE TABLE mksort_limit_test_table(dkey INT, jkey INT, rval REAL, tval TEXT default repeat('abcdefghijklmnopqrstuvwxyz', 300)) DISTRIBUTED BY (dkey);
INSERT INTO mksort_limit_test_table VALUES(generate_series(1, 10000), generate_series(10001, 20000), sqrt(generate_series(10001, 20000)));
-- Should fit slightly fewer (because of per-tuple overhead) than
-- (20 * 1024 * 1024) / (26 * 300 + 12) => 2684 tuples in memory;
-- after that the sort spills to disk.
SET statement_mem="20MB";
-- Should work in memory
SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey LIMIT 200) as temp ORDER BY jkey LIMIT 3;
dkey | str
------+-----
1 | ab
2 | ab
3 | ab
(3 rows)
SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey LIMIT 200) as temp ORDER BY jkey DESC LIMIT 3;
dkey | str
------+-----
200 | ab
199 | ab
198 | ab
(3 rows)
-- Should spill to disk (tested with 2 segments, for more segments it may not spill)
SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey LIMIT 5000) as temp ORDER BY jkey LIMIT 3;
dkey | str
------+-----
1 | ab
2 | ab
3 | ab
(3 rows)
SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey LIMIT 5000) as temp ORDER BY jkey DESC LIMIT 3;
dkey | str
------+-----
5000 | ab
4999 | ab
4998 | ab
(3 rows)
-- In memory descending sort
SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey DESC LIMIT 200) as temp ORDER BY jkey LIMIT 3;
dkey | str
------+-----
9801 | ab
9802 | ab
9803 | ab
(3 rows)
SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey DESC LIMIT 200) as temp ORDER BY jkey DESC LIMIT 3;
dkey | str
-------+-----
10000 | ab
9999 | ab
9998 | ab
(3 rows)
-- Spilled descending sort (tested with 2 segments, for more segments it may not spill)
SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey DESC LIMIT 5000) as temp ORDER BY jkey LIMIT 3;
dkey | str
------+-----
5001 | ab
5002 | ab
5003 | ab
(3 rows)
SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey DESC LIMIT 5000) as temp ORDER BY jkey DESC LIMIT 3;
dkey | str
-------+-----
10000 | ab
9999 | ab
9998 | ab
(3 rows)
DROP TABLE mksort_limit_test_table;
-- Check that invalid expressions in LIMIT are rejected with a parse error:
-- column references and aggregate functions are not allowed there.
select * from generate_series(1,10) g limit g;
ERROR: argument of LIMIT must not contain variables
LINE 1: select * from generate_series(1,10) g limit g;
^
select * from generate_series(1,10) g limit count(*);
ERROR: aggregate functions are not allowed in LIMIT
LINE 1: select * from generate_series(1,10) g limit count(*);
^
-- Check that a volatile limit is not pushed down.
create table t_volatile_limit (i int4);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table t_volatile_limit_1 (a int, b int) distributed randomly;
-- Cloudberry may generate a two-stage limit plan to improve performance.
-- But when the limit clause contains volatile functions, pushing them down
-- below the final gather motion would evaluate those volatile functions
-- many times. For such cases, we should not push down the limit.
-- The limit clauses of the test cases below call the volatile function
-- `random()`, which may return a different result each time it is invoked.
-- If it were pushed down into a two-stage limit plan, `random()` would
-- execute on each segment, yielding different limit values on the QEs and
-- the QD, which cannot guarantee correct results. For example, if seg 0
-- holds the top 3 minimum values but its random() returns 1, 2 values are lost.
-- Expected plans below have a single Limit node above the Gather Motion
-- (no per-segment Limit), confirming the pushdown was suppressed.
explain select * from t_volatile_limit order by i limit (random() * 10);
QUERY PLAN
------------------------------------------------------------------------------------
Limit (cost=0.00..431.00 rows=1 width=4)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4)
Merge Key: i
-> Sort (cost=0.00..431.00 rows=1 width=4)
Sort Key: i
-> Seq Scan on t_volatile_limit (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(7 rows)
explain select * from t_volatile_limit order by i limit 2 offset (random()*5);
QUERY PLAN
------------------------------------------------------------------------------------
Limit (cost=0.00..431.00 rows=1 width=4)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4)
Merge Key: i
-> Sort (cost=0.00..431.00 rows=1 width=4)
Sort Key: i
-> Seq Scan on t_volatile_limit (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0
(7 rows)
explain select distinct(a), sum(b) from t_volatile_limit_1 group by a order by a, sum(b) limit (random()+3);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Limit (cost=0.00..431.00 rows=1 width=12)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12)
Merge Key: a, (sum(b))
-> Sort (cost=0.00..431.00 rows=1 width=12)
Sort Key: a, (sum(b))
-> GroupAggregate (cost=0.00..431.00 rows=1 width=12)
Group Key: a
-> Sort (cost=0.00..431.00 rows=1 width=8)
Sort Key: a
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8)
Hash Key: a
-> Seq Scan on t_volatile_limit_1 (cost=0.00..431.00 rows=1 width=8)
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
explain select distinct(a), sum(b) from t_volatile_limit_1 group by a order by a, sum(b) limit 2 offset (random()*2);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Limit (cost=0.00..431.00 rows=1 width=12)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12)
Merge Key: a, (sum(b))
-> Sort (cost=0.00..431.00 rows=1 width=12)
Sort Key: a, (sum(b))
-> GroupAggregate (cost=0.00..431.00 rows=1 width=12)
Group Key: a
-> Sort (cost=0.00..431.00 rows=1 width=8)
Sort Key: a
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8)
Hash Key: a
-> Seq Scan on t_volatile_limit_1 (cost=0.00..431.00 rows=1 width=8)
Optimizer: Pivotal Optimizer (GPORCA)
(13 rows)
drop table t_volatile_limit;
drop table t_volatile_limit_1;
-- Check that LIMIT ALL is not considered when gathering data to a single node:
-- the expected plan has no Limit node above the Gather Motion, and all ten
-- rows come back in sorted order.
create table t_limit_all(a int, b int) distributed by (a);
insert into t_limit_all select i, i from generate_series(1,10)i;
explain (costs off)
select array(select b from t_limit_all order by b asc limit all) t;
QUERY PLAN
--------------------------------------------------
Result
InitPlan 1 (returns $0) (slice1)
-> Gather Motion 3:1 (slice2; segments: 3)
Merge Key: t_limit_all.b
-> Sort
Sort Key: t_limit_all.b
-> Seq Scan on t_limit_all
Optimizer: Postgres query optimizer
(8 rows)
select array(select b from t_limit_all order by b asc limit all) t;
t
------------------------
{1,2,3,4,5,6,7,8,9,10}
(1 row)
drop table t_limit_all;