| -- Check for MPP-19310 and MPP-19857 where mksort produces wrong result |
| -- on OPT build, and fails assertion on debug build if a "LIMIT" query |
| -- spills to disk. |
| CREATE TABLE mksort_limit_test_table(dkey INT, jkey INT, rval REAL, tval TEXT default repeat('abcdefghijklmnopqrstuvwxyz', 300)) DISTRIBUTED BY (dkey); |
| INSERT INTO mksort_limit_test_table VALUES(generate_series(1, 10000), generate_series(10001, 20000), sqrt(generate_series(10001, 20000))); |
| --Should fit LESS (because of overhead) than (20 * 1024 * 1024) / (26 * 300 + 12) => 2684 tuples in memory, after that spills to disk |
| SET statement_mem="20MB"; |
| -- Should work in memory |
| SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey LIMIT 200) as temp ORDER BY jkey LIMIT 3; |
| dkey | str |
| ------+----- |
| 1 | ab |
| 2 | ab |
| 3 | ab |
| (3 rows) |
| |
| SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey LIMIT 200) as temp ORDER BY jkey DESC LIMIT 3; |
| dkey | str |
| ------+----- |
| 200 | ab |
| 199 | ab |
| 198 | ab |
| (3 rows) |
| |
| -- Should spill to disk (tested with 2 segments, for more segments it may not spill) |
| SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey LIMIT 5000) as temp ORDER BY jkey LIMIT 3; |
| dkey | str |
| ------+----- |
| 1 | ab |
| 2 | ab |
| 3 | ab |
| (3 rows) |
| |
| SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey LIMIT 5000) as temp ORDER BY jkey DESC LIMIT 3; |
| dkey | str |
| ------+----- |
| 5000 | ab |
| 4999 | ab |
| 4998 | ab |
| (3 rows) |
| |
| -- In memory descending sort |
| SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey DESC LIMIT 200) as temp ORDER BY jkey LIMIT 3; |
| dkey | str |
| ------+----- |
| 9801 | ab |
| 9802 | ab |
| 9803 | ab |
| (3 rows) |
| |
| SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey DESC LIMIT 200) as temp ORDER BY jkey DESC LIMIT 3; |
| dkey | str |
| -------+----- |
| 10000 | ab |
| 9999 | ab |
| 9998 | ab |
| (3 rows) |
| |
| -- Spilled descending sort (tested with 2 segments, for more segments it may not spill) |
| SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey DESC LIMIT 5000) as temp ORDER BY jkey LIMIT 3; |
| dkey | str |
| ------+----- |
| 5001 | ab |
| 5002 | ab |
| 5003 | ab |
| (3 rows) |
| |
| SELECT dkey, substring(tval from 1 for 2) as str from (SELECT * from mksort_limit_test_table ORDER BY dkey DESC LIMIT 5000) as temp ORDER BY jkey DESC LIMIT 3; |
| dkey | str |
| -------+----- |
| 10000 | ab |
| 9999 | ab |
| 9998 | ab |
| (3 rows) |
| |
| DROP TABLE mksort_limit_test_table; |
| -- Check invalid things in LIMIT |
| select * from generate_series(1,10) g limit g; |
| ERROR: argument of LIMIT must not contain variables |
| LINE 1: select * from generate_series(1,10) g limit g; |
| ^ |
| select * from generate_series(1,10) g limit count(*); |
| ERROR: aggregate functions are not allowed in LIMIT |
| LINE 1: select * from generate_series(1,10) g limit count(*); |
| ^ |
| -- Check volatile limit should not pushdown. |
| create table t_volatile_limit (i int4); |
| NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table. |
| HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. |
| create table t_volatile_limit_1 (a int, b int) distributed randomly; |
| -- Cloudberry may generate two-stage limit plan to improve performance. |
| -- But for limit clause contains volatile functions, if we push them down |
| -- below the final gather motion, those volatile functions will be evaluated |
| -- many times. For such cases, we should not push down the limit. |
| -- Below test cases' limit clause contain function call `random` with order by. |
| -- `random()` is a volatile function it may return different results each time |
| -- invoked. If we push down to generate two-stage limit plan, `random()` will |
| -- execute on each segment which leads to different limit values of QEs |
| -- and QD and this cannot guarantee correct results. Suppose seg 0 contains the |
| -- top 3 minimum values, but random() returns 1, then you lose 2 values. |
| explain select * from t_volatile_limit order by i limit (random() * 10); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Limit (cost=0.00..431.00 rows=1 width=4) |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4) |
| Merge Key: i |
| -> Sort (cost=0.00..431.00 rows=1 width=4) |
| Sort Key: i |
| -> Seq Scan on t_volatile_limit (cost=0.00..431.00 rows=1 width=4) |
| Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0 |
| (7 rows) |
| |
| explain select * from t_volatile_limit order by i limit 2 offset (random()*5); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------ |
| Limit (cost=0.00..431.00 rows=1 width=4) |
| -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=4) |
| Merge Key: i |
| -> Sort (cost=0.00..431.00 rows=1 width=4) |
| Sort Key: i |
| -> Seq Scan on t_volatile_limit (cost=0.00..431.00 rows=1 width=4) |
| Optimizer: Pivotal Optimizer (GPORCA) version 3.80.0 |
| (7 rows) |
| |
| explain select distinct(a), sum(b) from t_volatile_limit_1 group by a order by a, sum(b) limit (random()+3); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------ |
| Limit (cost=0.00..431.00 rows=1 width=12) |
| -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12) |
| Merge Key: a, (sum(b)) |
| -> Sort (cost=0.00..431.00 rows=1 width=12) |
| Sort Key: a, (sum(b)) |
| -> GroupAggregate (cost=0.00..431.00 rows=1 width=12) |
| Group Key: a |
| -> Sort (cost=0.00..431.00 rows=1 width=8) |
| Sort Key: a |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) |
| Hash Key: a |
| -> Seq Scan on t_volatile_limit_1 (cost=0.00..431.00 rows=1 width=8) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| explain select distinct(a), sum(b) from t_volatile_limit_1 group by a order by a, sum(b) limit 2 offset (random()*2); |
| QUERY PLAN |
| ------------------------------------------------------------------------------------------------------------------ |
| Limit (cost=0.00..431.00 rows=1 width=12) |
| -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12) |
| Merge Key: a, (sum(b)) |
| -> Sort (cost=0.00..431.00 rows=1 width=12) |
| Sort Key: a, (sum(b)) |
| -> GroupAggregate (cost=0.00..431.00 rows=1 width=12) |
| Group Key: a |
| -> Sort (cost=0.00..431.00 rows=1 width=8) |
| Sort Key: a |
| -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=8) |
| Hash Key: a |
| -> Seq Scan on t_volatile_limit_1 (cost=0.00..431.00 rows=1 width=8) |
| Optimizer: Pivotal Optimizer (GPORCA) |
| (13 rows) |
| |
| drop table t_volatile_limit; |
| drop table t_volatile_limit_1; |
| -- Check LIMIT ALL should not be considered when gathering data to a single node |
| create table t_limit_all(a int, b int) distributed by (a); |
| insert into t_limit_all select i, i from generate_series(1,10)i; |
| explain (costs off) |
| select array(select b from t_limit_all order by b asc limit all) t; |
| QUERY PLAN |
| -------------------------------------------------- |
| Result |
| InitPlan 1 (returns $0) (slice1) |
| -> Gather Motion 3:1 (slice2; segments: 3) |
| Merge Key: t_limit_all.b |
| -> Sort |
| Sort Key: t_limit_all.b |
| -> Seq Scan on t_limit_all |
| Optimizer: Postgres query optimizer |
| (8 rows) |
| |
| select array(select b from t_limit_all order by b asc limit all) t; |
| t |
| ------------------------ |
| {1,2,3,4,5,6,7,8,9,10} |
| (1 row) |
| |
| drop table t_limit_all; |