blob: a6cdaa44de0c1c720e523ec601b371c62dfb0659 [file] [log] [blame]
# IMPALA-9983
# Base case. Limit pushdown into analytic sort should be applied
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 10
order by int_col, bigint_col, smallint_col, rk
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() <= 10
| row-size=22B cardinality=20
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=20
|
01:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| limit: 10
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() <= 10
| row-size=22B cardinality=20
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=20
|
06:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
05:EXCHANGE [HASH(int_col)]
|
01:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# row_number() predicate
# Limit pushdown into analytic sort should be applied.
select * from (
select int_col, bigint_col, smallint_col,
row_number() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 10
order by int_col, bigint_col, smallint_col, rk
limit 5
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=5]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=5
|
03:SELECT
| predicates: row_number() <= 10
| row-size=22B cardinality=15
|
02:ANALYTIC
| functions: row_number()
| partition by: int_col
| order by: smallint_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=15
|
01:TOP-N [LIMIT=15]
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=15
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| limit: 5
|
04:TOP-N [LIMIT=5]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=5
|
03:SELECT
| predicates: row_number() <= 10
| row-size=22B cardinality=15
|
02:ANALYTIC
| functions: row_number()
| partition by: int_col
| order by: smallint_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=15
|
06:TOP-N [LIMIT=15]
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=15
|
05:EXCHANGE [HASH(int_col)]
|
01:TOP-N [LIMIT=15]
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=15
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# Multi column partition-by which is prefix of sort exprs.
# Limit pushdown into analytic sort should be applied
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col, bigint_col
order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 10
order by int_col, bigint_col, smallint_col, rk
limit 10;
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() <= 10
| row-size=22B cardinality=20
|
02:ANALYTIC
| functions: rank()
| partition by: int_col, bigint_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=20
|
01:TOP-N
| order by: int_col ASC NULLS LAST, bigint_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| limit: 10
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() <= 10
| row-size=22B cardinality=20
|
02:ANALYTIC
| functions: rank()
| partition by: int_col, bigint_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=20
|
06:TOP-N
| order by: int_col ASC NULLS LAST, bigint_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
05:EXCHANGE [HASH(int_col,bigint_col)]
|
01:TOP-N
| order by: int_col ASC NULLS LAST, bigint_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# No predicate after the analytic function.
# Limit pushdown should not be applied because the order of smallint_col
# does not match.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col, bigint_col
order by smallint_col desc) rk
from functional.alltypesagg) dt
order by int_col, bigint_col, smallint_col, rk
limit 10;
---- PLAN
PLAN-ROOT SINK
|
03:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
02:ANALYTIC
| functions: rank()
| partition by: int_col, bigint_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, bigint_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| limit: 10
|
03:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
02:ANALYTIC
| functions: rank()
| partition by: int_col, bigint_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, bigint_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
04:EXCHANGE [HASH(int_col,bigint_col)]
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# Limit is present without Order By.
# Limit pushdown should be applied.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 10
limit 10
---- PLAN
PLAN-ROOT SINK
|
03:SELECT
| predicates: rank() <= 10
| limit: 10
| row-size=22B cardinality=10
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=20
|
01:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:EXCHANGE [UNPARTITIONED]
| limit: 10
|
03:SELECT
| predicates: rank() <= 10
| limit: 10
| row-size=22B cardinality=10
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=20
|
05:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
04:EXCHANGE [HASH(int_col)]
|
01:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# rank() predicate operands is flipped.
# Limit pushdown should be applied.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where 11 > rk
order by int_col, bigint_col, smallint_col, rk
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() < 11
| row-size=22B cardinality=20
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=20
|
01:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| limit: 10
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() < 11
| row-size=22B cardinality=20
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=20
|
06:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
05:EXCHANGE [HASH(int_col)]
|
01:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# IMPALA-10296: Limit pushdown should not be applied
# when the limit on rank() is smaller than the limit on the order by.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk < 10
order by int_col, bigint_col, smallint_col, rk
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() < 10
| row-size=22B cardinality=1.10K
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:MERGING-EXCHANGE [UNPARTITIONED]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| limit: 10
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() < 10
| row-size=22B cardinality=1.10K
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
05:EXCHANGE [HASH(int_col)]
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# Partition-by and order-by exprs refer to derived table
# columns rather than base table columns.
# Limit pushdown should be applied.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from (select int_col, bigint_col, smallint_col from functional.alltypesagg
group by int_col, bigint_col, smallint_col)dt1)dt2
where rk <= 10
order by int_col, bigint_col, smallint_col, rk
limit 10
---- PLAN
PLAN-ROOT SINK
|
05:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
04:SELECT
| predicates: rank() <= 10
| row-size=22B cardinality=20
|
03:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=20
|
02:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 20
| row-size=14B cardinality=20
|
01:AGGREGATE [FINALIZE]
| group by: int_col, bigint_col, smallint_col
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# No limit in the query.
# Limit pushdown should not be applied.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 10
order by int_col, bigint_col, smallint_col, rk
---- PLAN
PLAN-ROOT SINK
|
04:SORT
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=1.10K
|
03:SELECT
| predicates: rank() <= 10
| row-size=22B cardinality=1.10K
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# Rank predicate's upper bound is greater than limit.
# Limit pushdown should not be applied.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 20
order by int_col, bigint_col, smallint_col, rk
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() <= 20
| row-size=22B cardinality=30
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=30
|
01:TOP-N
| order by: int_col ASC NULLS LAST, smallint_col DESC
| limit with ties: 30
| row-size=14B cardinality=30
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# Function is not a ranking function.
# Limit pushdown should not be applied.
select * from (
select int_col, bigint_col, smallint_col,
min(double_col) over (partition by int_col
order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 10
order by int_col, bigint_col, smallint_col, rk
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: min(double_col) <= 10
| row-size=30B cardinality=1.10K
|
02:ANALYTIC
| functions: min(double_col)
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=30B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=22B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=22B cardinality=11.00K
====
# Partition-by expr is not a prefix of the sort exprs.
# Limit pushdown should not be applied.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 10
order by bigint_col, int_col, smallint_col, rk
limit 10;
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: bigint_col ASC, int_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() <= 10
| row-size=22B cardinality=1.10K
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# Blocking operator occurs between the top level TopN
# operator and the analytic operator.
# Limit pushdown should not be applied.
select int_col, bigint_col, smallint_col, rk from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 10
group by int_col, bigint_col, smallint_col, rk
order by int_col, bigint_col, smallint_col, rk
limit 10;
---- PLAN
PLAN-ROOT SINK
|
05:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
04:AGGREGATE [FINALIZE]
| group by: int_col, bigint_col, smallint_col, rank()
| row-size=22B cardinality=1.10K
|
03:SELECT
| predicates: rank() <= 10
| row-size=22B cardinality=1.10K
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# Rank predicate is not one of <, =, <= .
# Limit pushdown should not be applied.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk in (10, 20, 30)
order by int_col, bigint_col, smallint_col, rk
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() IN (10, 20, 30)
| row-size=22B cardinality=11.00K
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# 2 ranking functions with different partition-by exprs. Predicate is on
# the row_number() function. If the row_number() appears as the top
# level analytic function, like in this example, the limit pushdown can be
# applied. Otherwise, it should not.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk1,
row_number() over (partition by bigint_col order by smallint_col desc) rk2
from functional.alltypesagg) dt
where rk2 <= 10
order by bigint_col, int_col, smallint_col, rk1, rk2
limit 10
---- PLAN
PLAN-ROOT SINK
|
06:TOP-N [LIMIT=10]
| order by: bigint_col ASC, int_col ASC, smallint_col ASC, rk1 ASC, rk2 ASC
| row-size=30B cardinality=10
|
05:SELECT
| predicates: row_number() <= 10
| row-size=30B cardinality=20
|
04:ANALYTIC
| functions: row_number()
| partition by: bigint_col
| order by: smallint_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=30B cardinality=20
|
03:TOP-N [LIMIT=20]
| order by: bigint_col ASC NULLS LAST, smallint_col DESC
| row-size=22B cardinality=20
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# 2 ranking functions with different partition-by exprs. Predicate is on
# the row_number() function. row_number() is not the top analytic function
# here, so limit pushdown cannot be applied.
select * from (
select int_col, bigint_col, smallint_col,
row_number() over (partition by bigint_col order by smallint_col desc) rk2,
rank() over (partition by int_col order by smallint_col desc) rk1
from functional.alltypesagg) dt
where rk2 <= 10
order by bigint_col, int_col, smallint_col, rk1, rk2
limit 10
---- PLAN
PLAN-ROOT SINK
|
06:TOP-N [LIMIT=10]
| order by: bigint_col ASC, int_col ASC, smallint_col ASC, rk1 ASC, rk2 ASC
| row-size=30B cardinality=10
|
05:SELECT
| predicates: row_number() <= 10
| row-size=30B cardinality=1.10K
|
04:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=30B cardinality=11.00K
|
03:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=22B cardinality=11.00K
|
02:ANALYTIC
| functions: row_number()
| partition by: bigint_col
| order by: smallint_col DESC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: bigint_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# Asc/Desc direction is different for the top order-by.
# Limit pushdown should not be applied.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 10
order by int_col DESC, bigint_col, smallint_col, rk
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: int_col DESC, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() <= 10
| row-size=22B cardinality=1.10K
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# NULLS FIRST for the top order-by which is different compared
# to the partition-by expr.
# Limit pushdown should not be applied.
select * from (
select int_col, bigint_col, smallint_col,
rank() over (partition by int_col order by smallint_col desc) rk
from functional.alltypesagg) dt
where rk <= 10
order by int_col NULLS FIRST, bigint_col, smallint_col, rk
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: int_col ASC NULLS FIRST, bigint_col ASC, smallint_col ASC, rk ASC
| row-size=22B cardinality=10
|
03:SELECT
| predicates: rank() <= 10
| row-size=22B cardinality=1.10K
|
02:ANALYTIC
| functions: rank()
| partition by: int_col
| order by: smallint_col DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=22B cardinality=11.00K
|
01:SORT
| order by: int_col ASC NULLS LAST, smallint_col DESC
| row-size=14B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=14B cardinality=11.00K
====
# IMPALA-10229: limit cannot be pushed through analytic with no predicates
# unless the sort orders of OVER clause and ORDER BY clause match exactly.
select *
from (
select month, id, rank() over (partition by month order by id desc) rnk
from functional_parquet.alltypes
WHERE month >= 11) v
order by month, id limit 3
---- PLAN
PLAN-ROOT SINK
|
03:TOP-N [LIMIT=3]
| order by: month ASC, id ASC
| row-size=16B cardinality=3
|
02:ANALYTIC
| functions: rank()
| partition by: `month`
| order by: id DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=16B cardinality=2.17K
|
01:SORT
| order by: month ASC NULLS LAST, id DESC
| row-size=8B cardinality=2.17K
|
00:SCAN HDFS [functional_parquet.alltypes]
partition predicates: `month` >= 11
HDFS partitions=4/24 files=4 size=34.10KB
row-size=8B cardinality=2.17K
====
# IMPALA-10229: limit can be pushed through analytic with no predicates
# when sort orders of ORDER BY clause and OVER clause match exactly.
select *
from (
select month, id, rank() over (partition by month order by id) rnk
from functional_parquet.alltypes
WHERE month >= 11) v
order by month, id
limit 3
---- PLAN
PLAN-ROOT SINK
|
03:TOP-N [LIMIT=3]
| order by: month ASC, id ASC
| row-size=16B cardinality=3
|
02:ANALYTIC
| functions: rank()
| partition by: `month`
| order by: id ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=16B cardinality=3
|
01:TOP-N [LIMIT=3]
| order by: month ASC NULLS LAST, id ASC
| row-size=8B cardinality=3
|
00:SCAN HDFS [functional_parquet.alltypes]
partition predicates: `month` >= 11
HDFS partitions=4/24 files=4 size=34.10KB
row-size=8B cardinality=2.17K
====
# IMPALA-10229: limit can be pushed through analytic with no predicates
# when sort orders of ORDER BY clause is a prefix of the OVER clause and
# the sort order matches exactly.
select *
from (
select month, id, int_col, rank() over (partition by month order by id) rnk
from functional_parquet.alltypes
WHERE month >= 11) v
order by month, id, int_col desc
limit 3
---- PLAN
PLAN-ROOT SINK
|
03:TOP-N [LIMIT=3]
| order by: month ASC, id ASC, int_col DESC
| row-size=20B cardinality=3
|
02:ANALYTIC
| functions: rank()
| partition by: `month`
| order by: id ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=20B cardinality=3
|
01:TOP-N [LIMIT=3]
| order by: month ASC NULLS LAST, id ASC
| row-size=12B cardinality=3
|
00:SCAN HDFS [functional_parquet.alltypes]
partition predicates: `month` >= 11
HDFS partitions=4/24 files=4 size=34.10KB
row-size=12B cardinality=2.17K
====
# Limit pushdown compares equality of non-trivial order by expressions, including
# constants and non-trivial expressions, so can push down limit in some cases.
select *
from (
select month, id, int_col, string_col,
rank() over (partition by 'hello', month order by id, int_col * 2) rnk
from functional_parquet.alltypes
WHERE month >= 11) v
order by 'hello', month, id, int_col * 2, int_col desc
limit 3
---- PLAN
PLAN-ROOT SINK
|
03:TOP-N [LIMIT=3]
| order by: 'hello' ASC, month ASC, id ASC, int_col * 2 ASC, int_col DESC
| row-size=32B cardinality=3
|
02:ANALYTIC
| functions: rank()
| partition by: 'hello', `month`
| order by: id ASC, int_col * 2 ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=32B cardinality=3
|
01:TOP-N [LIMIT=3]
| order by: 'hello' ASC NULLS LAST, month ASC NULLS LAST, id ASC, int_col * 2 ASC
| row-size=24B cardinality=3
|
00:SCAN HDFS [functional_parquet.alltypes]
partition predicates: `month` >= 11
HDFS partitions=4/24 files=4 size=34.10KB
row-size=24B cardinality=2.17K
====
# Non-trivial order by expressions may not compare equal if the expression was
# materialized before the sort - lower(string_col) in this case.
select *
from (
select month, id, int_col, string_col,
rank() over (partition by 'hello', month order by id, lower(string_col)) rnk
from functional_parquet.alltypes
WHERE month >= 11) v
order by 'hello', month, id, lower(string_col)
limit 3
---- PLAN
PLAN-ROOT SINK
|
03:TOP-N [LIMIT=3]
| order by: 'hello' ASC, month ASC, id ASC, lower(string_col) ASC
| row-size=44B cardinality=3
|
02:ANALYTIC
| functions: rank()
| partition by: 'hello', `month`
| order by: id ASC, lower(string_col) ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=44B cardinality=2.17K
|
01:SORT
| order by: 'hello' ASC NULLS LAST, month ASC NULLS LAST, id ASC, lower(string_col) ASC
| row-size=36B cardinality=2.17K
|
00:SCAN HDFS [functional_parquet.alltypes]
partition predicates: `month` >= 11
HDFS partitions=4/24 files=4 size=34.10KB
row-size=24B cardinality=2.17K
====
# Very high rank() predicate is not pushed down because the limit would not fit in
# a 32-bit integer.
select tinyint_col, string_col, id, rnk from (
select *, rank() over (partition by tinyint_col order by string_col) rnk
from functional.alltypestiny) v
where rnk <= 9999999999
order by tinyint_col, string_col desc, id desc
limit 6
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=6]
| order by: tinyint_col ASC, string_col DESC, id DESC
| row-size=26B cardinality=1
|
03:SELECT
| predicates: rank() <= 9999999999
| row-size=26B cardinality=1
|
02:ANALYTIC
| functions: rank()
| partition by: tinyint_col
| order by: string_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=26B cardinality=8
|
01:SORT
| order by: tinyint_col ASC NULLS LAST, string_col ASC
| row-size=18B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=18B cardinality=8
====
# Very high rank() predicate is pushed down because the limit fits in
# a 32-bit integer and the top-n bytes limit does not kick in.
select tinyint_col, string_col, id, rnk from (
select *, rank() over (partition by tinyint_col order by string_col) rnk
from functional.alltypestiny) v
where rnk <= 999999999
order by tinyint_col, string_col desc, id desc
limit 6
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=6]
| order by: tinyint_col ASC, string_col DESC, id DESC
| row-size=26B cardinality=6
|
03:SELECT
| predicates: rank() <= 999999999
| row-size=26B cardinality=8
|
02:ANALYTIC
| functions: rank()
| partition by: tinyint_col
| order by: string_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=26B cardinality=8
|
01:TOP-N
| order by: tinyint_col ASC NULLS LAST, string_col ASC
| limit with ties: 1000000005
| row-size=18B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=18B cardinality=8
====
# rank() predicate is not pushed down because TOPN_BYTES_LIMIT prevents conversion
# to top-n node.
select tinyint_col, string_col, id, rnk from (
select *, rank() over (partition by tinyint_col order by string_col) rnk
from functional.alltypesagg) v
where rnk <= 9999
order by tinyint_col, string_col desc, id desc
limit 6
---- QUERYOPTIONS
topn_bytes_limit=1000
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=6]
| order by: tinyint_col ASC, string_col DESC, id DESC
| row-size=28B cardinality=6
|
03:SELECT
| predicates: rank() <= 9999
| row-size=28B cardinality=11.00K
|
02:ANALYTIC
| functions: rank()
| partition by: tinyint_col
| order by: string_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=28B cardinality=11.00K
|
01:SORT
| order by: tinyint_col ASC NULLS LAST, string_col ASC
| row-size=20B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=20B cardinality=11.00K
====
# Limit cannot be pushed down past a dense_rank() predicate.
select tinyint_col, string_col, id, rnk from (
select *, dense_rank() over (partition by tinyint_col order by string_col) rnk
from functional.alltypestiny) v
where rnk <= 10
order by tinyint_col, string_col desc, id desc
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: tinyint_col ASC, string_col DESC, id DESC
| row-size=26B cardinality=1
|
03:SELECT
| predicates: dense_rank() <= 10
| row-size=26B cardinality=1
|
02:ANALYTIC
| functions: dense_rank()
| partition by: tinyint_col
| order by: string_col ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=26B cardinality=8
|
01:SORT
| order by: tinyint_col ASC NULLS LAST, string_col ASC
| row-size=18B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=18B cardinality=8
====
# Limit cannot be pushed down past = predicates, except for comparison to 1 where
# it is equivalent to <= 1
select tinyint_col, string_col, id, rnk from (
select *, rank() over (partition by tinyint_col order by id) rnk
from functional.alltypestiny) v
where rnk = 2
order by tinyint_col, string_col desc, id desc
limit 2
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=2]
| order by: tinyint_col ASC, string_col DESC, id DESC
| row-size=26B cardinality=2
|
03:SELECT
| predicates: rank() = 2
| row-size=26B cardinality=8
|
02:ANALYTIC
| functions: rank()
| partition by: tinyint_col
| order by: id ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=26B cardinality=8
|
01:SORT
| order by: tinyint_col ASC NULLS LAST, id ASC
| row-size=18B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=18B cardinality=8
====
# Limit cannot be pushed down past = predicates, except for comparison to 1 where
# it is equivalent to <= 1
select tinyint_col, string_col, id, rnk from (
select *, rank() over (partition by tinyint_col order by id) rnk
from functional.alltypestiny) v
where rnk = 1
order by tinyint_col, string_col desc, id desc
limit 1
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=1]
| order by: tinyint_col ASC, string_col DESC, id DESC
| row-size=26B cardinality=1
|
03:SELECT
| predicates: rank() = 1
| row-size=26B cardinality=1
|
02:ANALYTIC
| functions: rank()
| partition by: tinyint_col
| order by: id ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=26B cardinality=2
|
01:TOP-N
| order by: tinyint_col ASC NULLS LAST, id ASC
| limit with ties: 2
| row-size=18B cardinality=2
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=18B cardinality=8
====