blob: dd82364cc4c7581883594143f1997d5bcd743078 [file]
# IMPALA-10314
# Basic simple limit optimization
select * from functional.alltypes_date_partition limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition]
HDFS partitions=10/55 files=10 size=3.97KB
limit: 10
row-size=65B cardinality=10
====
# Sanity check with simple limit optimization disabled
select * from functional.alltypes_date_partition limit 10;
---- QUERYOPTIONS
OPTIMIZE_SIMPLE_LIMIT=false
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition]
HDFS partitions=55/55 files=55 size=27.27KB
limit: 10
row-size=65B cardinality=10
====
# limit on both sides of a union all
select * from functional.alltypes_date_partition limit 10
union all
select * from functional.alltypes_date_partition limit 5
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
| row-size=65B cardinality=15
|
|--02:SCAN HDFS [functional.alltypes_date_partition]
| HDFS partitions=5/55 files=5 size=1.29KB
| limit: 5
| row-size=65B cardinality=5
|
01:SCAN HDFS [functional.alltypes_date_partition]
HDFS partitions=10/55 files=10 size=3.97KB
limit: 10
row-size=65B cardinality=10
====
# limit in outer block, WHERE clause in inline view
# along with an explicit hint
with dp_view as
(select * from functional.alltypes_date_partition_2
where /* +always_true */ date_col = cast(timestamp_col as date))
select * from dp_view limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition_2]
HDFS partitions=10/50 files=10 size=5.36KB
predicates: date_col = CAST(timestamp_col AS DATE)
limit: 10
row-size=65B cardinality=10
====
# limit and WHERE with a wrongly specified hint (missing
# underscore)
select * from functional.alltypes_date_partition
where /* +alwaystrue */ date_col = cast(timestamp_col as date)
limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition]
HDFS partitions=55/55 files=55 size=27.27KB
predicates: date_col = CAST(timestamp_col AS DATE)
limit: 10
row-size=65B cardinality=10
====
# WHERE with hint in alternative format '[]' and uppercase
select * from functional.alltypes_date_partition
where [ALWAYS_TRUE] date_col = cast(timestamp_col as date)
limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition]
HDFS partitions=10/55 files=10 size=3.97KB
predicates: date_col = CAST(timestamp_col AS DATE)
limit: 10
row-size=65B cardinality=8
====
# WHERE with more conjuncts
select * from functional.alltypes_date_partition
where [ALWAYS_TRUE] date_col = cast(timestamp_col as date)
and int_col >= 0
limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition]
HDFS partitions=10/55 files=10 size=3.97KB
predicates: int_col >= 0, date_col = CAST(timestamp_col AS DATE)
limit: 10
row-size=65B cardinality=8
====
# WHERE with subquery; should not apply optimization
# since the subquery is converted to a join
select * from functional.alltypes_date_partition
where [ALWAYS_TRUE] int_col in (select int_col from functional.alltypes)
limit 10;
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: int_col = functional.alltypes.int_col
| runtime filters: RF000 <- functional.alltypes.int_col
| limit: 10
| row-size=65B cardinality=10
|
|--02:AGGREGATE [FINALIZE]
| | group by: functional.alltypes.int_col
| | row-size=4B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes_date_partition]
HDFS partitions=55/55 files=55 size=27.27KB
runtime filters: RF000 -> int_col
row-size=65B cardinality=500
====
# limit in outer block, WHERE clause in inline view
# but without the hint: should not apply optimization
with dp_view as
(select * from functional.alltypes_date_partition
where date_col = cast(timestamp_col as date))
select * from dp_view limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition]
HDFS partitions=55/55 files=55 size=27.27KB
predicates: date_col = CAST(timestamp_col AS DATE)
limit: 10
row-size=65B cardinality=10
====
# limit and partition pruning via range constant propagation:
# should not apply optimization since outer query predicate
# is not always true
with dp_view as
(select * from functional.alltypes_date_partition_2
where /* +always_true */ date_col = cast(timestamp_col as date))
select * from dp_view
where timestamp_col between '2009-01-01' and '2009-02-01'
limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition_2]
partition predicates: date_col >= DATE '2009-01-01' AND date_col <= DATE '2009-02-01'
HDFS partitions=32/50 files=32 size=17.41KB
predicates: functional.alltypes_date_partition_2.timestamp_col <= TIMESTAMP '2009-02-01 00:00:00', functional.alltypes_date_partition_2.timestamp_col >= TIMESTAMP '2009-01-01 00:00:00', date_col = CAST(timestamp_col AS DATE)
limit: 10
row-size=65B cardinality=10
====
# limit in outer block, WHERE and LIMIT clause in
# inline view
with dp_view as
(select * from functional.alltypes_date_partition_2
where /* +always_true */ date_col = cast(timestamp_col as date) limit 20)
select * from dp_view limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition_2]
HDFS partitions=20/50 files=20 size=10.85KB
predicates: date_col = CAST(timestamp_col AS DATE)
limit: 10
row-size=65B cardinality=10
====
# Join with limit: optimization should not be applied
select * from functional.alltypes_date_partition t1
inner join functional.alltypes t2
on t1.bigint_col = t2.bigint_col
limit 10;
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t2.bigint_col = t1.bigint_col
| runtime filters: RF000 <- t1.bigint_col
| limit: 10
| row-size=154B cardinality=10
|
|--00:SCAN HDFS [functional.alltypes_date_partition t1]
| HDFS partitions=55/55 files=55 size=27.27KB
| row-size=65B cardinality=500
|
01:SCAN HDFS [functional.alltypes t2]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t2.bigint_col
row-size=89B cardinality=7.30K
====
# Order-by with limit: optimization should not be applied
select * from functional.alltypes_date_partition
order by bigint_col limit 10;
---- PLAN
PLAN-ROOT SINK
|
01:TOP-N [LIMIT=10]
| order by: bigint_col ASC
| row-size=65B cardinality=10
|
00:SCAN HDFS [functional.alltypes_date_partition]
HDFS partitions=55/55 files=55 size=27.27KB
row-size=65B cardinality=500
====
# Order-by with limit in outer query, limit also present in
# from clause subquery: optimization should be applied
select * from (select * from functional.alltypes_date_partition limit 10) t
order by int_col limit 10;
---- PLAN
PLAN-ROOT SINK
|
01:TOP-N [LIMIT=10]
| order by: int_col ASC
| row-size=65B cardinality=10
|
00:SCAN HDFS [functional.alltypes_date_partition]
HDFS partitions=10/55 files=10 size=3.97KB
limit: 10
row-size=65B cardinality=10
====
# limit query on table with multiple files per partition
select * from functional.alltypesaggmultifiles limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypesaggmultifiles]
HDFS partitions=3/11 files=10 size=182.67KB
limit: 10
row-size=84B cardinality=10
====
# Special case of EXISTS subquery that gets transformed to have a
# limit 1 and then simple limit optimization should be applied
select * from functional.alltypestiny
where exists (select int_col from functional.alltypes_date_partition)
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=89B cardinality=8
|
|--01:SCAN HDFS [functional.alltypes_date_partition]
| HDFS partitions=1/55 files=1 size=257B
| limit: 1
| row-size=0B cardinality=1
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=89B cardinality=8
====
# Query against a view that has WHERE clause hint.
# Both the num partitions and files are pruned.
select * from functional.alltypes_dp_2_view_1 limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition_2]
HDFS partitions=10/50 files=10 size=5.36KB
predicates: date_col = CAST(timestamp_col AS DATE)
limit: 10
row-size=65B cardinality=10
====
# IMPALA-10360
# Query against a view that has hints for table sampling
# and WHERE clause. Since table has a convert_limit_to_sample
# hint, number of files are pruned.
select * from functional.alltypes_dp_2_view_2 limit 10;
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: int_col = functional.alltypessmall.int_col
| runtime filters: RF000 <- functional.alltypessmall.int_col
| limit: 10
| row-size=65B cardinality=3
|
|--02:AGGREGATE [FINALIZE]
| | group by: functional.alltypessmall.int_col
| | row-size=4B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=4B cardinality=100
|
00:SCAN HDFS [functional.alltypes_date_partition_2]
HDFS partitions=3/50 files=3 size=1.65KB
predicates: date_col = CAST(timestamp_col AS DATE)
runtime filters: RF000 -> int_col
row-size=65B cardinality=3
====
# query against view but without the limit - no optimization
select * from functional.alltypes_dp_2_view_1;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition_2]
HDFS partitions=50/50 files=50 size=27.27KB
predicates: date_col = CAST(timestamp_col AS DATE)
row-size=65B cardinality=50
====
# query against view but without the limit - no optimization
select * from functional.alltypes_dp_2_view_2;
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: int_col = functional.alltypessmall.int_col
| runtime filters: RF000 <- functional.alltypessmall.int_col
| row-size=65B cardinality=50
|
|--02:AGGREGATE [FINALIZE]
| | group by: functional.alltypessmall.int_col
| | row-size=4B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=4B cardinality=100
|
00:SCAN HDFS [functional.alltypes_date_partition_2]
HDFS partitions=50/50 files=50 size=27.27KB
predicates: date_col = CAST(timestamp_col AS DATE)
runtime filters: RF000 -> int_col
row-size=65B cardinality=50
====
# Query against a base table that has convert_limit_to_sample hint
# and WHERE clause has always_true hint. Num files is pruned.
select * from functional.alltypes_date_partition_2 [convert_limit_to_sample(5)]
where [always_true] date_col = cast(timestamp_col as date)
and int_col in (select int_col from functional.alltypessmall)
limit 50;
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: int_col = functional.alltypessmall.int_col
| runtime filters: RF000 <- functional.alltypessmall.int_col
| limit: 50
| row-size=65B cardinality=3
|
|--02:AGGREGATE [FINALIZE]
| | group by: functional.alltypessmall.int_col
| | row-size=4B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=4B cardinality=100
|
00:SCAN HDFS [functional.alltypes_date_partition_2]
HDFS partitions=3/50 files=3 size=1.65KB
predicates: date_col = CAST(timestamp_col AS DATE)
runtime filters: RF000 -> int_col
row-size=65B cardinality=3
====
# Query against a base table that has convert_limit_to_sample hint
# and WHERE clause has always_true hint. Predicate on partition
# columns. Both num partitions and num files is pruned.
select * from functional.alltypes_date_partition_2 [convert_limit_to_sample(5)]
where [always_true] date_col = cast(timestamp_col as date)
and timestamp_col between '2009-01-01' and '2009-02-01'
limit 10;
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes_date_partition_2]
partition predicates: date_col >= DATE '2009-01-01' AND date_col <= DATE '2009-02-01'
HDFS partitions=2/50 files=2 size=1.07KB
predicates: timestamp_col <= TIMESTAMP '2009-02-01 00:00:00', timestamp_col >= TIMESTAMP '2009-01-01 00:00:00', date_col = CAST(timestamp_col AS DATE)
limit: 10
row-size=65B cardinality=2
====