blob: 2b372d826f187554ecc5bd99f17fe59ee27f4feb [file] [log] [blame]
# Test with aggregate expressions which ignore the distinct keyword.
select min(month), max(year), ndv(day) from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: min(`month`), max(`year`), ndv(`day`)
| row-size=16B cardinality=1
|
00:UNION
constant-operands=11
row-size=12B cardinality=11
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: min(`month`), max(`year`), ndv(`day`)
| row-size=16B cardinality=1
|
00:UNION
constant-operands=11
row-size=12B cardinality=11
====
# Test with explicit distinct keyword.
select count(distinct year), ndv(day) from functional.alltypesagg
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(`year`), ndv:merge(`day`)
| row-size=16B cardinality=1
|
01:AGGREGATE
| output: ndv(`day`)
| group by: `year`
| row-size=12B cardinality=1
|
00:UNION
constant-operands=11
row-size=8B cardinality=11
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: count(`year`), ndv:merge(`day`)
| row-size=16B cardinality=1
|
01:AGGREGATE
| output: ndv(`day`)
| group by: `year`
| row-size=12B cardinality=1
|
00:UNION
constant-operands=11
row-size=8B cardinality=11
====
# Test static partition pruning.
select min(month), max(day) from functional.alltypesagg where year = 2010 and day = 1;
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: min(`month`), max(`day`)
| row-size=8B cardinality=1
|
00:UNION
constant-operands=1
row-size=8B cardinality=1
====
# Test with cases where all partitions are pruned.
select c1, c2 from
(select min(year) c1, max(month) c2, count(int_col) c3
from functional.alltypes where year = 2000) t;
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: min(`year`), max(`month`)
| row-size=8B cardinality=0
|
00:UNION
row-size=8B cardinality=0
====
# Test with group by and having clauses.
select ndv(month) from functional.alltypesagg group by year having max(day)=10
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: ndv(`month`), max(`day`)
| group by: `year`
| having: max(`day`) = 10
| row-size=16B cardinality=0
|
00:UNION
constant-operands=11
row-size=12B cardinality=11
====
# Test with group-by clauses (no aggregate expressions) only.
select month from functional.alltypes group by month
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| group by: `month`
| row-size=4B cardinality=12
|
00:UNION
constant-operands=12
row-size=4B cardinality=12
====
# Test with distinct select list.
select distinct month from functional.alltypes where month % 2 = 0
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| group by: `month`
| row-size=4B cardinality=6
|
00:UNION
constant-operands=6
row-size=4B cardinality=6
====
# Test with joins on the partition keys.
select min(a.month)
from functional.alltypes as a, functional.alltypesagg as b
where a.year = b.year
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: min(a.`month`)
| row-size=4B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.`year` = b.`year`
| row-size=12B cardinality=24
|
|--01:UNION
| constant-operands=1
| row-size=4B cardinality=1
|
00:UNION
constant-operands=24
row-size=8B cardinality=24
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: min(a.`month`)
| row-size=4B cardinality=1
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.`year` = b.`year`
| row-size=12B cardinality=24
|
|--04:EXCHANGE [UNPARTITIONED]
| |
| 01:UNION
| constant-operands=1
| row-size=4B cardinality=1
|
00:UNION
constant-operands=24
row-size=8B cardinality=24
====
# Test query which contains both distinct and non-distinct aggregate
# expressions and make sure the optimization is applied when applicable.
select * from
(select distinct year from functional.alltypes) a join
(select year, count(month) from functional.alltypes group by year) b
on (a.year = b.year)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: `year` = `year`
| runtime filters: RF000 <- `year`
| row-size=16B cardinality=4
|
|--01:AGGREGATE [FINALIZE]
| | group by: `year`
| | row-size=4B cardinality=2
| |
| 00:UNION
| constant-operands=2
| row-size=4B cardinality=2
|
03:AGGREGATE [FINALIZE]
| output: count(`month`)
| group by: `year`
| row-size=12B cardinality=2
|
02:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> functional.alltypes.year
row-size=8B cardinality=7.30K
====
# Test queries with tableRefs which cannot be evaluated by metadata.
select min(a.year), ndv(b.timestamp_col) from
functional.alltypes a, functional.alltypesnopart b
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: min(a.`year`), ndv(b.timestamp_col)
| row-size=12B cardinality=0
|
02:NESTED LOOP JOIN [CROSS JOIN]
| row-size=20B cardinality=0
|
|--01:SCAN HDFS [functional.alltypesnopart b]
| partitions=1/1 files=0 size=0B
| row-size=16B cardinality=0
|
00:UNION
constant-operands=2
row-size=4B cardinality=2
====
# Test that non-partitioning slots which aren't materialized won't block the
# optimization from being applied.
select c1, c2 from
(select ndv(a.year + b.year) c1, min(a.month + b.month) c2, count(a.int_col) c3 from
functional.alltypes a, functional.alltypesagg b) t
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: ndv(a.`year` + b.`year`), min(a.`month` + b.`month`)
| row-size=16B cardinality=1
|
02:NESTED LOOP JOIN [CROSS JOIN]
| row-size=16B cardinality=24
|
|--01:UNION
| constant-operands=1
| row-size=8B cardinality=1
|
00:UNION
constant-operands=24
row-size=8B cardinality=24
====
# IMPALA-2948. Unmaterialized slots won't block the optimization (the hash join version).
select t1.int_col
from functional.alltypestiny t1 inner join
(select count(t2.tinyint_col) = 1337 as boolean_col,
min(t2.year) as int_col from functional.alltypestiny t2) t3
on (t1.int_col = t3.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.int_col = min(t2.`year`)
| runtime filters: RF000 <- min(t2.`year`)
| row-size=8B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: min(t2.`year`)
| | row-size=4B cardinality=1
| |
| 01:UNION
| constant-operands=1
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypestiny t1]
partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t1.int_col
row-size=4B cardinality=8
====
# Test with with clauses on the partition keys.
with c1 as (select distinct month from functional.alltypes),
c2 as (select distinct year from functional.alltypes)
select ndv(month) from (select * from c1 union all select * from c2) t
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: ndv(month)
| row-size=8B cardinality=1
|
00:UNION
| row-size=4B cardinality=14
|
|--04:AGGREGATE [FINALIZE]
| | group by: `year`
| | row-size=4B cardinality=2
| |
| 03:UNION
| constant-operands=2
| row-size=4B cardinality=2
|
02:AGGREGATE [FINALIZE]
| group by: `month`
| row-size=4B cardinality=12
|
01:UNION
constant-operands=12
row-size=4B cardinality=12
====
# If slots other than partition keys are accessed, make sure scan nodes are generated.
select date_string_col, min(month) from functional.alltypes group by date_string_col
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: min(`month`)
| group by: date_string_col
| row-size=24B cardinality=736
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
row-size=24B cardinality=7.30K
====
# Make sure non-distinct aggregation functions will generate scan nodes.
select count(month) from functional.alltypes
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(`month`)
| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
====
# Make sure that queries without any aggregation will generate scan nodes.
select month from functional.alltypes order by year
---- PLAN
PLAN-ROOT SINK
|
01:SORT
| order by: year ASC
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
row-size=8B cardinality=7.30K
====