blob: 7d10dc6f6c922b3887b4496c8eb34b1e924e5cba [file] [log] [blame]
# subquery with aggregation and order by/limit, as left-hand side of join;
# having clause in subquery is transfered to merge agg step in distrib plan
select *
from (
select int_col, count(*)
from functional.alltypessmall
where month = 1
group by int_col
having count(*) > 1
order by count(*) desc limit 5
) t1
join functional.alltypes t2 on (t1.int_col = t2.int_col)
where month = 1
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t2.int_col = int_col
| runtime filters: RF000 <- int_col
|
|--02:TOP-N [LIMIT=5]
| | order by: count(*) DESC
| |
| 01:AGGREGATE [FINALIZE]
| | output: count(*)
| | group by: int_col
| | having: count(*) > 1
| |
| 00:SCAN HDFS [functional.alltypessmall]
| partitions=1/4 files=1 size=1.57KB
|
03:SCAN HDFS [functional.alltypes t2]
partitions=2/24 files=2 size=40.32KB
runtime filters: RF000 -> t2.int_col
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: t2.int_col = int_col
| runtime filters: RF000 <- int_col
|
|--08:EXCHANGE [BROADCAST]
| |
| 07:MERGING-EXCHANGE [UNPARTITIONED]
| | order by: count(*) DESC
| | limit: 5
| |
| 02:TOP-N [LIMIT=5]
| | order by: count(*) DESC
| |
| 06:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | group by: int_col
| | having: count(*) > 1
| |
| 05:EXCHANGE [HASH(int_col)]
| |
| 01:AGGREGATE [STREAMING]
| | output: count(*)
| | group by: int_col
| |
| 00:SCAN HDFS [functional.alltypessmall]
| partitions=1/4 files=1 size=1.57KB
|
03:SCAN HDFS [functional.alltypes t2]
partitions=2/24 files=2 size=40.32KB
runtime filters: RF000 -> t2.int_col
====
# simple full scan subquery
select * from (select y x from (select id y from functional_hbase.alltypessmall) a) b
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypessmall]
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HBASE [functional_hbase.alltypessmall]
====
# subquery doing join
select * from (select t2.*
from functional.testtbl t1 join functional.testtbl t2 using(id)
where t1.zip = 94611) x
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
|
|--01:SCAN HDFS [functional.testtbl t2]
| partitions=1/1 files=0 size=0B
|
00:SCAN HDFS [functional.testtbl t1]
partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
runtime filters: RF000 -> t1.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl t2]
| partitions=1/1 files=0 size=0B
|
00:SCAN HDFS [functional.testtbl t1]
partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
runtime filters: RF000 -> t1.id
====
# subquery doing join
# multiple join predicates;
# scan predicates get propagated correctly;
# non-eq join predicates are evaluated as extra conjuncts by the join node
select *
from
(select a.*
from functional.alltypesagg a
right outer join functional.alltypessmall b using (id, int_col)
where a.day >= 6
and b.month > 2
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15) x
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: a.id = b.id, a.int_col = b.int_col
| other predicates: a.tinyint_col = 15, a.day >= 6, a.tinyint_col + b.tinyint_col < 15
| runtime filters: RF000 <- b.id, RF001 <- b.int_col
|
|--01:SCAN HDFS [functional.alltypessmall b]
| partitions=2/4 files=2 size=3.17KB
| predicates: b.string_col = '15'
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=5/11 files=5 size=372.38KB
predicates: a.tinyint_col = 15
runtime filters: RF000 -> a.id, RF001 -> a.int_col
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: a.id = b.id, a.int_col = b.int_col
| other predicates: a.tinyint_col = 15, a.day >= 6, a.tinyint_col + b.tinyint_col < 15
| runtime filters: RF000 <- b.id, RF001 <- b.int_col
|
|--04:EXCHANGE [HASH(b.id,b.int_col)]
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partitions=2/4 files=2 size=3.17KB
| predicates: b.string_col = '15'
|
03:EXCHANGE [HASH(a.id,a.int_col)]
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=5/11 files=5 size=372.38KB
predicates: a.tinyint_col = 15
runtime filters: RF000 -> a.id, RF001 -> a.int_col
====
# predicate pushdown
select * from (select * from functional_hbase.alltypessmall) a where id < 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypessmall]
predicates: functional_hbase.alltypessmall.id < 5
====
# subquery join
# multiple join predicates;
# scan predicates get propagated correctly;
# non-eq join predicates are evaluated as extra conjuncts by the join node
select *
from
(select id, int_col, day, tinyint_col from functional.alltypesagg) a
right outer join
(select id, int_col, month, string_col, tinyint_col
from functional.alltypessmall) b using (id, int_col)
where a.day >= 6
and b.month > 2
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15
and b.id + 15 = 27
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: id = id, int_col = int_col
| other predicates: tinyint_col = 15, day >= 6, tinyint_col + tinyint_col < 15
| runtime filters: RF000 <- id, RF001 <- int_col
|
|--01:SCAN HDFS [functional.alltypessmall]
| partitions=2/4 files=2 size=3.17KB
| predicates: functional.alltypessmall.id + 15 = 27, functional.alltypessmall.string_col = '15'
|
00:SCAN HDFS [functional.alltypesagg]
partitions=5/11 files=5 size=372.38KB
predicates: functional.alltypesagg.tinyint_col = 15, functional.alltypesagg.id + 15 = 27
runtime filters: RF000 -> id, RF001 -> int_col
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: id = id, int_col = int_col
| other predicates: tinyint_col = 15, day >= 6, tinyint_col + tinyint_col < 15
| runtime filters: RF000 <- id, RF001 <- int_col
|
|--04:EXCHANGE [HASH(id,int_col)]
| |
| 01:SCAN HDFS [functional.alltypessmall]
| partitions=2/4 files=2 size=3.17KB
| predicates: functional.alltypessmall.id + 15 = 27, functional.alltypessmall.string_col = '15'
|
03:EXCHANGE [HASH(id,int_col)]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=5/11 files=5 size=372.38KB
predicates: functional.alltypesagg.tinyint_col = 15, functional.alltypesagg.id + 15 = 27
runtime filters: RF000 -> id, RF001 -> int_col
====
# subquery join
# multiple join predicates;
# scan predicates get propagated correctly;
# non-eq join predicates are evaluated as extra conjuncts by the join node
select *
from
(select id, int_col, day, tinyint_col
from
(select id, int_col, day, tinyint_col from functional.alltypesagg) a0
where a0.day >= 6) a
right outer join
(select id, int_col, month, string_col, tinyint_col from functional.alltypessmall) b
using (id, int_col)
where b.month > 2
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: id = id, int_col = int_col
| other predicates: tinyint_col = 15, tinyint_col + tinyint_col < 15
| runtime filters: RF000 <- id, RF001 <- int_col
|
|--01:SCAN HDFS [functional.alltypessmall]
| partitions=2/4 files=2 size=3.17KB
| predicates: functional.alltypessmall.string_col = '15'
|
00:SCAN HDFS [functional.alltypesagg]
partitions=5/11 files=5 size=372.38KB
predicates: functional.alltypesagg.tinyint_col = 15
runtime filters: RF000 -> id, RF001 -> int_col
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: id = id, int_col = int_col
| other predicates: tinyint_col = 15, tinyint_col + tinyint_col < 15
| runtime filters: RF000 <- id, RF001 <- int_col
|
|--04:EXCHANGE [HASH(id,int_col)]
| |
| 01:SCAN HDFS [functional.alltypessmall]
| partitions=2/4 files=2 size=3.17KB
| predicates: functional.alltypessmall.string_col = '15'
|
03:EXCHANGE [HASH(id,int_col)]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=5/11 files=5 size=372.38KB
predicates: functional.alltypesagg.tinyint_col = 15
runtime filters: RF000 -> id, RF001 -> int_col
====
# complex join, having joined subquery on the rhs, and predicate
# at multiple subquery level. This tests that both sides of a join
# that is itself on the build side of another join get compacted.
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
from functional.alltypessmall c
join (
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month, b.float_col float_col, b.id id
from ( select * from functional.alltypesagg a where month=1 ) a
join functional.alltypessmall b on (a.smallint_col = b.id)
) x on (x.tinyint_col = c.id)
where x.day=1
and x.int_col > 899
and x.float_col > 4.5
and c.string_col < '7'
and x.int_col + x.float_col + cast(c.string_col as float) < 1000
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: c.id = a.tinyint_col
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
| runtime filters: RF000 <- a.tinyint_col
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: a.smallint_col = b.id
| | runtime filters: RF001 <- b.id
| |
| |--02:SCAN HDFS [functional.alltypessmall b]
| | partitions=4/4 files=4 size=6.32KB
| | predicates: b.float_col > 4.5
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| partitions=1/11 files=1 size=73.39KB
| predicates: a.int_col > 899
| runtime filters: RF001 -> a.smallint_col
|
00:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
predicates: c.string_col < '7'
runtime filters: RF000 -> c.id
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
NODE 2:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: c.id = a.tinyint_col
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
| runtime filters: RF000 <- a.tinyint_col
|
|--08:EXCHANGE [HASH(a.tinyint_col)]
| |
| 03:HASH JOIN [INNER JOIN, PARTITIONED]
| | hash predicates: b.id = a.smallint_col
| | runtime filters: RF001 <- a.smallint_col
| |
| |--06:EXCHANGE [HASH(a.smallint_col)]
| | |
| | 01:SCAN HDFS [functional.alltypesagg a]
| | partitions=1/11 files=1 size=73.39KB
| | predicates: a.int_col > 899
| |
| 05:EXCHANGE [HASH(b.id)]
| |
| 02:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
| predicates: b.float_col > 4.5
| runtime filters: RF001 -> b.id
|
07:EXCHANGE [HASH(c.id)]
|
00:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
predicates: c.string_col < '7'
runtime filters: RF000 -> c.id
====
# with grouping
select tinyint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from (select * from functional.alltypesagg) a
group by 1
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*), min(functional.alltypesagg.tinyint_col), max(functional.alltypesagg.tinyint_col), sum(functional.alltypesagg.tinyint_col), avg(functional.alltypesagg.tinyint_col)
| group by: functional.alltypesagg.tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=__HIVE_DEFAULT_PARTITION__/000000_0 0:72759
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*), min:merge(tinyint_col), max:merge(tinyint_col), sum:merge(tinyint_col), avg:merge(tinyint_col)
| group by: tinyint_col
|
02:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| output: count(*), min(functional.alltypesagg.tinyint_col), max(functional.alltypesagg.tinyint_col), sum(functional.alltypesagg.tinyint_col), avg(functional.alltypesagg.tinyint_col)
| group by: functional.alltypesagg.tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# with grouping
select * from (
select tinyint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
group by 1
) a
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
| group by: tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*), min:merge(tinyint_col), max:merge(tinyint_col), sum:merge(tinyint_col), avg:merge(tinyint_col)
| group by: tinyint_col
|
02:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
| group by: tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
select c1, c2, c3
from
(select c1, c2, c3
from
(select int_col c1, sum(float_col) c2, min(float_col) c3
from functional_hbase.alltypessmall
group by 1) x
order by 2,3 desc
limit 5
) y
---- PLAN
PLAN-ROOT SINK
|
02:TOP-N [LIMIT=5]
| order by: c2 ASC, c3 DESC
|
01:AGGREGATE [FINALIZE]
| output: sum(float_col), min(float_col)
| group by: int_col
|
00:SCAN HBASE [functional_hbase.alltypessmall]
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: c2 ASC, c3 DESC
| limit: 5
|
02:TOP-N [LIMIT=5]
| order by: c2 ASC, c3 DESC
|
04:AGGREGATE [FINALIZE]
| output: sum:merge(float_col), min:merge(float_col)
| group by: int_col
|
03:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE [STREAMING]
| output: sum(float_col), min(float_col)
| group by: int_col
|
00:SCAN HBASE [functional_hbase.alltypessmall]
====
select c1, x2
from (
select c1, min(c2) x2
from (
select c1, c2, c3
from (
select int_col c1, tinyint_col c2, min(float_col) c3
from functional_hbase.alltypessmall
group by 1, 2
order by 1,2
limit 1
) x
) x2
group by c1
) y
order by 2,1 desc
limit 0
---- PLAN
PLAN-ROOT SINK
|
00:EMPTYSET
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:EMPTYSET
====
# distinct *
select distinct *
from (select distinct * from functional.testtbl) x
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
01:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
04:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
03:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)]
|
01:AGGREGATE [STREAMING]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
====
# distinct w/ explicit select list
select distinct id, zip
from (select distinct * from functional.testtbl) x
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.zip
|
01:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
06:AGGREGATE [FINALIZE]
| group by: id, zip
|
05:EXCHANGE [HASH(id,zip)]
|
02:AGGREGATE [STREAMING]
| group by: functional.testtbl.id, functional.testtbl.zip
|
04:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
03:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)]
|
01:AGGREGATE [STREAMING]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
====
# aggregate with group-by, having
select *
from (
select int_col % 7 c1, count(*) c2, avg(int_col) c3
from (
select * from functional.alltypesagg
) a
group by 1
having avg(int_col) > 500 or count(*) = 10
) b
where c1 is not null
and c2 > 10
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*), avg(functional.alltypesagg.int_col)
| group by: functional.alltypesagg.int_col % 7
| having: int_col % 7 IS NOT NULL, count(*) > 10, avg(int_col) > 500 OR count(*) = 10
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*), avg:merge(int_col)
| group by: int_col % 7
| having: int_col % 7 IS NOT NULL, count(*) > 10, avg(int_col) > 500 OR count(*) = 10
|
02:EXCHANGE [HASH(int_col % 7)]
|
01:AGGREGATE [STREAMING]
| output: count(*), avg(functional.alltypesagg.int_col)
| group by: functional.alltypesagg.int_col % 7
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# subquery with left outer join
select j.*, d.*
from (
select *
from functional.JoinTbl a
) j
left outer join
(
select *
from functional.DimTbl b
) d
on (j.test_name = d.name)
where j.test_id <= 1006
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: b.name = a.test_name
| runtime filters: RF000 <- a.test_name
|
|--00:SCAN HDFS [functional.jointbl a]
| partitions=1/1 files=1 size=433B
| predicates: a.test_id <= 1006
|
01:SCAN HDFS [functional.dimtbl b]
partitions=1/1 files=1 size=171B
runtime filters: RF000 -> b.name
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: b.name = a.test_name
| runtime filters: RF000 <- a.test_name
|
|--04:EXCHANGE [HASH(a.test_name)]
| |
| 00:SCAN HDFS [functional.jointbl a]
| partitions=1/1 files=1 size=433B
| predicates: a.test_id <= 1006
|
03:EXCHANGE [HASH(b.name)]
|
01:SCAN HDFS [functional.dimtbl b]
partitions=1/1 files=1 size=171B
runtime filters: RF000 -> b.name
====
# complex join, having joined subquery on the rhs, and predicate
# at multiple subquery level
select x.smallint_col, count(x.id)
from functional.alltypessmall c
left outer join
(
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month, b.float_col float_col, b.id id
from (
select *
from functional.alltypesagg a
) a
join
functional.alltypessmall b
on (a.smallint_col = b.id)
) x
on (x.tinyint_col = c.id)
group by x.smallint_col
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(b.id)
| group by: a.smallint_col
|
04:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: a.tinyint_col = c.id
| runtime filters: RF000 <- c.id
|
|--00:SCAN HDFS [functional.alltypessmall c]
| partitions=4/4 files=4 size=6.32KB
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.smallint_col = b.id
| runtime filters: RF001 <- b.id
|
|--02:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
|
01:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> a.tinyint_col, RF001 -> a.smallint_col
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
11:EXCHANGE [UNPARTITIONED]
|
10:AGGREGATE [FINALIZE]
| output: count:merge(x.id)
| group by: x.smallint_col
|
09:EXCHANGE [HASH(x.smallint_col)]
|
05:AGGREGATE [STREAMING]
| output: count(b.id)
| group by: a.smallint_col
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: a.tinyint_col = c.id
| runtime filters: RF000 <- c.id
|
|--08:EXCHANGE [HASH(c.id)]
| |
| 00:SCAN HDFS [functional.alltypessmall c]
| partitions=4/4 files=4 size=6.32KB
|
07:EXCHANGE [HASH(a.tinyint_col)]
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.smallint_col = b.id
| runtime filters: RF001 <- b.id
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
|
01:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> a.tinyint_col, RF001 -> a.smallint_col
====
# complex join, having joined subquery on the lhs, and predicate
# at multiple subquery level
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
from
(
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month, b.float_col float_col, b.id id
from (
select *
from functional.alltypesagg a
where month=1
) a
join
functional.alltypessmall b
on (a.smallint_col = b.id)
) x
join
functional.alltypessmall c
on (x.tinyint_col = c.id)
where x.day=1
and x.int_col > 899
and x.float_col > 4.5
and c.string_col < '7'
and x.int_col + x.float_col + CAST(c.string_col AS FLOAT) < 1000
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: c.id = a.tinyint_col
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
| runtime filters: RF000 <- a.tinyint_col
|
|--02:HASH JOIN [INNER JOIN]
| | hash predicates: a.smallint_col = b.id
| | runtime filters: RF001 <- b.id
| |
| |--01:SCAN HDFS [functional.alltypessmall b]
| | partitions=4/4 files=4 size=6.32KB
| | predicates: b.float_col > 4.5
| |
| 00:SCAN HDFS [functional.alltypesagg a]
| partitions=1/11 files=1 size=73.39KB
| predicates: a.int_col > 899
| runtime filters: RF001 -> a.smallint_col
|
03:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
predicates: c.string_col < '7'
runtime filters: RF000 -> c.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: c.id = a.tinyint_col
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
| runtime filters: RF000 <- a.tinyint_col
|
|--08:EXCHANGE [HASH(a.tinyint_col)]
| |
| 02:HASH JOIN [INNER JOIN, PARTITIONED]
| | hash predicates: b.id = a.smallint_col
| | runtime filters: RF001 <- a.smallint_col
| |
| |--06:EXCHANGE [HASH(a.smallint_col)]
| | |
| | 00:SCAN HDFS [functional.alltypesagg a]
| | partitions=1/11 files=1 size=73.39KB
| | predicates: a.int_col > 899
| |
| 05:EXCHANGE [HASH(b.id)]
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
| predicates: b.float_col > 4.5
| runtime filters: RF001 -> b.id
|
07:EXCHANGE [HASH(c.id)]
|
03:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
predicates: c.string_col < '7'
runtime filters: RF000 -> c.id
====
# complex join, having joined aggregate subquery on the rhs, and predicate
# at multiple subquery level
select x.smallint_col, sum(x.cnt)
from functional.alltypessmall c
join (
select count(a.id) cnt, b.smallint_col smallint_col
from ( select * from functional.alltypesagg a ) a
join functional.alltypessmall b on (a.smallint_col = b.id)
group by b.smallint_col
) x on (x.smallint_col = c.id)
group by x.smallint_col
---- PLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: sum(count(a.id))
| group by: b.smallint_col
|
05:HASH JOIN [INNER JOIN]
| hash predicates: c.id = b.smallint_col
| runtime filters: RF000 <- b.smallint_col
|
|--04:AGGREGATE [FINALIZE]
| | output: count(a.id)
| | group by: b.smallint_col
| |
| 03:HASH JOIN [INNER JOIN]
| | hash predicates: a.smallint_col = b.id
| | runtime filters: RF001 <- b.id
| |
| |--02:SCAN HDFS [functional.alltypessmall b]
| | partitions=4/4 files=4 size=6.32KB
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 files=11 size=814.73KB
| runtime filters: RF001 -> a.smallint_col
|
00:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> c.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
13:EXCHANGE [UNPARTITIONED]
|
12:AGGREGATE [FINALIZE]
| output: sum:merge(x.cnt)
| group by: x.smallint_col
|
11:EXCHANGE [HASH(x.smallint_col)]
|
06:AGGREGATE [STREAMING]
| output: sum(count(a.id))
| group by: b.smallint_col
|
05:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c.id = b.smallint_col
| runtime filters: RF000 <- b.smallint_col
|
|--10:EXCHANGE [BROADCAST]
| |
| 09:AGGREGATE [FINALIZE]
| | output: count:merge(a.id)
| | group by: b.smallint_col
| |
| 08:EXCHANGE [HASH(b.smallint_col)]
| |
| 04:AGGREGATE [STREAMING]
| | output: count(a.id)
| | group by: b.smallint_col
| |
| 03:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: a.smallint_col = b.id
| | runtime filters: RF001 <- b.id
| |
| |--07:EXCHANGE [BROADCAST]
| | |
| | 02:SCAN HDFS [functional.alltypessmall b]
| | partitions=4/4 files=4 size=6.32KB
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 files=11 size=814.73KB
| runtime filters: RF001 -> a.smallint_col
|
00:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> c.id
====
# Values statement in subqueries with predicate
select * from (select y from (values((1 as y),(11))) a where y < 10) b
---- PLAN
PLAN-ROOT SINK
|
00:UNION
constant-operands=1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
constant-operands=1
====
# Mixed constant and non-constant select; the predicate is evaluated directly
# by the non-const select
select * from
(select y from
((select 1 as y)
union all
(select tinyint_col from functional.alltypes)) a
where y < 10) b
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| constant-operands=1
| pass-through-operands: all
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.tinyint_col < 10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
02:EXCHANGE [UNPARTITIONED]
|
00:UNION
| constant-operands=1
| pass-through-operands: all
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.tinyint_col < 10
====
# Union of constant selects in subquery
select * from (select 1 as y union all select 2 union all select * from (select 11) a) b
where y < 10
---- PLAN
PLAN-ROOT SINK
|
00:UNION
constant-operands=2
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
constant-operands=2
====
# Union of values statements in subquery
# TODO: We could combine the merge nodes below.
select * from (values(1 as y) union all values(2) union all select * from (values(11)) a) b
where y < 10
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| constant-operands=2
|
01:UNION
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
| constant-operands=2
|
01:UNION
====
# Inner join on inline views made up of unions of constant selects
select * from
(select 1 a, 2 b union all select 1 a, 2 b) x
inner join
(select 1 a, 3 b union all select 1 a, 2 b) y on x.a = y.a
inner join
(select 1 a, 3 b union all select 1 a, 3 b) z on z.b = y.b
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: b = b
|
|--02:UNION
| constant-operands=2
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a = a
|
|--01:UNION
| constant-operands=2
|
00:UNION
constant-operands=2
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: b = b
|
|--06:EXCHANGE [UNPARTITIONED]
| |
| 02:UNION
| constant-operands=2
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a = a
|
|--05:EXCHANGE [UNPARTITIONED]
| |
| 01:UNION
| constant-operands=2
|
00:UNION
constant-operands=2
====
# Semi and inner join on a table and on inline views made up of constant selects
select * from functional.alltypessmall x
left semi join
(select 1 a, 3 b union all select 1 a, 3 b) y on y.a = x.id
inner join
(select 1 a, 3 b union all select 1 a, 3 b) z on z.b = x.id + 2
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: x.id + 2 = b
| runtime filters: RF000 <- b
|
|--02:UNION
| constant-operands=2
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: x.id = a
| runtime filters: RF001 <- a
|
|--01:UNION
| constant-operands=2
|
00:SCAN HDFS [functional.alltypessmall x]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> x.id + 2, RF001 -> x.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: x.id + 2 = b
| runtime filters: RF000 <- b
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:UNION
| constant-operands=2
|
03:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: x.id = a
| runtime filters: RF001 <- a
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:UNION
| constant-operands=2
|
00:SCAN HDFS [functional.alltypessmall x]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> x.id + 2, RF001 -> x.id
====
# Tests that views correctly reanalyze cloned exprs. (IMPALA-984)
select b.* from functional.decimal_tbl a left outer join
(select d1, d1 + NULL IS NULL x from functional.decimal_tbl) b
on (a.d1 = b.d1)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.d1 = d1
|
|--01:SCAN HDFS [functional.decimal_tbl]
| partitions=1/1 files=1 size=195B
|
00:SCAN HDFS [functional.decimal_tbl a]
partitions=1/1 files=1 size=195B
====
# Test predicate assignment through inline view when the query contains
# group by and distinct (IMPALA-1165)
select foo, sum(distinct foo)
from (select int_col + int_col as foo from functional.alltypesagg) t
where foo = 10
group by foo
limit 10
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: sum(foo)
| group by: foo
| limit: 10
|
01:AGGREGATE
| group by: int_col + int_col, int_col + int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
predicates: int_col + int_col = 10
====
# Test enforcement of inline-view slot equivalences when the inline-view
# contains an outer join (IMPALA-1441)
select * from
(select t1.int_col, t1.tinyint_col, t2.int_col as int_col2, t2.tinyint_col as tinyint_col2
from functional.alltypestiny t1 left outer join functional.alltypes t2
on t1.int_col = t2.int_col and t1.tinyint_col = t2.tinyint_col) t
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t2.int_col = t1.int_col, t2.tinyint_col = t1.tinyint_col
| runtime filters: RF000 <- t1.int_col, RF001 <- t1.tinyint_col
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| partitions=4/4 files=4 size=460B
|
01:SCAN HDFS [functional.alltypes t2]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t2.int_col, RF001 -> t2.tinyint_col
====
# IMPALA-1459: Test correct assignment of On-clause predicate from an enclosing block
# inside an inline view with an outer join.
select 1 from
(select a.id aid, b.id bid from
functional.alltypes a inner join functional.alltypes b
on a.id = b.id
full outer join functional.alltypessmall c on a.id = c.id) v
inner join functional.alltypestiny c
on (aid < bid and aid = c.id)
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [INNER JOIN]
| hash predicates: a.id = c.id
| runtime filters: RF000 <- c.id
|
|--05:SCAN HDFS [functional.alltypestiny c]
| partitions=4/4 files=4 size=460B
|
04:HASH JOIN [FULL OUTER JOIN]
| hash predicates: a.id = c.id
| other predicates: a.id < b.id, a.id = b.id
|
|--02:SCAN HDFS [functional.alltypessmall c]
| partitions=4/4 files=4 size=6.32KB
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.id = b.id
| runtime filters: RF001 <- b.id
|
|--01:SCAN HDFS [functional.alltypes b]
| partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> b.id
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id, RF001 -> a.id
====
# IMPALA-2665: Test correct assignment of On-clause predicate from an enclosing block
# inside an inline view with an outer join.
select 1 from functional.alltypes t1
inner join
(select a.id, b.int_col
from functional.alltypes a left outer join functional.alltypes b
on a.id = b.int_col) v
on (t1.id = v.id and v.int_col is null and v.int_col < 10 and v.id < 10)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: a.id = t1.id
| runtime filters: RF000 <- t1.id
|
|--00:SCAN HDFS [functional.alltypes t1]
| partitions=24/24 files=24 size=478.45KB
| predicates: t1.id < 10
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.id = b.int_col
| other predicates: b.int_col IS NULL, b.int_col < 10
|
|--02:SCAN HDFS [functional.alltypes b]
| partitions=24/24 files=24 size=478.45KB
| predicates: b.int_col < 10
| runtime filters: RF000 -> b.int_col
|
01:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.id < 10
runtime filters: RF000 -> a.id
====
# IMPALA-2643: Test inline views with duplicate exprs in their select list.
# Inferred predicate referencing the same expr gets filtered out.
select * from
(select * from
(select bigint_col, bigint_col as bigint_col2
from functional.alltypestiny) iv
) ivv
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
====
# IMPALA-2643: Explicit predicates remain unafftected.
select * from
(select * from
(select bigint_col, bigint_col as bigint_col2
from functional.alltypestiny) iv
) ivv where bigint_col = bigint_col2
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
predicates: bigint_col = bigint_col
====
# IMPALA-2643: Test aggregation.
# Inferred predicate referencing the same expr gets filtered out.
select * from
(select * from
(select sum(bigint_col) as s1, sum(bigint_col) as s2
from functional.alltypestiny) iv
) ivv
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: sum(bigint_col)
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
====
# IMPALA-2643: Explicit predicates remain unafftected.
select * from
(select * from
(select sum(bigint_col) as s1, sum(bigint_col) as s2
from functional.alltypestiny) iv
) ivv where s1 = s2
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: sum(bigint_col)
| having: sum(bigint_col) = sum(bigint_col)
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
====