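# basic equi-join via a USING clause with a scan predicate on the probe side;
# the build side produces a runtime filter (RF000 <- t2.id) that is applied
# at the probe-side scan (RF000 -> t1.id)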
select *
from functional.testtbl t1 join functional.testtbl t2 using(id)
where t1.zip = 94611
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
| row-size=48B cardinality=0
|
|--01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
runtime filters: RF000 -> t1.id
row-size=24B cardinality=0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
| row-size=48B cardinality=0
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
runtime filters: RF000 -> t1.id
row-size=24B cardinality=0
====
# general exprs on both sides of equi-join predicates
select *
from functional.testtbl t1 left outer join functional.testtbl t2
on (t1.id - 1 = t2.id + 1)
where t1.zip = 94611
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.id - 1 = t2.id + 1
| row-size=48B cardinality=0
|
|--01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
row-size=24B cardinality=0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: t1.id - 1 = t2.id + 1
| row-size=48B cardinality=0
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
row-size=24B cardinality=0
====
# test that on-clause predicates referring to multiple tuple ids
# get registered as eq join conjuncts
select t1.*
from (select * from functional.alltypestiny) t1
join (select * from functional.alltypestiny) t2 on (t1.id = t2.id)
join functional.alltypestiny t3 on (coalesce(t1.id, t2.id) = t3.id)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: coalesce(functional.alltypestiny.id, functional.alltypestiny.id) = t3.id
| runtime filters: RF000 <- t3.id
| row-size=97B cardinality=8
|
|--02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
03:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypestiny.id = functional.alltypestiny.id
| runtime filters: RF002 <- functional.alltypestiny.id
| row-size=93B cardinality=8
|
|--01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> coalesce(functional.alltypestiny.id, functional.alltypestiny.id)
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> coalesce(functional.alltypestiny.id, functional.alltypestiny.id), RF002 -> functional.alltypestiny.id
row-size=89B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: coalesce(functional.alltypestiny.id, functional.alltypestiny.id) = t3.id
| runtime filters: RF000 <- t3.id
| row-size=97B cardinality=8
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: functional.alltypestiny.id = functional.alltypestiny.id
| runtime filters: RF002 <- functional.alltypestiny.id
| row-size=93B cardinality=8
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> coalesce(functional.alltypestiny.id, functional.alltypestiny.id)
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> coalesce(functional.alltypestiny.id, functional.alltypestiny.id), RF002 -> functional.alltypestiny.id
row-size=89B cardinality=8
====
# multiple join predicates;
# scan predicates get propagated correctly;
# non-eq join predicates are evaluated as extra conjuncts by the join node
select *
from functional.alltypesagg a right outer join functional.alltypessmall b using (id, int_col)
where a.day >= 6
and b.month > 2
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: a.id = b.id, a.int_col = b.int_col
| other predicates: a.tinyint_col = 15, a.`day` >= 6, a.tinyint_col + b.tinyint_col < 15
| runtime filters: RF000 <- b.id, RF001 <- b.int_col
| row-size=184B cardinality=5
|
|--01:SCAN HDFS [functional.alltypessmall b]
| partition predicates: b.`month` > 2
| HDFS partitions=2/4 files=2 size=3.17KB
| predicates: b.string_col = '15'
| row-size=89B cardinality=5
|
00:SCAN HDFS [functional.alltypesagg a]
partition predicates: a.`day` >= 6
HDFS partitions=5/11 files=5 size=372.38KB
predicates: a.tinyint_col = 15
runtime filters: RF000 -> a.id, RF001 -> a.int_col
row-size=95B cardinality=556
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: a.id = b.id, a.int_col = b.int_col
| other predicates: a.tinyint_col = 15, a.`day` >= 6, a.tinyint_col + b.tinyint_col < 15
| runtime filters: RF000 <- b.id, RF001 <- b.int_col
| row-size=184B cardinality=5
|
|--04:EXCHANGE [HASH(b.id,b.int_col)]
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partition predicates: b.`month` > 2
| HDFS partitions=2/4 files=2 size=3.17KB
| predicates: b.string_col = '15'
| row-size=89B cardinality=5
|
03:EXCHANGE [HASH(a.id,a.int_col)]
|
00:SCAN HDFS [functional.alltypesagg a]
partition predicates: a.`day` >= 6
HDFS partitions=5/11 files=5 size=372.38KB
predicates: a.tinyint_col = 15
runtime filters: RF000 -> a.id, RF001 -> a.int_col
row-size=95B cardinality=556
====
# same as before, with 3 tables;
# non-eq join predicates are evaluated at the correct join node
select *
from functional.alltypesagg a
full outer join functional.alltypessmall b using (id, int_col)
right join functional.alltypesaggnonulls c on (a.id = c.id and b.string_col = c.string_col)
where a.day >= 6
and b.month > 2
and c.day < 3
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15
and a.float_col - c.double_col < 0
and (b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: c.id = a.id, c.string_col = b.string_col
| other predicates: a.tinyint_col = 15, b.string_col = '15', a.`day` >= 6, b.`month` > 2, a.float_col - c.double_col < 0, a.tinyint_col + b.tinyint_col < 15, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
| row-size=279B cardinality=2.00K
|
|--03:HASH JOIN [FULL OUTER JOIN]
| | hash predicates: a.id = b.id, a.int_col = b.int_col
| | row-size=184B cardinality=561
| |
| |--01:SCAN HDFS [functional.alltypessmall b]
| | partition predicates: b.`month` > 2
| | HDFS partitions=2/4 files=2 size=3.17KB
| | predicates: b.string_col = '15'
| | row-size=89B cardinality=5
| |
| 00:SCAN HDFS [functional.alltypesagg a]
| partition predicates: a.`day` >= 6
| HDFS partitions=5/11 files=5 size=372.38KB
| predicates: a.tinyint_col = 15
| row-size=95B cardinality=556
|
02:SCAN HDFS [functional.alltypesaggnonulls c]
partition predicates: c.`day` < 3
HDFS partitions=2/10 files=2 size=148.10KB
row-size=95B cardinality=2.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: a.id = c.id, b.string_col = c.string_col
| other predicates: a.tinyint_col = 15, b.string_col = '15', a.`day` >= 6, b.`month` > 2, a.float_col - c.double_col < 0, a.tinyint_col + b.tinyint_col < 15, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
| runtime filters: RF000 <- c.id, RF001 <- c.string_col
| row-size=279B cardinality=2.00K
|
|--08:EXCHANGE [HASH(c.id,c.string_col)]
| |
| 02:SCAN HDFS [functional.alltypesaggnonulls c]
| partition predicates: c.`day` < 3
| HDFS partitions=2/10 files=2 size=148.10KB
| row-size=95B cardinality=2.00K
|
07:EXCHANGE [HASH(a.id,b.string_col)]
|
03:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
| hash predicates: a.id = b.id, a.int_col = b.int_col
| row-size=184B cardinality=561
|
|--06:EXCHANGE [HASH(b.id,b.int_col)]
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partition predicates: b.`month` > 2
| HDFS partitions=2/4 files=2 size=3.17KB
| predicates: b.string_col = '15'
| runtime filters: RF001 -> b.string_col
| row-size=89B cardinality=5
|
05:EXCHANGE [HASH(a.id,a.int_col)]
|
00:SCAN HDFS [functional.alltypesagg a]
partition predicates: a.`day` >= 6
HDFS partitions=5/11 files=5 size=372.38KB
predicates: a.tinyint_col = 15
runtime filters: RF000 -> a.id
row-size=95B cardinality=556
====
# equi joins with constants in the on clause are not supported
select a.id, b.id from
(select 1 as x, id from functional.alltypessmall) a
inner join
(select 1 as x, id from functional.alltypessmall) b
on a.x = b.x
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: 1 = 1
| row-size=8B cardinality=100
|
|--01:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=4B cardinality=100
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=4B cardinality=100
====
# join using values() in a subquery
select a.int_col, b.x from functional.alltypessmall a inner join
(values(1 as int_col, 'a' as x), (1, 'b'), (2, 'c')) b on a.int_col = b.int_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.int_col = int_col
| runtime filters: RF000 <- int_col
| row-size=17B cardinality=100
|
|--01:UNION
| constant-operands=3
| row-size=13B cardinality=3
|
00:SCAN HDFS [functional.alltypessmall a]
HDFS partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> a.int_col
row-size=4B cardinality=100
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.int_col = int_col
| runtime filters: RF000 <- int_col
| row-size=17B cardinality=100
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:UNION
| constant-operands=3
| row-size=13B cardinality=3
|
00:SCAN HDFS [functional.alltypessmall a]
HDFS partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> a.int_col
row-size=4B cardinality=100
====
# hbase-hdfs join
select *
from functional.alltypesagg join functional_hbase.alltypessmall using (id, int_col)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypesagg.id = functional_hbase.alltypessmall.id, functional.alltypesagg.int_col = functional_hbase.alltypessmall.int_col
| runtime filters: RF000 <- functional_hbase.alltypessmall.id, RF001 <- functional_hbase.alltypessmall.int_col
| row-size=184B cardinality=53
|
|--01:SCAN HBASE [functional_hbase.alltypessmall]
| row-size=89B cardinality=50
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> functional.alltypesagg.id, RF001 -> functional.alltypesagg.int_col
row-size=95B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: functional.alltypesagg.id = functional_hbase.alltypessmall.id, functional.alltypesagg.int_col = functional_hbase.alltypessmall.int_col
| runtime filters: RF000 <- functional_hbase.alltypessmall.id, RF001 <- functional_hbase.alltypessmall.int_col
| row-size=184B cardinality=53
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HBASE [functional_hbase.alltypessmall]
| row-size=89B cardinality=50
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> functional.alltypesagg.id, RF001 -> functional.alltypesagg.int_col
row-size=95B cardinality=11.00K
====
# hbase-hdfs join with scan filtering
select *
from functional.alltypesagg a join functional_hbase.stringids b
on (a.id = cast(b.id as int) and a.int_col = b.int_col)
where a.day >= 6
and a.tinyint_col = 15
and b.id = '5'
and b.tinyint_col = 5
and a.tinyint_col + b.tinyint_col < 15
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.int_col = b.int_col, a.id = CAST(b.id AS INT)
| other predicates: a.tinyint_col + b.tinyint_col < 15
| runtime filters: RF000 <- b.int_col, RF001 <- CAST(b.id AS INT)
| row-size=202B cardinality=11
|
|--01:SCAN HBASE [functional_hbase.stringids b]
| key predicates: b.id = '5'
| start key: 5
| stop key: 5\0
| predicates: b.tinyint_col = 5
| row-size=107B cardinality=1
|
00:SCAN HDFS [functional.alltypesagg a]
partition predicates: a.`day` >= 6
HDFS partitions=5/11 files=5 size=372.38KB
predicates: a.tinyint_col = 15
runtime filters: RF000 -> a.int_col, RF001 -> a.id
row-size=95B cardinality=556
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
NODE 1:
HBASE KEYRANGE 5:5\0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.int_col = b.int_col, a.id = CAST(b.id AS INT)
| other predicates: a.tinyint_col + b.tinyint_col < 15
| runtime filters: RF000 <- b.int_col, RF001 <- CAST(b.id AS INT)
| row-size=202B cardinality=11
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HBASE [functional_hbase.stringids b]
| key predicates: b.id = '5'
| start key: 5
| stop key: 5\0
| predicates: b.tinyint_col = 5
| row-size=107B cardinality=1
|
00:SCAN HDFS [functional.alltypesagg a]
partition predicates: a.`day` >= 6
HDFS partitions=5/11 files=5 size=372.38KB
predicates: a.tinyint_col = 15
runtime filters: RF000 -> a.int_col, RF001 -> a.id
row-size=95B cardinality=556
====
# hbase-hdfs join with scan filtering; the contradictory predicates on
# b.tinyint_col (= 5 and > 123) make the build side an empty set
select *
from functional.alltypesagg a join functional_hbase.stringids b
on (a.id = cast(b.id as int) and a.int_col = b.int_col)
where a.day >= 6
and a.tinyint_col = 15
and b.id = '5'
and b.tinyint_col = 5
and b.tinyint_col > 123
and a.tinyint_col + b.tinyint_col < 15
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.int_col = b.int_col, a.id = CAST(b.id AS INT)
| other predicates: a.tinyint_col + b.tinyint_col < 15
| runtime filters: RF000 <- b.int_col, RF001 <- CAST(b.id AS INT)
| row-size=202B cardinality=0
|
|--01:EMPTYSET
|
00:SCAN HDFS [functional.alltypesagg a]
partition predicates: a.`day` >= 6
HDFS partitions=5/11 files=5 size=372.38KB
predicates: a.tinyint_col = 15
runtime filters: RF000 -> a.int_col, RF001 -> a.id
row-size=95B cardinality=556
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
====
# left join followed by right join and then aggregate
select x.tinyint_col, count(x.day)
from (
select a.day day, c.tinyint_col tinyint_col
from functional.alltypesagg a
join functional.alltypessmall b using (id, int_col)
right outer join functional.alltypesnopart c on (b.id = c.id)
join functional.alltypesagg d on (a.id = d.id)
order by 1,2
limit 10
) x
where x.day >= 6
group by x.tinyint_col
order by 2
limit 5
---- PLAN
PLAN-ROOT SINK
|
10:TOP-N [LIMIT=5]
| order by: count(x.`day`) ASC
| row-size=9B cardinality=1
|
09:AGGREGATE [FINALIZE]
| output: count(day)
| group by: tinyint_col
| row-size=9B cardinality=1
|
08:SELECT
| predicates: day >= 6
| row-size=5B cardinality=1
|
07:TOP-N [LIMIT=10]
| order by: day ASC, tinyint_col ASC
| row-size=5B cardinality=10
|
06:HASH JOIN [INNER JOIN]
| hash predicates: d.id = a.id
| runtime filters: RF000 <- a.id
| row-size=29B cardinality=113
|
|--05:HASH JOIN [RIGHT OUTER JOIN]
| | hash predicates: b.id = c.id
| | runtime filters: RF002 <- c.id
| | row-size=25B cardinality=106
| |
| |--02:SCAN HDFS [functional.alltypesnopart c]
| | HDFS partitions=1/1 files=0 size=0B
| | row-size=5B cardinality=0
| |
| 04:HASH JOIN [INNER JOIN]
| | hash predicates: a.id = b.id, a.int_col = b.int_col
| | runtime filters: RF004 <- b.id, RF005 <- b.int_col
| | row-size=20B cardinality=106
| |
| |--01:SCAN HDFS [functional.alltypessmall b]
| | HDFS partitions=4/4 files=4 size=6.32KB
| | runtime filters: RF002 -> b.id
| | row-size=8B cardinality=100
| |
| 00:SCAN HDFS [functional.alltypesagg a]
| HDFS partitions=11/11 files=11 size=814.73KB
| runtime filters: RF002 -> a.id, RF004 -> a.id, RF005 -> a.int_col
| row-size=12B cardinality=11.00K
|
03:SCAN HDFS [functional.alltypesagg d]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> d.id
row-size=4B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:TOP-N [LIMIT=5]
| order by: count(x.`day`) ASC
| row-size=9B cardinality=1
|
09:AGGREGATE [FINALIZE]
| output: count(day)
| group by: tinyint_col
| row-size=9B cardinality=1
|
08:SELECT
| predicates: day >= 6
| row-size=5B cardinality=1
|
15:MERGING-EXCHANGE [UNPARTITIONED]
| order by: day ASC, tinyint_col ASC
| limit: 10
|
07:TOP-N [LIMIT=10]
| order by: day ASC, tinyint_col ASC
| row-size=5B cardinality=10
|
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: d.id = a.id
| runtime filters: RF000 <- a.id
| row-size=29B cardinality=113
|
|--14:EXCHANGE [BROADCAST]
| |
| 05:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| | hash predicates: b.id = c.id
| | runtime filters: RF002 <- c.id
| | row-size=25B cardinality=106
| |
| |--13:EXCHANGE [HASH(c.id)]
| | |
| | 02:SCAN HDFS [functional.alltypesnopart c]
| | HDFS partitions=1/1 files=0 size=0B
| | row-size=5B cardinality=0
| |
| 12:EXCHANGE [HASH(b.id)]
| |
| 04:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: a.id = b.id, a.int_col = b.int_col
| | runtime filters: RF004 <- b.id, RF005 <- b.int_col
| | row-size=20B cardinality=106
| |
| |--11:EXCHANGE [BROADCAST]
| | |
| | 01:SCAN HDFS [functional.alltypessmall b]
| | HDFS partitions=4/4 files=4 size=6.32KB
| | runtime filters: RF002 -> b.id
| | row-size=8B cardinality=100
| |
| 00:SCAN HDFS [functional.alltypesagg a]
| HDFS partitions=11/11 files=11 size=814.73KB
| runtime filters: RF002 -> a.id, RF004 -> a.id, RF005 -> a.int_col
| row-size=12B cardinality=11.00K
|
03:SCAN HDFS [functional.alltypesagg d]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> d.id
row-size=4B cardinality=11.00K
====
# join without "other join conjuncts"
select * from functional.alltypessmall a, functional.alltypessmall b where a.id = b.id limit 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.id = b.id
| runtime filters: RF000 <- b.id
| limit: 1
| row-size=178B cardinality=1
|
|--01:SCAN HDFS [functional.alltypessmall b]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=89B cardinality=100
|
00:SCAN HDFS [functional.alltypessmall a]
HDFS partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> a.id
row-size=89B cardinality=100
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
| limit: 1
|
02:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: a.id = b.id
| runtime filters: RF000 <- b.id
| limit: 1
| row-size=178B cardinality=1
|
|--04:EXCHANGE [HASH(b.id)]
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=89B cardinality=100
|
03:EXCHANGE [HASH(a.id)]
|
00:SCAN HDFS [functional.alltypessmall a]
HDFS partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> a.id
row-size=89B cardinality=100
====
# join conjunct is derived from equivalence classes
# (no explicit join conjunct between t1 and t2)
select *
from functional.testtbl t1, functional.testtbl t2, functional.testtbl t3
where t1.id = t3.id and t2.id = t3.id
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t3.id
| runtime filters: RF000 <- t3.id
| row-size=72B cardinality=0
|
|--02:SCAN HDFS [functional.testtbl t3]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF002 <- t2.id
| row-size=48B cardinality=0
|
|--01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| runtime filters: RF000 -> t2.id
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
runtime filters: RF000 -> t1.id, RF002 -> t1.id
row-size=24B cardinality=0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: t1.id = t3.id
| runtime filters: RF000 <- t3.id
| row-size=72B cardinality=0
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [functional.testtbl t3]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: t1.id = t2.id
| runtime filters: RF002 <- t2.id
| row-size=48B cardinality=0
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| runtime filters: RF000 -> t2.id
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
runtime filters: RF000 -> t1.id, RF002 -> t1.id
row-size=24B cardinality=0
====
# join involving a table with no table stats (functional.emptytable)
# tests that the default join strategy is broadcast
select * from functional.emptytable a inner join
functional.alltypes b on a.f2 = b.int_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: b.int_col = a.f2
| runtime filters: RF000 <- a.f2
| row-size=105B cardinality=7.30K
|
|--00:SCAN HDFS [functional.emptytable a]
| partitions=0/0 files=0 size=0B
| row-size=16B cardinality=0
|
01:SCAN HDFS [functional.alltypes b]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.int_col
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: b.int_col = a.f2
| runtime filters: RF000 <- a.f2
| row-size=105B cardinality=7.30K
|
|--03:EXCHANGE [BROADCAST]
| |
| 00:SCAN HDFS [functional.emptytable a]
| partitions=0/0 files=0 size=0B
| row-size=16B cardinality=0
|
01:SCAN HDFS [functional.alltypes b]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.int_col
row-size=89B cardinality=7.30K
====
# cross join
select *
from functional.testtbl t1 cross join functional.testtbl
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [CROSS JOIN]
| row-size=48B cardinality=0
|
|--01:SCAN HDFS [functional.testtbl]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
row-size=24B cardinality=0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=48B cardinality=0
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
row-size=24B cardinality=0
====
# cross join with where clause
select *
from functional.testtbl t1 cross join functional.testtbl t2 where t1.id < t2.id
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [INNER JOIN]
| predicates: t1.id < t2.id
| row-size=48B cardinality=0
|
|--01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
row-size=24B cardinality=0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
| predicates: t1.id < t2.id
| row-size=48B cardinality=0
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
row-size=24B cardinality=0
====
# Tests that the partitioned join between b and c exploits the existing
# data partition of its lhs input.
select * from functional.alltypes a
inner join [shuffle] functional.alltypes b
on (a.id = b.id and b.int_col = a.int_col)
inner join [shuffle] functional.alltypes c
on (b.id = c.id and c.int_col = b.int_col)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: b.id = c.id, b.int_col = c.int_col
| runtime filters: RF000 <- c.id, RF001 <- c.int_col
| row-size=267B cardinality=7.30K
|
|--02:SCAN HDFS [functional.alltypes c]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=89B cardinality=7.30K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.id = b.id, a.int_col = b.int_col
| runtime filters: RF004 <- b.id, RF005 <- b.int_col
| row-size=178B cardinality=7.30K
|
|--01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> b.id, RF001 -> b.int_col
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id, RF001 -> a.int_col, RF004 -> a.id, RF005 -> a.int_col
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: b.id = c.id, b.int_col = c.int_col
| runtime filters: RF000 <- c.id, RF001 <- c.int_col
| row-size=267B cardinality=7.30K
|
|--07:EXCHANGE [HASH(c.id,c.int_col)]
| |
| 02:SCAN HDFS [functional.alltypes c]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=89B cardinality=7.30K
|
03:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: a.id = b.id, a.int_col = b.int_col
| runtime filters: RF004 <- b.id, RF005 <- b.int_col
| row-size=178B cardinality=7.30K
|
|--06:EXCHANGE [HASH(b.id,b.int_col)]
| |
| 01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> b.id, RF001 -> b.int_col
| row-size=89B cardinality=7.30K
|
05:EXCHANGE [HASH(a.id,a.int_col)]
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id, RF001 -> a.int_col, RF004 -> a.id, RF005 -> a.int_col
row-size=89B cardinality=7.30K
====
# Tests that the partitioned join between a and b exploits the existing
# data partition of its rhs input.
select * from functional.alltypes a
inner join [shuffle]
(select count(*), int_col, bool_col
from functional.alltypes group by int_col, bool_col) b
on (a.int_col = b.int_col and b.bool_col = a.bool_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.bool_col = bool_col, a.int_col = int_col
| runtime filters: RF000 <- bool_col, RF001 <- int_col
| row-size=102B cardinality=14.60K
|
|--02:AGGREGATE [FINALIZE]
| | output: count(*)
| | group by: int_col, bool_col
| | row-size=13B cardinality=20
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=5B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: a.bool_col = bool_col, a.int_col = int_col
| runtime filters: RF000 <- bool_col, RF001 <- int_col
| row-size=102B cardinality=14.60K
|
|--05:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | group by: int_col, bool_col
| | row-size=13B cardinality=20
| |
| 04:EXCHANGE [HASH(int_col,bool_col)]
| |
| 02:AGGREGATE [STREAMING]
| | output: count(*)
| | group by: int_col, bool_col
| | row-size=13B cardinality=20
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=5B cardinality=7.30K
|
06:EXCHANGE [HASH(a.int_col,a.bool_col)]
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col
row-size=89B cardinality=7.30K
====
# Tests that the partitioned join between b and c exploits the existing
# data partition of its lhs and rhs inputs.
select * from functional.alltypes a
inner join [shuffle] functional.alltypes b
on(a.int_col = b.int_col and b.bool_col = a.bool_col)
inner join [shuffle]
(select count(*), int_col, bool_col
from functional.alltypes group by int_col, bool_col) c
on (b.int_col = c.int_col and c.bool_col = b.bool_col)
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [INNER JOIN]
| hash predicates: a.bool_col = b.bool_col, a.int_col = b.int_col
| runtime filters: RF000 <- b.bool_col, RF001 <- b.int_col
| row-size=191B cardinality=10.66M
|
|--01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=89B cardinality=7.30K
|
04:HASH JOIN [INNER JOIN]
| hash predicates: a.bool_col = bool_col, a.int_col = int_col
| runtime filters: RF004 <- bool_col, RF005 <- int_col
| row-size=102B cardinality=14.60K
|
|--03:AGGREGATE [FINALIZE]
| | output: count(*)
| | group by: int_col, bool_col
| | row-size=13B cardinality=20
| |
| 02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> functional.alltypes.bool_col, RF001 -> functional.alltypes.int_col
| row-size=5B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col, RF004 -> a.bool_col, RF005 -> a.int_col
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:EXCHANGE [UNPARTITIONED]
|
05:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: a.bool_col = b.bool_col, a.int_col = b.int_col
| runtime filters: RF000 <- b.bool_col, RF001 <- b.int_col
| row-size=191B cardinality=10.66M
|
|--09:EXCHANGE [HASH(b.int_col,b.bool_col)]
| |
| 01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=89B cardinality=7.30K
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: a.bool_col = bool_col, a.int_col = int_col
| runtime filters: RF004 <- bool_col, RF005 <- int_col
| row-size=102B cardinality=14.60K
|
|--07:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | group by: int_col, bool_col
| | row-size=13B cardinality=20
| |
| 06:EXCHANGE [HASH(int_col,bool_col)]
| |
| 03:AGGREGATE [STREAMING]
| | output: count(*)
| | group by: int_col, bool_col
| | row-size=13B cardinality=20
| |
| 02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> functional.alltypes.bool_col, RF001 -> functional.alltypes.int_col
| row-size=5B cardinality=7.30K
|
08:EXCHANGE [HASH(a.int_col,a.bool_col)]
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col, RF004 -> a.bool_col, RF005 -> a.int_col
row-size=89B cardinality=7.30K
====
# Tests that all predicates from the On-clause are applied (IMPALA-805)
# and that slot equivalences are enforced at the lowest possible plan node.
select 1 from functional.alltypes a
inner join functional.alltypes b
# first equivalence class
on a.id = b.id and a.id = b.int_col and a.id = b.bigint_col
and a.tinyint_col = b.id and a.smallint_col = b.id
and a.int_col = b.id and a.bigint_col = b.id
# second equivalence class
and b.string_col = a.string_col and b.date_string_col = a.string_col
# redundant predicates to test the minimal spanning tree of equivalent slots at table a
where a.tinyint_col = a.smallint_col and a.int_col = a.bigint_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: b.id = a.id, b.string_col = a.string_col
| runtime filters: RF000 <- a.id, RF001 <- a.string_col
| row-size=81B cardinality=73
|
|--00:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: a.id = a.int_col, a.id = a.tinyint_col, a.int_col = a.bigint_col, a.tinyint_col = a.smallint_col
| row-size=32B cardinality=730
|
01:SCAN HDFS [functional.alltypes b]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: b.id = b.int_col, b.id = b.bigint_col, b.string_col = b.date_string_col
runtime filters: RF000 -> b.id, RF001 -> b.string_col
row-size=49B cardinality=730
====
# Tests that all predicates from the On-clause are applied (IMPALA-805).
select 1 from functional.alltypes a
left outer join functional.alltypes b
# first equivalence class
on a.id = b.id and a.id = b.int_col and a.id = b.bigint_col
and a.tinyint_col = b.id and a.smallint_col = b.id
and a.int_col = b.id and a.bigint_col = b.id
# second equivalence class
and b.string_col = a.string_col and b.date_string_col = a.string_col
# redundant predicates to test the minimal spanning tree of equivalent slots at table a
where a.tinyint_col = a.smallint_col and a.int_col = a.bigint_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: b.id = a.id, b.int_col = a.id, b.id = a.int_col, b.id = a.bigint_col, b.bigint_col = a.id, b.id = a.smallint_col, b.string_col = a.string_col, b.id = a.tinyint_col, b.date_string_col = a.string_col
| runtime filters: RF000 <- a.id, RF001 <- a.id, RF002 <- a.int_col, RF003 <- a.bigint_col, RF004 <- a.id, RF005 <- a.smallint_col, RF006 <- a.string_col, RF007 <- a.tinyint_col, RF008 <- a.string_col
| row-size=81B cardinality=730
|
|--00:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: a.int_col = a.bigint_col, a.tinyint_col = a.smallint_col
| row-size=32B cardinality=730
|
01:SCAN HDFS [functional.alltypes b]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.id, RF001 -> b.int_col, RF002 -> b.id, RF003 -> b.id, RF004 -> b.bigint_col, RF005 -> b.id, RF006 -> b.string_col, RF007 -> b.id, RF008 -> b.date_string_col
row-size=49B cardinality=7.30K
====
# Tests elimination of redundant join predicates (IMPALA-912).
select * from
functional.alltypes a
inner join functional.alltypestiny b
on (a.id = b.id and a.int_col = b.int_col)
inner join functional.alltypessmall c
on (b.id = c.id and b.int_col = c.int_col)
# redundant join predicates
where a.id = c.id and a.int_col = c.int_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: c.id = b.id, c.int_col = b.int_col
| runtime filters: RF000 <- b.id, RF001 <- b.int_col
| row-size=267B cardinality=8
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: a.id = b.id, a.int_col = b.int_col
| | runtime filters: RF004 <- b.id, RF005 <- b.int_col
| | row-size=178B cardinality=8
| |
| |--01:SCAN HDFS [functional.alltypestiny b]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=89B cardinality=8
| |
| 00:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF004 -> a.id, RF005 -> a.int_col
| row-size=89B cardinality=7.30K
|
02:SCAN HDFS [functional.alltypessmall c]
HDFS partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> c.id, RF001 -> c.int_col
row-size=89B cardinality=100
====
# Tests elimination of redundant join predicates (IMPALA-912).
# This test relies on the desired join order b,a,c which requires
# inference of the predicate 'b.int_col = a.id' which makes one of
# the original predicates redundant.
select * from
functional.alltypestiny a,
functional.alltypes b,
functional.alltypessmall c
where a.id = c.id and b.int_col = c.int_col and b.int_col = c.id
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: a.id = c.id
| runtime filters: RF000 <- c.id
| row-size=267B cardinality=5.84K
|
|--02:SCAN HDFS [functional.alltypessmall c]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: c.id = c.int_col
| row-size=89B cardinality=10
|
03:HASH JOIN [INNER JOIN]
| hash predicates: b.int_col = a.id
| runtime filters: RF002 <- a.id
| row-size=178B cardinality=5.84K
|
|--00:SCAN HDFS [functional.alltypestiny a]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> a.id
| row-size=89B cardinality=8
|
01:SCAN HDFS [functional.alltypes b]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.int_col, RF002 -> b.int_col
row-size=89B cardinality=7.30K
====
# Tests elimination of redundant join predicates (IMPALA-912)
# and that slot equivalences are enforced at the lowest possible plan node.
select a.* from
(select int_col, smallint_col, count(*) c from functional.alltypes
group by 1, 2) a
inner join
(select int_col, smallint_col, count(*) c from functional.alltypessmall
group by 1, 2) b
on a.int_col = b.int_col and a.int_col = b.smallint_col and a.int_col = b.c
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: int_col = int_col
| runtime filters: RF000 <- int_col
| row-size=28B cardinality=100
|
|--03:AGGREGATE [FINALIZE]
| | output: count(*)
| | group by: int_col, smallint_col
| | having: int_col = count(*)
| | row-size=14B cardinality=1
| |
| 02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: functional.alltypessmall.int_col = functional.alltypessmall.smallint_col
| row-size=6B cardinality=10
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: int_col, smallint_col
| row-size=14B cardinality=100
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> functional.alltypes.int_col
row-size=6B cardinality=7.30K
====
# Test retainment of join predicates referencing outer-joined tuples
# that are otherwise redundant (equivalence class already covered
# by another predicate). Regression test for IMPALA-1118.
# Force a partitioned join to exercise removal of redundant hash exprs.
select t3.int_col, t2.bigint_col
from functional.alltypes t1
left outer join [shuffle] functional.alltypesagg t2
on t2.smallint_col = t1.smallint_col
inner join [shuffle] functional.alltypestiny t3
on t3.int_col = t1.smallint_col
# equivalence class already covered by predicates in scans, plus the join
# conditions up to here; still need to retain this predicate to remove
# tuples with t2.bigint_col IS NULL
and t3.int_col = t2.bigint_col
# predicate is redundant because t2.bigint_col IS NOT NULL has
# already been established by 't3.int_col = t2.bigint_col'
and t3.bigint_col = t2.bigint_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.smallint_col = t3.int_col, t2.bigint_col = t3.int_col
| runtime filters: RF000 <- t3.int_col, RF001 <- t3.int_col
| row-size=24B cardinality=821
|
|--02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| predicates: t3.int_col = t3.bigint_col
| row-size=12B cardinality=1
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t2.smallint_col = t1.smallint_col
| runtime filters: RF004 <- t1.smallint_col
| row-size=12B cardinality=827.84K
|
|--00:SCAN HDFS [functional.alltypes t1]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> t1.smallint_col, RF001 -> t1.smallint_col
| row-size=2B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypesagg t2]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t2.smallint_col, RF001 -> t2.bigint_col, RF004 -> t2.smallint_col
row-size=10B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: t1.smallint_col = t3.int_col, t2.bigint_col = t3.int_col
| runtime filters: RF000 <- t3.int_col, RF001 <- t3.int_col
| row-size=24B cardinality=821
|
|--07:EXCHANGE [HASH(t3.int_col)]
| |
| 02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| predicates: t3.int_col = t3.bigint_col
| row-size=12B cardinality=1
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: t2.smallint_col = t1.smallint_col
| runtime filters: RF004 <- t1.smallint_col
| row-size=12B cardinality=827.84K
|
|--06:EXCHANGE [HASH(t1.smallint_col)]
| |
| 00:SCAN HDFS [functional.alltypes t1]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> t1.smallint_col, RF001 -> t1.smallint_col
| row-size=2B cardinality=7.30K
|
05:EXCHANGE [HASH(t2.smallint_col)]
|
01:SCAN HDFS [functional.alltypesagg t2]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t2.smallint_col, RF001 -> t2.bigint_col, RF004 -> t2.smallint_col
row-size=10B cardinality=11.00K
====
# Test correct removal of redundant join predicates (IMPALA-1353):
# Equivalences among inline-view slots are enforced. The predicates
# are migrated into the inline view plan.
select 1
from functional.alltypes a
inner join
(select id+id as x, tinyint_col,
int_col*int_col as y, bigint_col
from functional.alltypessmall) b
on a.id = b.x and a.id = b.tinyint_col and
a.int_col = b.y and a.int_col = b.bigint_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.id = id + id, a.int_col = int_col * int_col
| runtime filters: RF000 <- id + id, RF001 <- int_col * int_col
| row-size=25B cardinality=7.30K
|
|--01:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: int_col * int_col = bigint_col, id + id = tinyint_col
| row-size=17B cardinality=10
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id, RF001 -> a.int_col
row-size=8B cardinality=7.30K
====
# Test correct removal of redundant join predicates (IMPALA-1353):
# Equivalences among inline-view slots are enforced. The predicates
# are assigned to a SelectNode because the inline view has a limit.
select 1
from functional.alltypes a
inner join
(select id+id as x, tinyint_col,
int_col*int_col as y, bigint_col
from functional.alltypessmall
order by tinyint_col limit 20) b
on a.id = b.x and a.id = b.tinyint_col and
a.int_col = b.y and a.int_col = b.bigint_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: a.id = id + id, a.int_col = int_col * int_col
| runtime filters: RF000 <- id + id, RF001 <- int_col * int_col
| row-size=25B cardinality=7.30K
|
|--03:SELECT
| | predicates: int_col * int_col = bigint_col, id + id = tinyint_col
| | row-size=17B cardinality=2
| |
| 02:TOP-N [LIMIT=20]
| | order by: tinyint_col ASC
| | row-size=17B cardinality=20
| |
| 01:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=17B cardinality=100
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id, RF001 -> a.int_col
row-size=8B cardinality=7.30K
====
# Test correct removal of redundant join predicates (IMPALA-1353):
# Equivalences among inline-view slots are enforced. The predicates
# are assigned to a SelectNode because the inline view has analytics.
select 1
from functional.alltypes a
inner join
(select id+id as x, sum(tinyint_col) over() tinyint_col,
int_col*int_col as y, count(bigint_col) over() bigint_col
from functional.alltypessmall
order by tinyint_col limit 20) b
on a.id = b.x and a.id = b.tinyint_col and
a.int_col = b.y and a.int_col = b.bigint_col
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [INNER JOIN]
| hash predicates: a.id = id + id, a.int_col = int_col * int_col
| runtime filters: RF000 <- id + id, RF001 <- int_col * int_col
| row-size=32B cardinality=7.30K
|
|--04:SELECT
| | predicates: id + id = sum(tinyint_col), int_col * int_col = count(bigint_col)
| | row-size=24B cardinality=2
| |
| 03:TOP-N [LIMIT=20]
| | order by: sum(tinyint_col) ASC
| | row-size=24B cardinality=20
| |
| 02:ANALYTIC
| | functions: sum(tinyint_col), count(bigint_col)
| | row-size=33B cardinality=100
| |
| 01:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=17B cardinality=100
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id, RF001 -> a.int_col
row-size=8B cardinality=7.30K
====
# IMPALA-1353/IMPALA-4916: Test correct removal of redundant join predicates.
select 1
from functional.alltypes a
inner join
(select id+id x, tinyint_col, int_col*int_col y, bigint_col
from functional.alltypessmall
union all
select id-id x, tinyint_col, int_col/int_col y, bigint_col
from functional.alltypestiny) b
on a.id = b.x and a.id = b.tinyint_col and
a.int_col = b.y and a.int_col = b.bigint_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: a.id = x, a.int_col = y
| runtime filters: RF000 <- x, RF001 <- y
| row-size=33B cardinality=7.30K
|
|--01:UNION
| | row-size=25B cardinality=11
| |
| |--03:SCAN HDFS [functional.alltypestiny]
| | HDFS partitions=4/4 files=4 size=460B
| | predicates: id - id = tinyint_col, int_col / int_col = bigint_col
| | row-size=17B cardinality=1
| |
| 02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: int_col * int_col = bigint_col, id + id = tinyint_col
| row-size=17B cardinality=10
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id, RF001 -> a.int_col
row-size=8B cardinality=7.30K
====
# Test creation of predicates at a join node for constructing the
# minimum spanning tree to cover known slot equivalences (IMPALA-1102).
select straight_join t1.smallint_col AS smallint_col_1
from functional.alltypesagg t1
inner join functional.alltypestiny t3
on t3.smallint_col = t1.tinyint_col
inner join functional.alltypes t2
on t2.string_col = t1.string_col and t3.date_string_col = t2.string_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.string_col = t2.string_col
| runtime filters: RF000 <- t2.string_col
| row-size=53B cardinality=33.58K
|
|--02:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=13B cardinality=7.30K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.tinyint_col = t3.smallint_col, t1.string_col = t3.date_string_col
| runtime filters: RF002 <- t3.smallint_col, RF003 <- t3.date_string_col
| row-size=40B cardinality=46
|
|--01:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> t3.date_string_col
| row-size=22B cardinality=8
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.string_col, RF002 -> t1.tinyint_col, RF003 -> t1.string_col
row-size=18B cardinality=11.00K
====
# Regression test for IMPALA-935.
select 1 from
(select int_col from functional.alltypessmall group by int_col) a
full outer join
(select distinct bigint_col from functional.alltypestiny) b
on (a.int_col = b.bigint_col)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [FULL OUTER JOIN]
| hash predicates: int_col = bigint_col
| row-size=12B cardinality=20
|
|--03:AGGREGATE [FINALIZE]
| | group by: bigint_col
| | row-size=8B cardinality=2
| |
| 02:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=8B cardinality=8
|
01:AGGREGATE [FINALIZE]
| group by: int_col
| row-size=4B cardinality=10
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=4B cardinality=100
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
| hash predicates: int_col = bigint_col
| row-size=12B cardinality=20
|
|--08:AGGREGATE [FINALIZE]
| | group by: bigint_col
| | row-size=8B cardinality=2
| |
| 07:EXCHANGE [HASH(bigint_col)]
| |
| 03:AGGREGATE [STREAMING]
| | group by: bigint_col
| | row-size=8B cardinality=2
| |
| 02:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=8B cardinality=8
|
06:AGGREGATE [FINALIZE]
| group by: int_col
| row-size=4B cardinality=10
|
05:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE [STREAMING]
| group by: int_col
| row-size=4B cardinality=10
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=4B cardinality=100
====
# Test joins with union inputs. One side of the join is a union.
select a.id, b.id, a.string_col, b.string_col
from
(select id, string_col from functional.alltypessmall
where year = 2009 and month = 1
union all
select id, string_col from functional.alltypessmall
where year = 2009 and month = 2
union all
select 0, '1234') a
inner join
functional.alltypestiny b
on a.id = b.id
where b.id < 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: b.id = id
| runtime filters: RF000 <- id
| row-size=34B cardinality=7
|
|--06:EXCHANGE [HASH(id)]
| |
| 00:UNION
| | constant-operands=1
| | row-size=17B cardinality=7
| |
| |--02:SCAN HDFS [functional.alltypessmall]
| | partition predicates: `year` = 2009, `month` = 2
| | HDFS partitions=1/4 files=1 size=1.58KB
| | predicates: functional.alltypessmall.id < 5
| | row-size=17B cardinality=3
| |
| 01:SCAN HDFS [functional.alltypessmall]
| partition predicates: `year` = 2009, `month` = 1
| HDFS partitions=1/4 files=1 size=1.57KB
| predicates: functional.alltypessmall.id < 5
| row-size=17B cardinality=3
|
05:EXCHANGE [HASH(b.id)]
|
03:SCAN HDFS [functional.alltypestiny b]
HDFS partitions=4/4 files=4 size=460B
predicates: b.id < 5
runtime filters: RF000 -> b.id
row-size=17B cardinality=1
====
# Test joins with union inputs. One input is a union.
select a.id, b.id, a.string_col, b.string_col
from
functional.alltypestiny b
left outer join
(select id, string_col from functional.alltypessmall
where year = 2009 and month = 1
union all
select id, string_col from functional.alltypessmall
where year = 2009 and month = 2
union all
select 0, '1234') a
on a.id = b.id
where b.id < 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
| hash predicates: b.id = id
| row-size=34B cardinality=1
|
|--06:EXCHANGE [HASH(id)]
| |
| 01:UNION
| | constant-operands=1
| | row-size=17B cardinality=7
| |
| |--03:SCAN HDFS [functional.alltypessmall]
| | partition predicates: `year` = 2009, `month` = 2
| | HDFS partitions=1/4 files=1 size=1.58KB
| | predicates: functional.alltypessmall.id < 5
| | row-size=17B cardinality=3
| |
| 02:SCAN HDFS [functional.alltypessmall]
| partition predicates: `year` = 2009, `month` = 1
| HDFS partitions=1/4 files=1 size=1.57KB
| predicates: functional.alltypessmall.id < 5
| row-size=17B cardinality=3
|
05:EXCHANGE [HASH(b.id)]
|
00:SCAN HDFS [functional.alltypestiny b]
HDFS partitions=4/4 files=4 size=460B
predicates: b.id < 5
row-size=17B cardinality=1
====
# Test joins with union inputs. Both inputs are a union.
select a.id, b.id, a.string_col, b.string_col
from
(select id, string_col from functional.alltypessmall
where year = 2009 and month = 1
union all
select id, string_col from functional.alltypessmall
where year = 2009 and month = 2
union all
select 0, '1234') a
full outer join
(select id, string_col from functional.alltypessmall
where year = 2009 and month = 1
union all
select id, string_col from functional.alltypessmall
where year = 2009 and month = 2
union all
select 0, '5678') b
on a.id = b.id
where b.id < 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
06:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
| hash predicates: id = id
| other predicates: id < 5
| row-size=34B cardinality=58
|
|--08:EXCHANGE [HASH(id)]
| |
| 03:UNION
| | constant-operands=1
| | row-size=17B cardinality=7
| |
| |--05:SCAN HDFS [functional.alltypessmall]
| | partition predicates: `year` = 2009, `month` = 2
| | HDFS partitions=1/4 files=1 size=1.58KB
| | predicates: functional.alltypessmall.id < 5
| | row-size=17B cardinality=3
| |
| 04:SCAN HDFS [functional.alltypessmall]
| partition predicates: `year` = 2009, `month` = 1
| HDFS partitions=1/4 files=1 size=1.57KB
| predicates: functional.alltypessmall.id < 5
| row-size=17B cardinality=3
|
07:EXCHANGE [HASH(id)]
|
00:UNION
| constant-operands=1
| row-size=17B cardinality=51
|
|--02:SCAN HDFS [functional.alltypessmall]
| partition predicates: `year` = 2009, `month` = 2
| HDFS partitions=1/4 files=1 size=1.58KB
| row-size=17B cardinality=25
|
01:SCAN HDFS [functional.alltypessmall]
partition predicates: `year` = 2009, `month` = 1
HDFS partitions=1/4 files=1 size=1.57KB
row-size=17B cardinality=25
====
# Test joins with decimals with different precision and scale
# Regression test for IMPALA-1121
select straight_join count(*)
from functional.decimal_tbl a join functional.decimal_tbl b on a.d1 = b.d5
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.d1 = b.d5
| runtime filters: RF000 <- b.d5
| row-size=12B cardinality=unavailable
|
|--01:SCAN HDFS [functional.decimal_tbl b]
| HDFS partitions=1/1 files=1 size=195B
| row-size=8B cardinality=unavailable
|
00:SCAN HDFS [functional.decimal_tbl a]
HDFS partitions=1/1 files=1 size=195B
runtime filters: RF000 -> a.d1
row-size=4B cardinality=unavailable
====
# Test left semi join including correct predicate assignment and propagation
select j.* from functional.jointbl j left semi join functional.dimtbl d
on (j.test_id = d.id and j.test_zip < d.zip and d.name = 'Name2')
where j.test_id < 10
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: j.test_id = d.id
| other join predicates: j.test_zip < d.zip
| runtime filters: RF000 <- d.id
| row-size=33B cardinality=1
|
|--01:SCAN HDFS [functional.dimtbl d]
| HDFS partitions=1/1 files=1 size=171B
| predicates: d.id < 10, d.name = 'Name2'
| row-size=29B cardinality=1
|
00:SCAN HDFS [functional.jointbl j]
HDFS partitions=1/1 files=1 size=433B
predicates: j.test_id < 10
runtime filters: RF000 -> j.test_id
row-size=33B cardinality=2
====
# Test right semi join including correct predicate assignment and propagation
select b.* from functional.alltypes a right semi join functional.alltypestiny b
on (a.id = b.id and a.int_col < b.int_col and a.bool_col = false)
where b.id < 10
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: a.id = b.id
| other join predicates: a.int_col < b.int_col
| runtime filters: RF000 <- b.id
| row-size=89B cardinality=1
|
|--01:SCAN HDFS [functional.alltypestiny b]
| HDFS partitions=4/4 files=4 size=460B
| predicates: b.id < 10
| row-size=89B cardinality=1
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: a.id < 10, a.bool_col = FALSE
runtime filters: RF000 -> a.id
row-size=9B cardinality=516
====
# Test left anti join including correct predicate assignment and propagation
select j.* from functional.jointbl j left anti join functional.dimtbl d
on (j.test_id = d.id and j.test_zip < d.zip and d.name = 'Name2')
where j.test_id < 10
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT ANTI JOIN]
| hash predicates: j.test_id = d.id
| other join predicates: j.test_zip < d.zip
| row-size=33B cardinality=2
|
|--01:SCAN HDFS [functional.dimtbl d]
| HDFS partitions=1/1 files=1 size=171B
| predicates: d.id < 10, d.name = 'Name2'
| row-size=29B cardinality=1
|
00:SCAN HDFS [functional.jointbl j]
HDFS partitions=1/1 files=1 size=433B
predicates: j.test_id < 10
row-size=33B cardinality=2
====
# Test query with anti join and inner join and predicates
select count(*) from functional.JoinTbl j
left anti join functional.DimTbl d on j.test_id = d.id
inner join functional.JoinTbl k on j.test_id = k.test_id and j.alltypes_id = 5000
---- PLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
05:HASH JOIN [INNER JOIN]
| hash predicates: k.test_id = j.test_id
| runtime filters: RF000 <- j.test_id
| row-size=20B cardinality=27
|
|--04:HASH JOIN [LEFT ANTI JOIN]
| | hash predicates: j.test_id = d.id
| | row-size=12B cardinality=10
| |
| |--01:SCAN HDFS [functional.dimtbl d]
| | HDFS partitions=1/1 files=1 size=171B
| | row-size=8B cardinality=10
| |
| 00:SCAN HDFS [functional.jointbl j]
| HDFS partitions=1/1 files=1 size=433B
| predicates: j.alltypes_id = 5000
| row-size=12B cardinality=10
|
02:SCAN HDFS [functional.jointbl k]
HDFS partitions=1/1 files=1 size=433B
runtime filters: RF000 -> k.test_id
row-size=8B cardinality=19
====
# Test legacy-style join hints.
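# The bracketed [broadcast]/[shuffle] hints should yield the same distributed plan as the
# comment-style hints tested below.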
select straight_join * from functional.alltypestiny a
inner join [broadcast] functional.alltypes b on a.id = b.id
inner join [shuffle] functional.alltypessmall c on b.id = c.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: b.id = c.id
| runtime filters: RF000 <- c.id
| row-size=267B cardinality=1
|
|--07:EXCHANGE [HASH(c.id)]
| |
| 02:SCAN HDFS [functional.alltypessmall c]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=89B cardinality=100
|
06:EXCHANGE [HASH(b.id)]
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.id = b.id
| runtime filters: RF002 <- b.id
| row-size=178B cardinality=8
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> b.id
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny a]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> a.id, RF002 -> a.id
row-size=89B cardinality=8
====
# Test traditional commented join hints.
select /* +straight_join */ * from functional.alltypestiny a
inner join /* +broadcast */ functional.alltypes b on a.id = b.id
inner join /* +shuffle */ functional.alltypessmall c on b.id = c.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: b.id = c.id
| runtime filters: RF000 <- c.id
| row-size=267B cardinality=1
|
|--07:EXCHANGE [HASH(c.id)]
| |
| 02:SCAN HDFS [functional.alltypessmall c]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=89B cardinality=100
|
06:EXCHANGE [HASH(b.id)]
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.id = b.id
| runtime filters: RF002 <- b.id
| row-size=178B cardinality=8
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> b.id
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny a]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> a.id, RF002 -> a.id
row-size=89B cardinality=8
====
# Test end-of-line commented join hints.
select
-- +straight_join
* from functional.alltypestiny a
inner join
-- +broadcast
functional.alltypes b on a.id = b.id
inner join
-- +shuffle
functional.alltypessmall c on b.id = c.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: b.id = c.id
| runtime filters: RF000 <- c.id
| row-size=267B cardinality=1
|
|--07:EXCHANGE [HASH(c.id)]
| |
| 02:SCAN HDFS [functional.alltypessmall c]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=89B cardinality=100
|
06:EXCHANGE [HASH(b.id)]
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.id = b.id
| runtime filters: RF002 <- b.id
| row-size=178B cardinality=8
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> b.id
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny a]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> a.id, RF002 -> a.id
row-size=89B cardinality=8
====
# Regression test for IMPALA-1289. Predicates should be assigned correctly
# to inverted joins.
select * from functional.alltypestiny a
inner join functional.alltypestiny b
on (a.id = b.id)
# should be inverted
cross join functional.alltypes c
# assigned to inverted cross join
where c.id != b.id
---- PLAN
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [INNER JOIN]
| predicates: c.id != b.id
| row-size=267B cardinality=8
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: a.id = b.id
| | runtime filters: RF000 <- b.id
| | row-size=178B cardinality=8
| |
| |--01:SCAN HDFS [functional.alltypestiny b]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=89B cardinality=8
| |
| 00:SCAN HDFS [functional.alltypestiny a]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> a.id
| row-size=89B cardinality=8
|
02:SCAN HDFS [functional.alltypes c]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# Tests the generation of a distributed plan in which the input fragments
# of a join node have partitioning exprs that are compatible with, but not equal
# in number to, the corresponding join exprs (IMPALA-1307).
# TODO: Instead of adding redundant hash exprs to the new exchange node, we
# should remove redundant group-by expressions.
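# In the plan below, v2's fragment is already partitioned on
# HASH(tinyint_col,int_col,bigint_col), which matches the join's hash exprs on the v2
# side, so the join is placed in that fragment; v1 is re-partitioned by exchange 09 on
# HASH(tinyint_col,tinyint_col,tinyint_col), i.e. with the redundant hash exprs noted
# in the TODO above.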
select * from
(select tinyint_col, int_col
from functional.alltypestiny
group by tinyint_col, int_col) v1
inner join
(select tinyint_col, int_col, bigint_col
from functional.alltypessmall
group by tinyint_col, int_col, bigint_col) v2
on (v1.tinyint_col = v2.tinyint_col and
v1.tinyint_col = v2.int_col and
v1.tinyint_col = v2.bigint_col)
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: tinyint_col = tinyint_col
| runtime filters: RF000 <- tinyint_col
| row-size=18B cardinality=40
|
|--09:EXCHANGE [HASH(tinyint_col,tinyint_col,tinyint_col)]
| |
| 08:AGGREGATE [FINALIZE]
| | group by: tinyint_col, int_col
| | row-size=5B cardinality=4
| |
| 07:EXCHANGE [HASH(tinyint_col,int_col)]
| |
| 01:AGGREGATE [STREAMING]
| | group by: tinyint_col, int_col
| | row-size=5B cardinality=4
| |
| 00:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=5B cardinality=8
|
06:AGGREGATE [FINALIZE]
| group by: tinyint_col, int_col, bigint_col
| row-size=13B cardinality=10
|
05:EXCHANGE [HASH(tinyint_col,int_col,bigint_col)]
|
03:AGGREGATE [STREAMING]
| group by: tinyint_col, int_col, bigint_col
| row-size=13B cardinality=10
|
02:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
predicates: functional.alltypessmall.tinyint_col = functional.alltypessmall.bigint_col, functional.alltypessmall.tinyint_col = functional.alltypessmall.int_col
runtime filters: RF000 -> functional.alltypessmall.tinyint_col
row-size=13B cardinality=10
====
# Same as above but with a full outer join.
# Tests the generation of a distributed plan in which the input fragments
# of a join node have partitioning exprs that are compatible with, but not equal
# in number to, the corresponding join exprs (IMPALA-1307).
# TODO: Instead of adding redundant hash exprs to the new exchange node, we
# should remove redundant group-by expressions.
select * from
(select tinyint_col, int_col
from functional.alltypestiny
group by tinyint_col, int_col) v1
full outer join
(select tinyint_col, int_col, bigint_col
from functional.alltypessmall
group by tinyint_col, int_col, bigint_col) v2
on (v1.tinyint_col = v2.tinyint_col and
v1.tinyint_col = v2.int_col and
v1.tinyint_col = v2.bigint_col)
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
| hash predicates: tinyint_col = tinyint_col, bigint_col = tinyint_col, int_col = tinyint_col
| row-size=18B cardinality=104
|
|--09:EXCHANGE [HASH(tinyint_col,tinyint_col,tinyint_col)]
| |
| 08:AGGREGATE [FINALIZE]
| | group by: tinyint_col, int_col
| | row-size=5B cardinality=4
| |
| 07:EXCHANGE [HASH(tinyint_col,int_col)]
| |
| 01:AGGREGATE [STREAMING]
| | group by: tinyint_col, int_col
| | row-size=5B cardinality=4
| |
| 00:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=5B cardinality=8
|
06:AGGREGATE [FINALIZE]
| group by: tinyint_col, int_col, bigint_col
| row-size=13B cardinality=100
|
05:EXCHANGE [HASH(tinyint_col,int_col,bigint_col)]
|
03:AGGREGATE [STREAMING]
| group by: tinyint_col, int_col, bigint_col
| row-size=13B cardinality=100
|
02:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=13B cardinality=100
====
# Test conservative treatment of partition-compatible input fragments
# to a partitioned hash join. Making the partitions physically compatible
# would require changing all senders to the lhs or rhs fragment, so we
# do not perform this optimization. The join is expected to be placed
# into the compatible lhs fragment, with the rhs exchanged again to enforce
# physical hash-expr compatibility (IMPALA-1324).
select straight_join * from
(select bigint_col, string_col
from functional.alltypestiny
group by bigint_col, string_col) v1
inner join
(select string_col, bigint_col
from functional.alltypessmall
group by string_col, bigint_col) v2
on (v1.string_col = v2.string_col and
v1.bigint_col = v2.bigint_col)
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: bigint_col = bigint_col, string_col = string_col
| runtime filters: RF000 <- bigint_col, RF001 <- string_col
| row-size=42B cardinality=4
|
|--09:EXCHANGE [HASH(bigint_col,string_col)]
| |
| 08:AGGREGATE [FINALIZE]
| | group by: string_col, bigint_col
| | row-size=21B cardinality=100
| |
| 07:EXCHANGE [HASH(string_col,bigint_col)]
| |
| 03:AGGREGATE [STREAMING]
| | group by: string_col, bigint_col
| | row-size=21B cardinality=100
| |
| 02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=21B cardinality=100
|
06:AGGREGATE [FINALIZE]
| group by: bigint_col, string_col
| row-size=21B cardinality=4
|
05:EXCHANGE [HASH(bigint_col,string_col)]
|
01:AGGREGATE [STREAMING]
| group by: bigint_col, string_col
| row-size=21B cardinality=4
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> functional.alltypestiny.bigint_col, RF001 -> functional.alltypestiny.string_col
row-size=21B cardinality=8
====
# Test that hash exprs are re-ordered as necessary when placing a join into
# a fragment with a compatible data partition (IMPALA-1324)
select straight_join * from
(select distinct int_col, string_col
from functional.alltypestiny) a
left semi join functional.alltypes b
# predicates are in reverse order of compatible group by exprs
on (a.string_col = b.string_col and a.int_col = b.int_col)
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED]
| hash predicates: int_col = b.int_col, string_col = b.string_col
| runtime filters: RF000 <- b.int_col, RF001 <- b.string_col
| row-size=17B cardinality=4
|
|--06:EXCHANGE [HASH(b.int_col,b.string_col)]
| |
| 02:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=17B cardinality=7.30K
|
05:AGGREGATE [FINALIZE]
| group by: int_col, string_col
| row-size=17B cardinality=4
|
04:EXCHANGE [HASH(int_col,string_col)]
|
01:AGGREGATE [STREAMING]
| group by: int_col, string_col
| row-size=17B cardinality=4
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> functional.alltypestiny.int_col, RF001 -> functional.alltypestiny.string_col
row-size=17B cardinality=8
====
# Same as above but with the join inverted.
select straight_join * from
functional.alltypes a
right semi join
(select distinct int_col, string_col
from functional.alltypestiny) b
# predicates are in reverse order of compatible group by exprs
on (a.string_col = b.string_col and a.int_col = b.int_col)
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
| hash predicates: a.int_col = int_col, a.string_col = string_col
| runtime filters: RF000 <- int_col, RF001 <- string_col
| row-size=17B cardinality=4
|
|--05:AGGREGATE [FINALIZE]
| | group by: int_col, string_col
| | row-size=17B cardinality=4
| |
| 04:EXCHANGE [HASH(int_col,string_col)]
| |
| 02:AGGREGATE [STREAMING]
| | group by: int_col, string_col
| | row-size=17B cardinality=4
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=17B cardinality=8
|
06:EXCHANGE [HASH(a.int_col,a.string_col)]
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.int_col, RF001 -> a.string_col
row-size=17B cardinality=7.30K
====
# Complex combined regression test for IMPALA-1307 and IMPALA-1324
select straight_join * from
(select distinct string_col, int_col, smallint_col
from functional.alltypessmall) a
inner join [shuffle]
(select distinct tinyint_col, smallint_col, int_col, string_col
from functional.alltypestiny) b
on (a.int_col = b.int_col and a.smallint_col = b.int_col and
a.smallint_col = b.smallint_col and
a.string_col = b.string_col)
inner join
(select distinct string_col, tinyint_col, smallint_col, int_col, bigint_col
from functional.alltypes
where bigint_col = smallint_col and smallint_col = tinyint_col) c
on (b.int_col = c.smallint_col and b.string_col = c.string_col)
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
16:EXCHANGE [UNPARTITIONED]
|
07:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: int_col = smallint_col, string_col = string_col
| runtime filters: RF000 <- smallint_col, RF001 <- string_col
| row-size=67B cardinality=3.65K
|
|--15:EXCHANGE [HASH(string_col,smallint_col,smallint_col)]
| |
| 14:AGGREGATE [FINALIZE]
| | group by: string_col, tinyint_col, smallint_col, int_col, bigint_col
| | row-size=28B cardinality=730
| |
| 13:EXCHANGE [HASH(string_col,tinyint_col,smallint_col,int_col,bigint_col)]
| |
| 05:AGGREGATE [STREAMING]
| | group by: string_col, tinyint_col, smallint_col, int_col, bigint_col
| | row-size=28B cardinality=730
| |
| 04:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: bigint_col = smallint_col, smallint_col = tinyint_col
| row-size=28B cardinality=730
|
06:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: int_col = int_col, string_col = string_col
| runtime filters: RF004 <- int_col, RF005 <- string_col
| row-size=39B cardinality=10
|
|--12:EXCHANGE [HASH(string_col,int_col,int_col)]
| |
| 11:AGGREGATE [FINALIZE]
| | group by: tinyint_col, smallint_col, int_col, string_col
| | row-size=20B cardinality=1
| |
| 10:EXCHANGE [HASH(tinyint_col,smallint_col,int_col,string_col)]
| |
| 03:AGGREGATE [STREAMING]
| | group by: tinyint_col, smallint_col, int_col, string_col
| | row-size=20B cardinality=1
| |
| 02:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| predicates: functional.alltypestiny.int_col = functional.alltypestiny.smallint_col
| runtime filters: RF000 -> functional.alltypestiny.smallint_col, RF001 -> functional.alltypestiny.string_col
| row-size=20B cardinality=1
|
09:AGGREGATE [FINALIZE]
| group by: string_col, int_col, smallint_col
| row-size=19B cardinality=10
|
08:EXCHANGE [HASH(string_col,int_col,smallint_col)]
|
01:AGGREGATE [STREAMING]
| group by: string_col, int_col, smallint_col
| row-size=19B cardinality=10
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
predicates: functional.alltypessmall.smallint_col = functional.alltypessmall.int_col
runtime filters: RF000 -> functional.alltypessmall.int_col, RF001 -> functional.alltypessmall.string_col, RF004 -> functional.alltypessmall.int_col, RF005 -> functional.alltypessmall.string_col
row-size=19B cardinality=10
====
# Assignment of predicates from the On-clause of an
# anti join; inner join followed by anti join (IMPALA-1387)
select a.*
from functional.alltypes a inner join functional.alltypes b
on a.id = b.id
where not exists (select *
from functional.alltypes c
# The predicate on a (visible side of the anti join) is assigned to the anti join; the
# predicate on a and b is also assigned to the anti join
where a.id = c.id and a.int_col = b.int_col and a.tinyint_col = 10
# Predicate on c (invisible side of anti join) is assigned to the scan node
and c.int_col < 10)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT ANTI JOIN]
| hash predicates: a.id = c.id
| other join predicates: a.tinyint_col = 10, a.int_col = b.int_col
| row-size=97B cardinality=7.30K
|
|--02:SCAN HDFS [functional.alltypes c]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: c.int_col < 10
| row-size=8B cardinality=730
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.id = b.id
| runtime filters: RF000 <- b.id
| row-size=97B cardinality=7.30K
|
|--01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id
row-size=89B cardinality=7.30K
====
# Assignment of predicates from the On-clause of an anti join;
# sequence of inner joins interleaved with an anti join.
select *
from functional.alltypes a inner join functional.alltypes b
on (a.id = b.id)
left anti join functional.alltypestiny c
# The predicate on a and b from the On-clause is assigned to the anti join;
# the predicate on c is assigned to the scan node
on (a.int_col = c.int_col and a.tinyint_col = b.tinyint_col and a.float_col = 2.1
and c.bool_col = false)
inner join functional.alltypesagg d
on (a.tinyint_col = d.tinyint_col and a.int_col < 10)
where a.float_col < b.float_col
---- PLAN
PLAN-ROOT SINK
|
08:HASH JOIN [INNER JOIN]
| hash predicates: d.tinyint_col = a.tinyint_col
| runtime filters: RF000 <- a.tinyint_col
| row-size=273B cardinality=892.22K
|
|--07:HASH JOIN [LEFT ANTI JOIN]
| | hash predicates: a.int_col = c.int_col
| | other join predicates: a.float_col = 2.1, a.tinyint_col = b.tinyint_col
| | row-size=178B cardinality=730
| |
| |--02:SCAN HDFS [functional.alltypestiny c]
| | HDFS partitions=4/4 files=4 size=460B
| | predicates: c.int_col < 10, c.bool_col = FALSE
| | row-size=5B cardinality=1
| |
| 06:HASH JOIN [INNER JOIN]
| | hash predicates: b.id = a.id
| | other predicates: a.float_col < b.float_col
| | runtime filters: RF002 <- a.id
| | row-size=178B cardinality=730
| |
| |--00:SCAN HDFS [functional.alltypes a]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | predicates: a.int_col < 10
| | row-size=89B cardinality=730
| |
| 01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF002 -> b.id
| row-size=89B cardinality=7.30K
|
03:SCAN HDFS [functional.alltypesagg d]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> d.tinyint_col
row-size=95B cardinality=11.00K
====
# Inner joins with non-equi join predicates
select *
from functional.alltypestiny a inner join functional.alltypessmall b
on a.id < b.id
inner join functional.alltypes c
on a.int_col != c.int_col
inner join functional.alltypesagg d
on b.tinyint_col > d.int_col or b.id != d.id
where a.int_col = 10 and c.bigint_col = d.bigint_col
---- PLAN
PLAN-ROOT SINK
|
06:NESTED LOOP JOIN [INNER JOIN]
| predicates: a.id < b.id, b.tinyint_col > d.int_col OR b.id != d.id
| row-size=362B cardinality=79.66K
|
|--01:SCAN HDFS [functional.alltypessmall b]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=89B cardinality=100
|
05:NESTED LOOP JOIN [INNER JOIN]
| predicates: a.int_col != c.int_col
| row-size=273B cardinality=79.66K
|
|--00:SCAN HDFS [functional.alltypestiny a]
| HDFS partitions=4/4 files=4 size=460B
| predicates: a.int_col = 10
| row-size=89B cardinality=4
|
04:HASH JOIN [INNER JOIN]
| hash predicates: d.bigint_col = c.bigint_col
| runtime filters: RF000 <- c.bigint_col
| row-size=184B cardinality=79.66K
|
|--02:SCAN HDFS [functional.alltypes c]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=89B cardinality=7.30K
|
03:SCAN HDFS [functional.alltypesagg d]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> d.bigint_col
row-size=95B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
11:EXCHANGE [UNPARTITIONED]
|
06:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
| predicates: a.id < b.id, b.tinyint_col > d.int_col OR b.id != d.id
| row-size=362B cardinality=79.66K
|
|--10:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=89B cardinality=100
|
05:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
| predicates: a.int_col != c.int_col
| row-size=273B cardinality=79.66K
|
|--09:EXCHANGE [BROADCAST]
| |
| 00:SCAN HDFS [functional.alltypestiny a]
| HDFS partitions=4/4 files=4 size=460B
| predicates: a.int_col = 10
| row-size=89B cardinality=4
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: d.bigint_col = c.bigint_col
| runtime filters: RF000 <- c.bigint_col
| row-size=184B cardinality=79.66K
|
|--08:EXCHANGE [HASH(c.bigint_col)]
| |
| 02:SCAN HDFS [functional.alltypes c]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=89B cardinality=7.30K
|
07:EXCHANGE [HASH(d.bigint_col)]
|
03:SCAN HDFS [functional.alltypesagg d]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> d.bigint_col
row-size=95B cardinality=11.00K
====
# Inner joins between inline views with non-equi join predicates
select *
from
(select a.*
from functional.alltypestiny a inner join functional.alltypessmall b
on a.id <= b.id) vv1
inner join
(select c.*
from functional.alltypesagg c inner join functional.alltypes d
on c.int_col != d.int_col where c.tinyint_col = 10) vv2
on vv1.bigint_col > vv2.bigint_col
where vv1.tinyint_col < 10
---- PLAN
PLAN-ROOT SINK
|
06:NESTED LOOP JOIN [INNER JOIN]
| predicates: a.bigint_col > c.bigint_col
| row-size=192B cardinality=1.22K
|
|--02:NESTED LOOP JOIN [INNER JOIN]
| | predicates: a.id <= b.id
| | row-size=93B cardinality=100
| |
| |--00:SCAN HDFS [functional.alltypestiny a]
| | HDFS partitions=4/4 files=4 size=460B
| | predicates: a.tinyint_col < 10
| | row-size=89B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=4B cardinality=100
|
05:NESTED LOOP JOIN [INNER JOIN]
| predicates: c.int_col != d.int_col
| row-size=99B cardinality=1.22K
|
|--04:SCAN HDFS [functional.alltypes d]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
03:SCAN HDFS [functional.alltypesagg c]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: c.tinyint_col = 10
row-size=95B cardinality=1.22K
====
# Inner, left-outer, left-semi and left-anti joins with non-equi join
# predicates
select *
from functional.alltypestiny a inner join functional.alltypessmall b
on a.id < b.id
left outer join functional.alltypes c
on a.int_col != c.int_col or a.tinyint_col > c.tinyint_col
left semi join functional.alltypesagg d
on b.bigint_col > d.bigint_col
left anti join functional.alltypesagg e
on c.string_col != e.string_col
where a.id < 10
---- PLAN
PLAN-ROOT SINK
|
08:NESTED LOOP JOIN [RIGHT ANTI JOIN]
| join predicates: c.string_col != e.string_col
| row-size=267B cardinality=100
|
|--07:NESTED LOOP JOIN [RIGHT SEMI JOIN]
| | join predicates: b.bigint_col > d.bigint_col
| | row-size=267B cardinality=100
| |
| |--06:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| | | join predicates: a.int_col != c.int_col OR a.tinyint_col > c.tinyint_col
| | | row-size=267B cardinality=100
| | |
| | |--05:NESTED LOOP JOIN [INNER JOIN]
| | | | predicates: a.id < b.id
| | | | row-size=178B cardinality=100
| | | |
| | | |--00:SCAN HDFS [functional.alltypestiny a]
| | | | HDFS partitions=4/4 files=4 size=460B
| | | | predicates: a.id < 10
| | | | row-size=89B cardinality=1
| | | |
| | | 01:SCAN HDFS [functional.alltypessmall b]
| | | HDFS partitions=4/4 files=4 size=6.32KB
| | | row-size=89B cardinality=100
| | |
| | 02:SCAN HDFS [functional.alltypes c]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | row-size=89B cardinality=7.30K
| |
| 03:SCAN HDFS [functional.alltypesagg d]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=8B cardinality=11.00K
|
04:SCAN HDFS [functional.alltypesagg e]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=15B cardinality=11.00K
====
# Regression test for IMPALA-2495: Crash: impala::InPredicate::SetLookupPrepare
select count(id) from functional.alltypestiny t1
left join (select coalesce(1, 10) as int_col
from functional.alltypessmall) t2 on t1.id = t2.int_col
where t2.int_col in (t2.int_col, 10);
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(id)
| row-size=8B cardinality=1
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.id = if(TupleIsNull(1), NULL, 1)
| other predicates: if(TupleIsNull(1), NULL, 1) IN (if(TupleIsNull(1), NULL, 1), 10)
| row-size=4B cardinality=8
|
|--01:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=0B cardinality=100
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# Test queries that appear earlier in this file, but substitute "<=>" or "IS DISTINCT
# FROM" for "=" in the join predicates.
select *
from functional.testtbl t1 join functional.testtbl t2
where t1.id <=> t2.id and t1.zip = 94611
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.id IS NOT DISTINCT FROM t2.id
| runtime filters: RF000 <- t2.id
| row-size=48B cardinality=0
|
|--01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
runtime filters: RF000 -> t1.id
row-size=24B cardinality=0
====
select *
from functional.testtbl t1 join functional.testtbl t2
where t1.id is not distinct from t2.id and t1.zip = 94611
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.id IS NOT DISTINCT FROM t2.id
| runtime filters: RF000 <- t2.id
| row-size=48B cardinality=0
|
|--01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
runtime filters: RF000 -> t1.id
row-size=24B cardinality=0
====
select *
from functional.testtbl t1 join functional.testtbl t2
where (t1.id IS DISTINCT FROM t2.id) and t1.zip = 94611
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [INNER JOIN]
| predicates: (t1.id IS DISTINCT FROM t2.id)
| row-size=48B cardinality=0
|
|--01:SCAN HDFS [functional.testtbl t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=24B cardinality=0
|
00:SCAN HDFS [functional.testtbl t1]
HDFS partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
row-size=24B cardinality=0
====
# Test that "is not distinct from" plans the same as "=" in the same query above.
select t1.*
from (select * from functional.alltypestiny) t1
join (select * from functional.alltypestiny) t2 on (t1.id is not distinct from t2.id)
join functional.alltypestiny t3 on (coalesce(t1.id, t2.id) is not distinct from t3.id)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: coalesce(functional.alltypestiny.id, functional.alltypestiny.id) IS NOT DISTINCT FROM t3.id
| row-size=97B cardinality=8
|
|--02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
03:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypestiny.id IS NOT DISTINCT FROM functional.alltypestiny.id
| runtime filters: RF000 <- functional.alltypestiny.id
| row-size=93B cardinality=8
|
|--01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> functional.alltypestiny.id
row-size=89B cardinality=8
====
select *
from functional.alltypesagg a
full outer join functional.alltypessmall b on (a.id <=> b.id and a.int_col = b.int_col)
right join functional.alltypesaggnonulls c on (a.id = c.id and b.string_col <=> c.string_col)
where a.day >= 6
and b.month > 2
and c.day < 3
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15
and a.float_col - c.double_col < 0
and (b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: c.id = a.id, c.string_col IS NOT DISTINCT FROM b.string_col
| other predicates: a.tinyint_col = 15, b.string_col = '15', a.`day` >= 6, b.`month` > 2, a.float_col - c.double_col < 0, a.tinyint_col + b.tinyint_col < 15, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000)
| row-size=279B cardinality=2.00K
|
|--03:HASH JOIN [FULL OUTER JOIN]
| | hash predicates: a.id IS NOT DISTINCT FROM b.id, a.int_col = b.int_col
| | row-size=184B cardinality=561
| |
| |--01:SCAN HDFS [functional.alltypessmall b]
| | partition predicates: b.`month` > 2
| | HDFS partitions=2/4 files=2 size=3.17KB
| | predicates: b.string_col = '15'
| | row-size=89B cardinality=5
| |
| 00:SCAN HDFS [functional.alltypesagg a]
| partition predicates: a.`day` >= 6
| HDFS partitions=5/11 files=5 size=372.38KB
| predicates: a.tinyint_col = 15
| row-size=95B cardinality=556
|
02:SCAN HDFS [functional.alltypesaggnonulls c]
partition predicates: c.`day` < 3
HDFS partitions=2/10 files=2 size=148.10KB
row-size=95B cardinality=2.00K
====
select t1.d, t2.d from functional.nulltable t1, functional.nulltable t2
where not(t1.d IS DISTINCT FROM t2.d)
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [INNER JOIN]
| predicates: NOT (t1.d IS DISTINCT FROM t2.d)
| row-size=8B cardinality=1
|
|--01:SCAN HDFS [functional.nulltable t2]
| HDFS partitions=1/1 files=1 size=18B
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.nulltable t1]
HDFS partitions=1/1 files=1 size=18B
row-size=4B cardinality=1
====
select t1.d, t2.d
from functional.nulltable t1, functional.nulltable t2, functional.nulltable t3
where t1.d IS DISTINCT FROM t2.d
and t3.a != t2.g
---- PLAN
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [INNER JOIN]
| predicates: t3.a != t2.g
| row-size=34B cardinality=1
|
|--02:SCAN HDFS [functional.nulltable t3]
| HDFS partitions=1/1 files=1 size=18B
| row-size=13B cardinality=1
|
03:NESTED LOOP JOIN [INNER JOIN]
| predicates: t1.d IS DISTINCT FROM t2.d
| row-size=21B cardinality=1
|
|--00:SCAN HDFS [functional.nulltable t1]
| HDFS partitions=1/1 files=1 size=18B
| row-size=4B cardinality=1
|
01:SCAN HDFS [functional.nulltable t2]
HDFS partitions=1/1 files=1 size=18B
row-size=17B cardinality=1
====
# IMPALA-3450: limits on join nodes are reflected in cardinality estimates. The check for
# this is embedded in PlannerTestBase.java and is not visible in these plans, as they only
# use explain_level=1.
select a.c_custkey as c_custkey from tpch.customer a, tpch.customer b limit 1
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [CROSS JOIN]
| limit: 1
| row-size=8B cardinality=1
|
|--01:SCAN HDFS [tpch.customer b]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=0B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer a]
HDFS partitions=1/1 files=1 size=23.08MB
row-size=8B cardinality=150.00K
====
select a.c_custkey as c_custkey from tpch.customer a left semi join tpch.customer b
using (c_custkey) limit 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.c_custkey = b.c_custkey
| runtime filters: RF000 <- b.c_custkey
| limit: 1
| row-size=8B cardinality=1
|
|--01:SCAN HDFS [tpch.customer b]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=8B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer a]
HDFS partitions=1/1 files=1 size=23.08MB
runtime filters: RF000 -> a.c_custkey
row-size=8B cardinality=150.00K
====
select b.c_custkey as c_custkey from tpch.customer a right semi join tpch.customer b
using (c_custkey) limit 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: a.c_custkey = b.c_custkey
| runtime filters: RF000 <- b.c_custkey
| limit: 1
| row-size=8B cardinality=1
|
|--01:SCAN HDFS [tpch.customer b]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=8B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer a]
HDFS partitions=1/1 files=1 size=23.08MB
runtime filters: RF000 -> a.c_custkey
row-size=8B cardinality=150.00K
====
select a.c_custkey as c_custkey from tpch.customer a left outer join tpch.customer b
using (c_custkey) limit 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.c_custkey = b.c_custkey
| limit: 1
| row-size=16B cardinality=1
|
|--01:SCAN HDFS [tpch.customer b]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=8B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer a]
HDFS partitions=1/1 files=1 size=23.08MB
row-size=8B cardinality=150.00K
====
select b.c_custkey as c_custkey from tpch.customer a right outer join tpch.customer b
using (c_custkey) limit 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: a.c_custkey = b.c_custkey
| runtime filters: RF000 <- b.c_custkey
| limit: 1
| row-size=16B cardinality=1
|
|--01:SCAN HDFS [tpch.customer b]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=8B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer a]
HDFS partitions=1/1 files=1 size=23.08MB
runtime filters: RF000 -> a.c_custkey
row-size=8B cardinality=150.00K
====
select a.c_custkey as c_custkey from tpch.customer a full outer join tpch.customer b
using (c_custkey) limit 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [FULL OUTER JOIN]
| hash predicates: a.c_custkey = b.c_custkey
| limit: 1
| row-size=16B cardinality=1
|
|--01:SCAN HDFS [tpch.customer b]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=8B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer a]
HDFS partitions=1/1 files=1 size=23.08MB
row-size=8B cardinality=150.00K
====
select a.c_custkey as c_custkey from tpch.customer a left anti join tpch.customer b
using (c_custkey) limit 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT ANTI JOIN]
| hash predicates: a.c_custkey = b.c_custkey
| limit: 1
| row-size=8B cardinality=1
|
|--01:SCAN HDFS [tpch.customer b]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=8B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer a]
HDFS partitions=1/1 files=1 size=23.08MB
row-size=8B cardinality=150.00K
====
select b.c_custkey as c_custkey from tpch.customer a right anti join tpch.customer b
using (c_custkey) limit 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT ANTI JOIN]
| hash predicates: a.c_custkey = b.c_custkey
| limit: 1
| row-size=8B cardinality=1
|
|--01:SCAN HDFS [tpch.customer b]
| HDFS partitions=1/1 files=1 size=23.08MB
| row-size=8B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer a]
HDFS partitions=1/1 files=1 size=23.08MB
row-size=8B cardinality=150.00K
====
with
t1 as (select cast(0 as decimal(20, 0)) as c1),
t2 as (select cast(0 as decimal(19, 19)) as c2)
select * from t1 join t2 on (c1 = c2)
---- QUERYOPTIONS
decimal_v2=false
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: c1 = c2
| row-size=32B cardinality=1
|
|--01:UNION
| constant-operands=1
| row-size=16B cardinality=1
|
00:UNION
constant-operands=1
row-size=16B cardinality=1
====
# Cannot create a hash join because the decimal types are incompatible under decimal_v2.
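# (The common type for DECIMAL(20,0) and DECIMAL(19,19) would need 20 digits before and
# 19 digits after the decimal point, i.e. DECIMAL(39,19), which exceeds the maximum
# precision of 38, so neither operand can be cast without losing precision.)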
with
t1 as (select cast(0 as decimal(20, 0)) as c1),
t2 as (select cast(0 as decimal(19, 19)) as c2)
select * from t1 join t2 on (c1 = c2)
---- QUERYOPTIONS
decimal_v2=true
---- PLAN
InternalException: Unable create a hash join with equi-join predicate c1 = c2 because the operands cannot be cast without loss of precision. Operand types: DECIMAL(20,0) = DECIMAL(19,19).
====
with
t1 as (select cast(0 as decimal(19, 0)) as c1),
t2 as (select cast(0 as decimal(19, 19)) as c2)
select * from t1 join t2 on (c1 = c2)
---- QUERYOPTIONS
decimal_v2=true
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: c1 = c2
| row-size=32B cardinality=1
|
|--01:UNION
| constant-operands=1
| row-size=16B cardinality=1
|
00:UNION
constant-operands=1
row-size=16B cardinality=1
====
# Join involving tables with no table stats.
# One of the tables (alltypes) is a compressed text file.
# Tests that the default join strategy is broadcast.
select * from functional_text_gzip.emptytable a inner join
functional_text_gzip.alltypes b on a.f2 = b.int_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.f2 = b.int_col
| runtime filters: RF000 <- b.int_col
| row-size=96B cardinality=0
|
|--01:SCAN HDFS [functional_text_gzip.alltypes b]
| HDFS partitions=24/24 files=24 size=77.88KB
| row-size=80B cardinality=unavailable
|
00:SCAN HDFS [functional_text_gzip.emptytable a]
partitions=0/0 files=0 size=0B
runtime filters: RF000 -> a.f2
row-size=16B cardinality=0
====
# Join involving tables with no table stats.
# One of the tables (alltypes) is a compressed text file.
# Tests that the default join strategy is broadcast.
select * from functional_text_bzip.emptytable a inner join
functional_text_bzip.alltypes b on a.f2 = b.int_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.f2 = b.int_col
| runtime filters: RF000 <- b.int_col
| row-size=96B cardinality=0
|
|--01:SCAN HDFS [functional_text_bzip.alltypes b]
| HDFS partitions=24/24 files=24 size=56.23KB
| row-size=80B cardinality=unavailable
|
00:SCAN HDFS [functional_text_bzip.emptytable a]
partitions=0/0 files=0 size=0B
runtime filters: RF000 -> a.f2
row-size=16B cardinality=0
====
# Join involving tables with no table stats.
# One of the tables (alltypes) is a compressed text file.
# Tests that the default join strategy is broadcast.
select * from functional_text_lzo.emptytable a inner join
functional_text_lzo.alltypes b on a.f2 = b.int_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.f2 = b.int_col
| runtime filters: RF000 <- b.int_col
| row-size=96B cardinality=0
|
|--01:SCAN HDFS [functional_text_lzo.alltypes b]
| HDFS partitions=24/24 files=24 size=123.32KB
| row-size=80B cardinality=unavailable
|
00:SCAN HDFS [functional_text_lzo.emptytable a]
partitions=0/0 files=0 size=0B
runtime filters: RF000 -> a.f2
row-size=16B cardinality=0
====
# Join involving tables with no table stats.
# One of the tables (alltypes) is a compressed text file.
# Tests that the default join strategy is broadcast.
select * from functional_text_snap.emptytable a inner join
functional_text_snap.alltypes b on a.f2 = b.int_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.f2 = b.int_col
| runtime filters: RF000 <- b.int_col
| row-size=96B cardinality=0
|
|--01:SCAN HDFS [functional_text_snap.alltypes b]
| HDFS partitions=24/24 files=24 size=121.15KB
| row-size=80B cardinality=unavailable
|
00:SCAN HDFS [functional_text_snap.emptytable a]
partitions=0/0 files=0 size=0B
runtime filters: RF000 -> a.f2
row-size=16B cardinality=0
====