blob: 15e01d9ccacad5402362ae6e3691e7072362f405 [file] [log] [blame]
# Two-way join query
select straight_join * from functional.alltypesagg t1, functional.alltypesnopart t2
where t1.year = t2.int_col and t2.id < 10 and t1.id = 10
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t2.int_col
| runtime filters: RF000 <- t2.int_col
| row-size=167B cardinality=1
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.id < 10
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: t1.id = 10
runtime filters: RF000 -> t1.`year`
row-size=95B cardinality=1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: t1.`year` = t2.int_col
| runtime filters: RF000 <- t2.int_col
| row-size=167B cardinality=1
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.id < 10
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: t1.id = 10
runtime filters: RF000 -> t1.`year`
row-size=95B cardinality=1
====
# Four-way join query
select straight_join * from functional.alltypestiny t1, functional.alltypesagg t2,
functional.alltypestiny t3, functional.alltypesnopart t4
where t1.year = t2.int_col and t3.tinyint_col = t2.id and t3.month = t4.id and
t2.bool_col = true and t4.bigint_col < 10
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [INNER JOIN]
| hash predicates: t3.`month` = t4.id
| runtime filters: RF000 <- t4.id
| row-size=345B cardinality=9
|
|--03:SCAN HDFS [functional.alltypesnopart t4]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t4.bigint_col < 10
| row-size=72B cardinality=0
|
05:HASH JOIN [INNER JOIN]
| hash predicates: t2.id = t3.tinyint_col
| runtime filters: RF002 <- t3.tinyint_col
| row-size=273B cardinality=9
|
|--02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> t3.`month`
| row-size=89B cardinality=8
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t2.int_col
| runtime filters: RF004 <- t2.int_col
| row-size=184B cardinality=92
|
|--01:SCAN HDFS [functional.alltypesagg t2]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: t2.bool_col = TRUE
| runtime filters: RF002 -> t2.id
| row-size=95B cardinality=5.50K
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF004 -> t1.`year`
row-size=89B cardinality=8
====
# Two-way join query where multiple runtime filters are generated
select straight_join * from functional.alltypesagg t1, functional.alltypesnopart t2
where t1.year = t2.int_col and t1.month = t2.bigint_col and t2.id = 10
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t2.int_col, t1.`month` = t2.bigint_col
| runtime filters: RF000 <- t2.int_col, RF001 <- t2.bigint_col
| row-size=167B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.id = 10
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`year`, RF001 -> t1.`month`
row-size=95B cardinality=11.00K
====
# Two-way join query with an inline view in the build side of the join
select straight_join * from functional.alltypesagg t1,
(select * from functional.alltypesnopart t2 where t2.id = 1) v
where t1.year = v.int_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t2.int_col
| runtime filters: RF000 <- t2.int_col
| row-size=167B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.id = 1
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`year`
row-size=95B cardinality=11.00K
====
# Two-way join query with an inline view in the build side of the join where the
# right child of the join predicate is an arithmetic expr between two slots
select straight_join * from functional.alltypesagg t1,
(select id + int_col as id1 from functional.alltypesnopart t2
where t2.bigint_col < 10) v
where v.id1 = t1.year
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = id + int_col
| runtime filters: RF000 <- id + int_col
| row-size=111B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.bigint_col < 10
| row-size=16B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`year`
row-size=95B cardinality=11.00K
====
# Two-way join query where the lhs of the join predicate is an arithmetic expr
select straight_join * from functional.alltypesagg t1, functional.alltypesnopart t2
where t1.year + 1 = t2.id and t2.int_col < 10
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` + 1 = t2.id
| runtime filters: RF000 <- t2.id
| row-size=167B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.int_col < 10
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`year` + 1
row-size=95B cardinality=11.00K
====
# Two-way join query with join predicates that are not suitable for hashing
select straight_join * from functional.alltypesagg t1, functional.alltypesnopart t2
where t1.id = t2.id and t1.year + t2.int_col = t1.month + t2.tinyint_col
and t1.year = t1.month + t2.int_col and t1.year + t2.smallint_col = t2.tinyint_col
and t1.int_col = 1 and 1 = t2.bigint_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| other predicates: t1.`year` = t1.`month` + t2.int_col, t2.tinyint_col = t1.`year` + t2.smallint_col, t1.`year` + t2.int_col = t1.`month` + t2.tinyint_col
| runtime filters: RF000 <- t2.id
| row-size=167B cardinality=11
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.bigint_col = 1
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: t1.int_col = 1
runtime filters: RF000 -> t1.id
row-size=95B cardinality=11
====
# Two-way join query where the left child of the equi-join predicate
# is an arithmetic expr between two slots from the same scan tuple
select straight_join * from functional.alltypesagg t1, functional.alltypesnopart t2
where t1.year + t1.month = t2.id and t1.int_col + 1 - t1.tinyint_col = t2.smallint_col + 10
and t1.int_col * 100 = t2.bigint_col / 100 and t2.bool_col = false
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` + t1.`month` = t2.id, t1.int_col * 100 = t2.bigint_col / 100, t1.int_col + 1 - t1.tinyint_col = t2.smallint_col + 10
| runtime filters: RF000 <- t2.id, RF001 <- t2.bigint_col / 100, RF002 <- t2.smallint_col + 10
| row-size=167B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.bool_col = FALSE
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`year` + t1.`month`, RF001 -> t1.int_col * 100, RF002 -> t1.int_col + 1 - t1.tinyint_col
row-size=95B cardinality=11.00K
====
# Three-way join query with an inline view on the probe side of the join where the left
# child of the equi-join predicate is an arithmetic expr between two slots from
# different scan tuples
select straight_join * from
(select t1.year + t2.year as year from functional.alltypesagg t1 inner join
functional.alltypes t2 on t1.id = t2.id) v,
functional.alltypesnopart t3
where v.year = t3.int_col and t3.bool_col = true
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` + t2.`year` = t3.int_col
| row-size=88B cardinality=7.81K
|
|--03:SCAN HDFS [functional.alltypesnopart t3]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t3.bool_col = TRUE
| row-size=72B cardinality=0
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
| row-size=16B cardinality=7.81K
|
|--01:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.id
row-size=8B cardinality=11.00K
====
# Two-way join query with an inline view in the build side of the join that has an
# aggregation
select straight_join * from functional.alltypesagg t1,
(select id, count(int_col) as cnt from functional.alltypesnopart t2 group by id
having count(int_col) < 10) v
where v.cnt = t1.year and v.id = t1.month
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.`month` = id, t1.`year` = count(int_col)
| runtime filters: RF000 <- id, RF001 <- count(int_col)
| row-size=107B cardinality=0
|
|--02:AGGREGATE [FINALIZE]
| | output: count(int_col)
| | group by: id
| | having: count(int_col) < 10
| | row-size=12B cardinality=0
| |
| 01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=8B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
partition predicates: t1.year < 10
partitions=0/11 files=0 size=0B
runtime filters: RF000 -> t1.`month`, RF001 -> t1.`year`
row-size=95B cardinality=0
====
# Two-way join query with an inline view in the build side of the join that has a
# two-way join (bushy plan)
select straight_join * from functional.alltypesagg t1,
(select t2.id + t3.id as id, t3.tinyint_col from functional.alltypesnopart t2,
functional.alltypesnopart t3 where t2.int_col = t3.int_col) v
where v.id = t1.year and t1.month = v.tinyint_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.`month` = t3.tinyint_col, t1.`year` = t2.id + t3.id
| runtime filters: RF000 <- t3.tinyint_col, RF001 <- t2.id + t3.id
| row-size=112B cardinality=11.00K
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: t2.int_col = t3.int_col
| | runtime filters: RF004 <- t3.int_col
| | row-size=17B cardinality=0
| |
| |--02:SCAN HDFS [functional.alltypesnopart t3]
| | HDFS partitions=1/1 files=0 size=0B
| | row-size=9B cardinality=0
| |
| 01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| runtime filters: RF004 -> t2.int_col
| row-size=8B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`month`, RF001 -> t1.`year`
row-size=95B cardinality=11.00K
====
# Four-way join query with an inline view in the build side of the join where the
# inline view has a tree-way cyclic join (bushy plan)
select straight_join * from functional.alltypesagg t1,
(select t2.int_col, t3.id, t4.tinyint_col from functional.alltypesnopart t2,
functional.alltypesnopart t3, functional.alltypesnopart t4
where t2.id = t3.id and t3.int_col = t4.int_col and t4.tinyint_col = t2.tinyint_col) v
where t1.year = v.int_col and t1.year = v.id and t1.month = v.tinyint_col
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t2.int_col, t1.`month` = t4.tinyint_col
| runtime filters: RF000 <- t2.int_col, RF001 <- t4.tinyint_col
| row-size=117B cardinality=11.00K
|
|--05:HASH JOIN [INNER JOIN]
| | hash predicates: t2.tinyint_col = t4.tinyint_col, t3.int_col = t4.int_col
| | runtime filters: RF004 <- t4.tinyint_col, RF005 <- t4.int_col
| | row-size=22B cardinality=0
| |
| |--03:SCAN HDFS [functional.alltypesnopart t4]
| | HDFS partitions=1/1 files=0 size=0B
| | row-size=5B cardinality=0
| |
| 04:HASH JOIN [INNER JOIN]
| | hash predicates: t2.id = t3.id
| | runtime filters: RF008 <- t3.id
| | row-size=17B cardinality=0
| |
| |--02:SCAN HDFS [functional.alltypesnopart t3]
| | HDFS partitions=1/1 files=0 size=0B
| | runtime filters: RF005 -> t3.int_col
| | row-size=8B cardinality=0
| |
| 01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.int_col = t2.id
| runtime filters: RF004 -> t2.tinyint_col, RF008 -> t2.id
| row-size=9B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`year`, RF001 -> t1.`month`
row-size=95B cardinality=11.00K
====
# Four-way join query between base tables in a star schema
select straight_join * from functional.alltypesagg t1, functional.alltypesnopart t2,
functional.alltypesnopart t3, functional.alltypesnopart t4
where t1.year = t2.id and t1.year = t3.int_col and t1.year = t4.tinyint_col and
t2.bool_col = false and t3.bool_col = true and t4.bigint_col in (1,2)
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t4.tinyint_col
| runtime filters: RF000 <- t4.tinyint_col
| row-size=311B cardinality=11.00K
|
|--03:SCAN HDFS [functional.alltypesnopart t4]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t4.bigint_col IN (1, 2)
| row-size=72B cardinality=0
|
05:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t3.int_col
| runtime filters: RF002 <- t3.int_col
| row-size=239B cardinality=11.00K
|
|--02:SCAN HDFS [functional.alltypesnopart t3]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t3.bool_col = TRUE
| runtime filters: RF000 -> t3.int_col
| row-size=72B cardinality=0
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t2.id
| runtime filters: RF004 <- t2.id
| row-size=167B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.bool_col = FALSE
| runtime filters: RF000 -> t2.id, RF002 -> t2.id
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`year`, RF002 -> t1.`year`, RF004 -> t1.`year`
row-size=95B cardinality=11.00K
====
# Five-way cyclic join query
select straight_join * from functional.alltypesagg t1, functional.alltypesnopart t2,
functional.alltypessmall t3, functional.alltypesnopart t4, functional.alltypesnopart t5
where t1.year = t2.id and t2.int_col = t3.tinyint_col and t3.month = t4.bigint_col
and t4.smallint_col = t5.smallint_col and t5.id = t1.month
---- PLAN
PLAN-ROOT SINK
|
08:HASH JOIN [INNER JOIN]
| hash predicates: t1.`month` = t5.id, t4.smallint_col = t5.smallint_col
| runtime filters: RF000 <- t5.id, RF001 <- t5.smallint_col
| row-size=400B cardinality=11.00K
|
|--04:SCAN HDFS [functional.alltypesnopart t5]
| HDFS partitions=1/1 files=0 size=0B
| row-size=72B cardinality=0
|
07:HASH JOIN [INNER JOIN]
| hash predicates: t3.`month` = t4.bigint_col
| runtime filters: RF004 <- t4.bigint_col
| row-size=328B cardinality=11.00K
|
|--03:SCAN HDFS [functional.alltypesnopart t4]
| HDFS partitions=1/1 files=0 size=0B
| runtime filters: RF001 -> t4.smallint_col
| row-size=72B cardinality=0
|
06:HASH JOIN [INNER JOIN]
| hash predicates: t2.int_col = t3.tinyint_col
| runtime filters: RF006 <- t3.tinyint_col
| row-size=256B cardinality=11.00K
|
|--02:SCAN HDFS [functional.alltypessmall t3]
| HDFS partitions=4/4 files=4 size=6.32KB
| runtime filters: RF004 -> t3.`month`
| row-size=89B cardinality=100
|
05:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t2.id
| runtime filters: RF008 <- t2.id
| row-size=167B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| runtime filters: RF006 -> t2.int_col
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`month`, RF008 -> t1.`year`
row-size=95B cardinality=11.00K
====
# Two-way left outer join query; no runtime filters should be generated from the
# ON-clause equi-join predicate
select straight_join * from functional.alltypesagg t1 left outer join functional.alltypesnopart t2
on t1.year = t2.int_col
where t2.id = 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.`year` = t2.int_col
| other predicates: t2.id = 1
| row-size=167B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.id = 1
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=95B cardinality=11.00K
====
# Two-way left outer join query where not all equi-join predicates should
# generate a runtime filter
select straight_join * from functional.alltypesagg t1 left outer join functional.alltypesnopart t2
on t1.year = t2.int_col
where t2.id = 2 and t1.month = t2.tinyint_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.`year` = t2.int_col
| other predicates: t2.id = 2, t1.`month` = t2.tinyint_col
| runtime filters: RF000 <- t2.tinyint_col
| row-size=167B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.id = 2
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`month`
row-size=95B cardinality=11.00K
====
# Multi-way join query with outer joins
select straight_join * from functional.alltypesagg t1 left outer join functional.alltypesnopart t2
on t1.year = t2.int_col left outer join functional.alltypesnopart t3
on t1.year = t3.id full outer join functional.alltypesnopart t4
on t1.year = t4.tinyint_col right outer join functional.alltypesnopart t5
on t1.year = t5.smallint_col
where t2.id = 1 and t3.int_col = 1 and t4.bool_col = true and t5.bool_col = false
---- PLAN
PLAN-ROOT SINK
|
08:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t1.`year` = t5.smallint_col
| other predicates: t2.id = 1, t3.int_col = 1, t4.bool_col = TRUE
| runtime filters: RF000 <- t5.smallint_col
| row-size=383B cardinality=11.00K
|
|--04:SCAN HDFS [functional.alltypesnopart t5]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t5.bool_col = FALSE
| row-size=72B cardinality=0
|
07:HASH JOIN [FULL OUTER JOIN]
| hash predicates: t1.`year` = t4.tinyint_col
| row-size=311B cardinality=11.00K
|
|--03:SCAN HDFS [functional.alltypesnopart t4]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t4.bool_col = TRUE
| row-size=72B cardinality=0
|
06:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.`year` = t3.id
| row-size=239B cardinality=11.00K
|
|--02:SCAN HDFS [functional.alltypesnopart t3]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t3.int_col = 1
| runtime filters: RF000 -> t3.id
| row-size=72B cardinality=0
|
05:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.`year` = t2.int_col
| row-size=167B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.id = 1
| runtime filters: RF000 -> t2.int_col
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`year`
row-size=95B cardinality=11.00K
====
# Two-way right outer join query where a runtime filter can be pushed to the nullable
# probe side
select straight_join *
from functional.alltypesagg t1 right outer join functional.alltypesnopart t2
on t1.year = t2.int_col and t1.month = 1 and t2.int_col = 10
where t2.id = 10 and t1.month = t2.tinyint_col and t1.int_col = 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t1.`year` = t2.int_col
| other join predicates: t2.int_col = 10
| other predicates: t1.int_col = 1, t1.`month` = t2.tinyint_col
| runtime filters: RF000 <- t2.int_col, RF001 <- t2.tinyint_col
| row-size=167B cardinality=0
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.id = 10
| row-size=72B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
partition predicates: t1.`month` = 1, t1.year = 10
partitions=0/11 files=0 size=0B
predicates: t1.int_col = 1
runtime filters: RF000 -> t1.`year`, RF001 -> t1.`month`
row-size=95B cardinality=0
====
# Three-way join query with semi joins
select straight_join * from functional.alltypesagg t1 left semi join functional.alltypesnopart t2
on t1.year = t2.int_col right semi join functional.alltypesnopart t3
on t1.month = t3.tinyint_col
where t3.id = 1
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: t1.`month` = t3.tinyint_col
| runtime filters: RF000 <- t3.tinyint_col
| row-size=72B cardinality=0
|
|--02:SCAN HDFS [functional.alltypesnopart t3]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t3.id = 1
| row-size=72B cardinality=0
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.`year` = t2.int_col
| runtime filters: RF002 <- t2.int_col
| row-size=95B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=4B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`month`, RF002 -> t1.`year`
row-size=95B cardinality=11.00K
====
# Query with a subquery that is converted to a null-aware left anti join
select straight_join * from functional.alltypesagg t1
where t1.year not in (select id from functional.alltypesnopart where int_col = 10)
and t1.int_col < 10
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
| hash predicates: t1.`year` = id
| row-size=95B cardinality=1.10K
|
|--01:SCAN HDFS [functional.alltypesnopart]
| HDFS partitions=1/1 files=0 size=0B
| predicates: int_col = 10
| row-size=8B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: t1.int_col < 10
row-size=95B cardinality=1.10K
====
# Two-way join query between two inline views where the scan node to apply the filter
# is below an aggregation node in the probe side of the join
select straight_join * from
(select year, count(*) from functional.alltypesagg t1 group by year) v1,
(select id, int_col from functional.alltypesnopart where tinyint_col < 10) v2
where v1.year = v2.id
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [INNER JOIN]
| hash predicates: `year` = id
| runtime filters: RF000 <- id
| row-size=21B cardinality=1
|
|--02:SCAN HDFS [functional.alltypesnopart]
| HDFS partitions=1/1 files=0 size=0B
| predicates: tinyint_col < 10
| row-size=9B cardinality=0
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: `year`
| row-size=12B cardinality=1
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.year
row-size=4B cardinality=11.00K
====
# Two-way join query where the lhs of the join is an inline view with an aggregation;
# the runtime filter cannot be pushed through the aggregation node
select straight_join * from
(select year, count(*) as cnt from functional.alltypesagg t1 group by year) v1,
functional.alltypes t2
where v1.cnt = t2.id and t2.int_col = 1
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [INNER JOIN]
| hash predicates: count(*) = t2.id
| row-size=101B cardinality=1
|
|--02:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: t2.int_col = 1
| row-size=89B cardinality=730
|
01:AGGREGATE [FINALIZE]
| output: count(*)
| group by: `year`
| row-size=12B cardinality=1
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=4B cardinality=11.00K
====
# Two-way join query with multiple nested inline views in the probe side of the join
# where the scan node to apply the filter is below multiple aggregation nodes
select straight_join * from
(select year, int_col, count(*) from
(select year, month, max(id) from functional.alltypesagg t1 group by year, month) v1,
functional.alltypesnopart t2 where v1.month = t2.int_col
group by v1.year, t2.int_col) v2,
functional.alltypesnopart t3
where v2.year = t3.smallint_col and t3.id = 1
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [INNER JOIN]
| hash predicates: v1.`year` = t3.smallint_col
| runtime filters: RF000 <- t3.smallint_col
| row-size=88B cardinality=1
|
|--05:SCAN HDFS [functional.alltypesnopart t3]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t3.id = 1
| row-size=72B cardinality=0
|
04:AGGREGATE [FINALIZE]
| output: count(*)
| group by: `year`, t2.int_col
| row-size=16B cardinality=1
|
03:HASH JOIN [INNER JOIN]
| hash predicates: `month` = t2.int_col
| runtime filters: RF002 <- t2.int_col
| row-size=12B cardinality=1
|
|--02:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=4B cardinality=0
|
01:AGGREGATE [FINALIZE]
| group by: `year`, `month`
| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.year, RF002 -> t1.month
row-size=8B cardinality=11.00K
====
# Four-way join query between an inline view with an aggregation and three base tables
select straight_join 1 from
(select distinct id, year, month from functional.alltypes) v
join functional.alltypestiny a on v.year = a.year
join functional.alltypestiny b on v.year = b.year
join functional.alltypestiny c on v.year = c.year
where b.int_col < 10
---- PLAN
PLAN-ROOT SINK
|
07:HASH JOIN [INNER JOIN]
| hash predicates: `year` = c.`year`
| runtime filters: RF000 <- c.`year`
| row-size=28B cardinality=58.40K
|
|--04:SCAN HDFS [functional.alltypestiny c]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
06:HASH JOIN [INNER JOIN]
| hash predicates: `year` = b.`year`
| runtime filters: RF002 <- b.`year`
| row-size=24B cardinality=14.60K
|
|--03:SCAN HDFS [functional.alltypestiny b]
| HDFS partitions=4/4 files=4 size=460B
| predicates: b.int_col < 10
| runtime filters: RF000 -> b.year
| row-size=8B cardinality=1
|
05:HASH JOIN [INNER JOIN]
| hash predicates: `year` = a.`year`
| runtime filters: RF004 <- a.`year`
| row-size=16B cardinality=29.20K
|
|--02:SCAN HDFS [functional.alltypestiny a]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> a.year, RF002 -> a.year
| row-size=4B cardinality=8
|
01:AGGREGATE [FINALIZE]
| group by: id, `year`, `month`
| row-size=12B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> functional.alltypes.year, RF002 -> functional.alltypes.year, RF004 -> functional.alltypes.year
row-size=12B cardinality=7.30K
====
# Two-way join query with an inline view in the probe side of the join where the
# scan node to apply the filter is below a top-n (order by with limit) operator
select straight_join * from
(select year, int_col from functional.alltypesagg order by year limit 10) v1,
functional.alltypesnopart t2
where v1.year = t2.id and v1.int_col = t2.int_col and t2.smallint_col = 1
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [INNER JOIN]
| hash predicates: int_col = t2.int_col, year = t2.id
| row-size=80B cardinality=10
|
|--02:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.smallint_col = 1
| row-size=72B cardinality=0
|
01:TOP-N [LIMIT=10]
| order by: year ASC
| row-size=8B cardinality=10
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=8B cardinality=11.00K
====
# Two-way join query with an inline in the probe side of the join that has a union
select straight_join * from
(select year from functional.alltypesagg t1 union all
select year from functional.alltypestiny t2) v,
functional.alltypesnopart t3
where v.year = t3.int_col and t3.bool_col = false
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: year = t3.int_col
| runtime filters: RF000 <- t3.int_col
| row-size=76B cardinality=11.01K
|
|--03:SCAN HDFS [functional.alltypesnopart t3]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t3.bool_col = FALSE
| row-size=72B cardinality=0
|
00:UNION
| pass-through-operands: all
| row-size=4B cardinality=11.01K
|
|--02:SCAN HDFS [functional.alltypestiny t2]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> t2.year
| row-size=4B cardinality=8
|
01:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.year
row-size=4B cardinality=11.00K
====
# Query with nested UNION ALL operators
select straight_join count(*) from
(select month, year from functional.alltypes
union all
(select month, year from functional.alltypes
union all
select month, year from functional.alltypes)) a
join functional.alltypessmall b
on a.month = b.month
where b.int_col = 1
---- PLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
05:HASH JOIN [INNER JOIN]
| hash predicates: month = b.`month`
| runtime filters: RF000 <- b.`month`
| row-size=12B cardinality=21.90K
|
|--04:SCAN HDFS [functional.alltypessmall b]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: b.int_col = 1
| row-size=8B cardinality=10
|
00:UNION
| pass-through-operands: all
| row-size=4B cardinality=21.90K
|
|--03:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> functional.alltypes.month
| row-size=4B cardinality=7.30K
|
|--02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> functional.alltypes.month
| row-size=4B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> functional.alltypes.month
row-size=4B cardinality=7.30K
====
# Query with nested UNION DISTINCT operators
select straight_join count(*) from
(select month, year from functional.alltypes
union distinct
(select month, year from functional.alltypes
union distinct
select month, year from functional.alltypes)) a
join functional.alltypessmall b
on a.month = b.month
where b.int_col = 1
---- PLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
06:HASH JOIN [INNER JOIN]
| hash predicates: month = b.`month`
| runtime filters: RF000 <- b.`month`
| row-size=16B cardinality=216
|
|--05:SCAN HDFS [functional.alltypessmall b]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: b.int_col = 1
| row-size=8B cardinality=10
|
04:AGGREGATE [FINALIZE]
| group by: month, year
| row-size=8B cardinality=216
|
00:UNION
| pass-through-operands: all
| row-size=8B cardinality=21.90K
|
|--03:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> functional.alltypes.month
| row-size=8B cardinality=7.30K
|
|--02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> functional.alltypes.month
| row-size=8B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> functional.alltypes.month
row-size=8B cardinality=7.30K
====
# UNION ALL query
select straight_join t2.id, t1.year from functional.alltypesagg t1, functional.alltypesnopart t2
where t1.year = t2.int_col and t2.bool_col = false
union all
select straight_join t4.id, t3.year from functional.alltypes t3, functional.alltypesnopart t4
where t3.month = t4.smallint_col and t4.bool_col = true
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| row-size=8B cardinality=18.30K
|
|--06:HASH JOIN [INNER JOIN]
| | hash predicates: t3.`month` = t4.smallint_col
| | runtime filters: RF002 <- t4.smallint_col
| | row-size=15B cardinality=7.30K
| |
| |--05:SCAN HDFS [functional.alltypesnopart t4]
| | HDFS partitions=1/1 files=0 size=0B
| | predicates: t4.bool_col = TRUE
| | row-size=7B cardinality=0
| |
| 04:SCAN HDFS [functional.alltypes t3]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF002 -> t3.`month`
| row-size=8B cardinality=7.30K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t2.int_col
| runtime filters: RF000 <- t2.int_col
| row-size=13B cardinality=11.00K
|
|--02:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t2.bool_col = FALSE
| row-size=9B cardinality=0
|
01:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`year`
row-size=4B cardinality=11.00K
====
# Query with UNION ALL operator on the rhs of a join node
select straight_join count(*) from functional.alltypes a
join [broadcast]
(select id, int_col from functional.alltypes
union all
select id, int_col from functional.alltypes) b
on a.id = b.id
where (b.id - b.id) < 1 AND (b.int_col - b.int_col) < 1
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:HASH JOIN [INNER JOIN]
| hash predicates: a.id = id
| runtime filters: RF000 <- id
| row-size=8B cardinality=730
|
|--01:UNION
| | row-size=4B cardinality=1.46K
| |
| |--03:SCAN HDFS [functional.alltypes]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | predicates: (functional.alltypes.id - functional.alltypes.id) < 1, (functional.alltypes.int_col - functional.alltypes.int_col) < 1
| | row-size=8B cardinality=730
| |
| 02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: (functional.alltypes.id - functional.alltypes.id) < 1, (functional.alltypes.int_col - functional.alltypes.int_col) < 1
| row-size=8B cardinality=730
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: (a.id - a.id) < 1
runtime filters: RF000 -> a.id
row-size=4B cardinality=730
====
# Two-way join query with an inline view in the probe side of the join where the
# scan node to apply the filter in below an analytic function on the probe side of the join
select straight_join * from
(select t1.year, t1.month, count(t2.id) over(partition by year order by month desc)
from functional.alltypesagg t1 inner join functional.alltypesnopart t2
on t1.year = t2.id) v1,
functional.alltypesnopart t3
where v1.month = t3.tinyint_col and v1.year = t3.id and t3.bool_col = false
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [INNER JOIN]
| hash predicates: year = t3.id, month = t3.tinyint_col
| row-size=92B cardinality=11.00K
|
|--05:SCAN HDFS [functional.alltypesnopart t3]
| HDFS partitions=1/1 files=0 size=0B
| predicates: t3.bool_col = FALSE
| row-size=72B cardinality=0
|
04:ANALYTIC
| functions: count(id)
| partition by: `year`
| order by: month DESC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=20B cardinality=11.00K
|
03:SORT
| order by: year ASC NULLS FIRST, month DESC
| row-size=12B cardinality=11.00K
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t2.id
| runtime filters: RF000 <- t2.id
| row-size=12B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=4B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.`year`
row-size=8B cardinality=11.00K
====
# Two-way join query with an analytic function on the probe side
# TODO: Propagate a runtime filter through the analytic function
select straight_join * from
(select id, int_col, year,
sum(int_col) over (partition by year order by id) as s
from functional.alltypes) v, functional.alltypestiny v1
where v.year = v1.int_col and v.year = 2009
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: year = v1.int_col
| row-size=109B cardinality=14.60K
|
|--03:SCAN HDFS [functional.alltypestiny v1]
| HDFS partitions=4/4 files=4 size=460B
| predicates: v1.int_col = 2009
| row-size=89B cardinality=4
|
02:ANALYTIC
| functions: sum(int_col)
| partition by: `year`
| order by: id ASC
| window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
| row-size=20B cardinality=3.65K
|
01:SORT
| order by: year ASC NULLS FIRST, id ASC
| row-size=12B cardinality=3.65K
|
00:SCAN HDFS [functional.alltypes]
partition predicates: functional.alltypes.year = 2009
HDFS partitions=12/24 files=12 size=238.68KB
row-size=12B cardinality=3.65K
====
# Multi-way join query with a bushy plan
select straight_join * from
(select t1.* from functional.alltypesagg t1,
(select t2.id + t3.id + 1 as id, t3.int_col
from functional.alltypesnopart t2 left outer join functional.alltypesnopart t3
on t2.id = t3.id
where t3.bool_col = false) v1
where t1.year = v1.id
) v2,
(select t4.int_col as intcol1, t5.bigint_col as intcol2
from functional.alltypesnopart t4 left outer join functional.alltypesnopart t5
on t4.smallint_col = t5.smallint_col
where t5.bool_col = true
) v3
where v2.month = v3.intcol1
---- PLAN
PLAN-ROOT SINK
|
08:HASH JOIN [INNER JOIN]
| hash predicates: t1.month = t4.int_col
| runtime filters: RF000 <- t4.int_col
| row-size=121B cardinality=11.00K
|
|--07:HASH JOIN [LEFT OUTER JOIN]
| | hash predicates: t4.smallint_col = t5.smallint_col
| | other predicates: t5.bool_col = TRUE
| | row-size=17B cardinality=0
| |
| |--06:SCAN HDFS [functional.alltypesnopart t5]
| | HDFS partitions=1/1 files=0 size=0B
| | predicates: t5.bool_col = TRUE
| | row-size=11B cardinality=0
| |
| 05:SCAN HDFS [functional.alltypesnopart t4]
| HDFS partitions=1/1 files=0 size=0B
| row-size=6B cardinality=0
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.`year` = t2.id + t3.id + 1
| runtime filters: RF002 <- t2.id + t3.id + 1
| row-size=104B cardinality=11.00K
|
|--03:HASH JOIN [LEFT OUTER JOIN]
| | hash predicates: t2.id = t3.id
| | other predicates: t3.bool_col = FALSE
| | row-size=9B cardinality=0
| |
| |--02:SCAN HDFS [functional.alltypesnopart t3]
| | HDFS partitions=1/1 files=0 size=0B
| | predicates: t3.bool_col = FALSE
| | row-size=5B cardinality=0
| |
| 01:SCAN HDFS [functional.alltypesnopart t2]
| HDFS partitions=1/1 files=0 size=0B
| row-size=4B cardinality=0
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.month, RF002 -> t1.`year`
row-size=95B cardinality=11.00K
====
# Multi-way join query where the slots of all the join predicates belong to the same
# equivalence class
select straight_join 1 from functional.alltypestiny t1 join functional.alltypestiny t2 on t1.id = t2.id
join functional.alltypestiny t3 on t2.id = t3.id
where t3.int_col = 1
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t2.id = t3.id
| runtime filters: RF000 <- t3.id
| row-size=16B cardinality=4
|
|--02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| predicates: t3.int_col = 1
| row-size=8B cardinality=4
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF002 <- t2.id
| row-size=8B cardinality=8
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> t2.id
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t1.id, RF002 -> t1.id
row-size=4B cardinality=8
====
# Equivalent query to the one above; the same runtime filters should be generated
select straight_join 1 from functional.alltypestiny t1 join functional.alltypestiny t2 on t1.id = t2.id
join functional.alltypestiny t3 on t1.id = t3.id
where t3.int_col = 1
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t3.id
| runtime filters: RF000 <- t3.id
| row-size=16B cardinality=4
|
|--02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| predicates: t3.int_col = 1
| row-size=8B cardinality=4
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF002 <- t2.id
| row-size=8B cardinality=8
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> t2.id
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t1.id, RF002 -> t1.id
row-size=4B cardinality=8
====
# Check that runtime filters are not generated in subplans
select straight_join 1 from tpch_nested_parquet.customer c,
(select o1.o_orderkey
from c.c_orders o1 join c.c_orders o2
on o1.o_orderkey = o2.o_orderkey) v
where c_custkey = v.o_orderkey
---- PLAN
PLAN-ROOT SINK
|
01:SUBPLAN
| row-size=48B cardinality=150.00K
|
|--06:HASH JOIN [INNER JOIN]
| | hash predicates: c_custkey = o1.o_orderkey
| | row-size=48B cardinality=1
| |
| |--05:HASH JOIN [INNER JOIN]
| | | hash predicates: o1.o_orderkey = o2.o_orderkey
| | | row-size=16B cardinality=10
| | |
| | |--04:UNNEST [c.c_orders o2]
| | | row-size=0B cardinality=10
| | |
| | 03:UNNEST [c.c_orders o1]
| | row-size=0B cardinality=10
| |
| 02:SINGULAR ROW SRC
| row-size=32B cardinality=1
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=288.99MB
row-size=32B cardinality=150.00K
====
# Two-way join query where the build side is optimized into an empty set
select straight_join 1
from functional.alltypestiny t1 join
(select * from functional.alltypessmall t2 where false) v on t1.id = v.id
where v.int_col = 1
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
| row-size=8B cardinality=0
|
|--01:EMPTYSET
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t1.id
row-size=4B cardinality=8
====
# Two-way join query where both the build side and probe side are optimized
# into empty sets
select straight_join 1 from
(select * from functional.alltypestiny where false) v1 join
(select * from functional.alltypessmall where false) v2 on v1.id = v2.id
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: functional.alltypestiny.id = functional.alltypessmall.id
| row-size=8B cardinality=0
|
|--01:EMPTYSET
|
00:EMPTYSET
====
# Multi-way join query where a runtime filter can be assigned to scan node
# even though the apply expr is bound by different scan tuple descriptors
select straight_join 1 from functional.alltypestiny t1 join functional.alltypestiny t2
on t1.id = t2.id
join functional.alltypestiny t3 on t1.id = t3.id
join functional.alltypestiny t4 on t1.id + t2.id = t4.id
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [INNER JOIN]
| hash predicates: t1.id + t2.id = t4.id
| runtime filters: RF000 <- t4.id
| row-size=16B cardinality=8
|
|--03:SCAN HDFS [functional.alltypestiny t4]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
05:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t3.id
| runtime filters: RF002 <- t3.id
| row-size=12B cardinality=8
|
|--02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> t3.id + t3.id
| row-size=4B cardinality=8
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF004 <- t2.id
| row-size=8B cardinality=8
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> t2.id + t2.id, RF002 -> t2.id
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t1.id + t1.id, RF002 -> t1.id, RF004 -> t1.id
row-size=4B cardinality=8
====
# IMPALA-3074: Generated runtime filter has multiple candidate target nodes not all of
# which are valid due to type mismatch between the associated source and target
# exprs.
select straight_join 1 from functional.alltypestiny a1
inner join functional.alltypestiny a3 ON a3.smallint_col = a1.int_col
inner join functional.alltypes a4 ON a4.smallint_col = a3.smallint_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: a3.smallint_col = a4.smallint_col
| runtime filters: RF000 <- a4.smallint_col
| row-size=8B cardinality=23.36K
|
|--02:SCAN HDFS [functional.alltypes a4]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=2B cardinality=7.30K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a1.int_col = a3.smallint_col
| runtime filters: RF002 <- a3.smallint_col
| row-size=6B cardinality=32
|
|--01:SCAN HDFS [functional.alltypestiny a3]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> a3.smallint_col
| row-size=2B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny a1]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF002 -> a1.int_col
row-size=4B cardinality=8
====
# IMPALA-3574: Runtime filter generated from a targer expr that contains a TupleIsNull
# predicate.
select distinct t1.int_col
from functional.alltypestiny t1 left join
(select coalesce(t3.int_col, 384) as int_col
from (select int_col from functional.alltypes) t1
right semi join (select int_col from functional.alltypesagg) t3
on t1.int_col = t3.int_col) t2
on t2.int_col = t1.month
where t1.month is not null
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| group by: t1.int_col
| row-size=4B cardinality=2
|
04:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: if(TupleIsNull(3), NULL, coalesce(int_col, 384)) = t1.`month`
| runtime filters: RF000 <- t1.`month`
| row-size=12B cardinality=8
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| partition predicates: t1.`month` IS NOT NULL
| HDFS partitions=4/4 files=4 size=460B
| row-size=8B cardinality=8
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: int_col = int_col
| runtime filters: RF002 <- int_col
| row-size=4B cardinality=115
|
|--01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> coalesce(functional.alltypes.int_col, 384)
| row-size=4B cardinality=7.30K
|
02:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> coalesce(int_col, 384), RF002 -> int_col
row-size=4B cardinality=11.00K
====
# IMPALA-4076: Test pruning the least selective runtime filters to obey
# MAX_NUM_RUNTIME_FILTERS in the presence of zero-cardinality plan nodes. This query was
# constructed by hand to trigger the issue with the sort compare method violating the
# comparison contract. In order to trigger the issue, the number of runtime filters has to
# be greater than 32 and they have to be in a certain initial order.
with big_six as (
select straight_join a.id
from functional.alltypes a
inner join functional.alltypes b on a.id = b.id
and a.bigint_col = b.bigint_col
and a.bool_col = b.bool_col
and a.int_col = b.int_col
and a.smallint_col = b.smallint_col
and a.tinyint_col = b.tinyint_col
), small_two as (
select straight_join a.bool_col
from functional.alltypes a
inner join functional.alltypestiny b on a.id = b.id
and a.bool_col = b.bool_col
), big_eight as (
select straight_join a.id
from functional.alltypes a
inner join functional.alltypes b on a.id = b.id
and a.bool_col = b.bool_col
and a.date_string_col = b.date_string_col
and a.double_col = b.double_col
and a.smallint_col = b.smallint_col
and a.string_col = b.string_col
and a.timestamp_col = b.timestamp_col
and a.tinyint_col = b.tinyint_col
), small_four as (
select straight_join a.bool_col
from functional.alltypes a
inner join functional.alltypestiny b on a.id = b.id
and a.bigint_col = b.bigint_col
and a.bool_col = b.bool_col
and a.double_col = b.double_col
and a.float_col = b.float_col
and a.int_col = b.int_col
and a.smallint_col = b.smallint_col
and a.tinyint_col = b.tinyint_col
), big_one as (
select straight_join a.id
from functional.alltypes a
inner join functional.alltypes b on a.id = b.id
), nan as (
with zero_card as (
select straight_join b.id, b.int_col
from (values(1 id) limit 0) a
inner join functional.alltypes b on a.id = b.id
)
select straight_join 1
from zero_card z
inner join functional.alltypestiny x on x.id = z.id
), small_six as (
select straight_join a.bool_col
from functional.alltypes a
inner join functional.alltypestiny b on a.id = b.id
and a.bigint_col = b.bigint_col
and a.bool_col = b.bool_col
and a.int_col = b.int_col
and a.smallint_col = b.smallint_col
and a.tinyint_col = b.tinyint_col
), big_three as (
select straight_join a.id
from functional.alltypes a
inner join functional.alltypes b on a.id = b.id
and a.bool_col = b.bool_col
and a.tinyint_col = b.tinyint_col
), small_four_2 as (
select straight_join a.bool_col
from functional.alltypes a
inner join functional.alltypestiny b on a.id = b.id
and a.bigint_col = b.bigint_col
and a.bool_col = b.bool_col
and a.double_col = b.double_col
and a.float_col = b.float_col
and a.int_col = b.int_col
and a.smallint_col = b.smallint_col
and a.tinyint_col = b.tinyint_col
)
select straight_join 1
from big_six
inner join small_two
inner join big_eight
inner join small_four
inner join big_one
inner join nan
inner join small_six
inner join big_three
inner join small_four_2
---- PLAN
PLAN-ROOT SINK
|
36:NESTED LOOP JOIN [CROSS JOIN]
| row-size=377B cardinality=0
|
|--28:HASH JOIN [INNER JOIN]
| | hash predicates: a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.double_col = b.double_col, a.float_col = b.float_col, a.id = b.id, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
| | row-size=64B cardinality=8
| |
| |--27:SCAN HDFS [functional.alltypestiny b]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=32B cardinality=8
| |
| 26:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=32B cardinality=7.30K
|
35:NESTED LOOP JOIN [CROSS JOIN]
| row-size=313B cardinality=0
|
|--25:HASH JOIN [INNER JOIN]
| | hash predicates: a.bool_col = b.bool_col, a.id = b.id, a.tinyint_col = b.tinyint_col
| | row-size=12B cardinality=7.30K
| |
| |--24:SCAN HDFS [functional.alltypes b]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | row-size=6B cardinality=7.30K
| |
| 23:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=6B cardinality=7.30K
|
34:NESTED LOOP JOIN [CROSS JOIN]
| row-size=301B cardinality=0
|
|--22:HASH JOIN [INNER JOIN]
| | hash predicates: a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.id = b.id, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
| | row-size=40B cardinality=8
| |
| |--21:SCAN HDFS [functional.alltypestiny b]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=20B cardinality=8
| |
| 20:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=20B cardinality=7.30K
|
33:NESTED LOOP JOIN [CROSS JOIN]
| row-size=261B cardinality=0
|
|--19:HASH JOIN [INNER JOIN]
| | hash predicates: b.id = x.id
| | row-size=9B cardinality=0
| |
| |--18:SCAN HDFS [functional.alltypestiny x]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=4B cardinality=8
| |
| 17:HASH JOIN [INNER JOIN]
| | hash predicates: id = b.id
| | row-size=5B cardinality=0
| |
| |--16:SCAN HDFS [functional.alltypes b]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | row-size=4B cardinality=7.30K
| |
| 15:EMPTYSET
|
32:NESTED LOOP JOIN [CROSS JOIN]
| row-size=252B cardinality=24.90T
|
|--14:HASH JOIN [INNER JOIN]
| | hash predicates: a.id = b.id
| | row-size=8B cardinality=7.30K
| |
| |--13:SCAN HDFS [functional.alltypes b]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | row-size=4B cardinality=7.30K
| |
| 12:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
31:NESTED LOOP JOIN [CROSS JOIN]
| row-size=244B cardinality=3.41G
|
|--11:HASH JOIN [INNER JOIN]
| | hash predicates: a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.double_col = b.double_col, a.float_col = b.float_col, a.id = b.id, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
| | runtime filters: RF032 <- b.bigint_col, RF033 <- b.bool_col, RF034 <- b.double_col, RF035 <- b.float_col, RF036 <- b.id, RF037 <- b.int_col, RF038 <- b.smallint_col, RF039 <- b.tinyint_col
| | row-size=64B cardinality=8
| |
| |--10:SCAN HDFS [functional.alltypestiny b]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=32B cardinality=8
| |
| 09:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF032 -> a.bigint_col, RF033 -> a.bool_col, RF034 -> a.double_col, RF035 -> a.float_col, RF036 -> a.id, RF037 -> a.int_col, RF038 -> a.smallint_col, RF039 -> a.tinyint_col
| row-size=32B cardinality=7.30K
|
30:NESTED LOOP JOIN [CROSS JOIN]
| row-size=180B cardinality=426.32M
|
|--08:HASH JOIN [INNER JOIN]
| | hash predicates: a.bool_col = b.bool_col, a.double_col = b.double_col, a.id = b.id, a.smallint_col = b.smallint_col, a.timestamp_col = b.timestamp_col, a.tinyint_col = b.tinyint_col, a.string_col = b.string_col, a.date_string_col = b.date_string_col
| | row-size=130B cardinality=7.30K
| |
| |--07:SCAN HDFS [functional.alltypes b]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | row-size=65B cardinality=7.30K
| |
| 06:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=65B cardinality=7.30K
|
29:NESTED LOOP JOIN [CROSS JOIN]
| row-size=50B cardinality=58.40K
|
|--05:HASH JOIN [INNER JOIN]
| | hash predicates: a.bool_col = b.bool_col, a.id = b.id
| | runtime filters: RF012 <- b.bool_col, RF013 <- b.id
| | row-size=10B cardinality=8
| |
| |--04:SCAN HDFS [functional.alltypestiny b]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=5B cardinality=8
| |
| 03:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF012 -> a.bool_col, RF013 -> a.id
| row-size=5B cardinality=7.30K
|
02:HASH JOIN [INNER JOIN]
| hash predicates: a.bigint_col = b.bigint_col, a.bool_col = b.bool_col, a.id = b.id, a.int_col = b.int_col, a.smallint_col = b.smallint_col, a.tinyint_col = b.tinyint_col
| row-size=40B cardinality=7.30K
|
|--01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=20B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=20B cardinality=7.30K
====
# IMPALA-4490: Only generate runtime filters for hash join nodes, even if there is an
# otherwise suitable equality predicate.
select 1 from functional.alltypes a
inner join functional.alltypes b
on a.id = b.id
left outer join functional.alltypes c
on b.id is distinct from c.id
where b.int_col + b.bigint_col = c.int_col
---- PLAN
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [LEFT OUTER JOIN]
| join predicates: b.id IS DISTINCT FROM c.id
| predicates: c.int_col = b.int_col + b.bigint_col
| row-size=28B cardinality=7.30K
|
|--02:SCAN HDFS [functional.alltypes c]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: b.id = a.id
| runtime filters: RF000 <- a.id
| row-size=20B cardinality=7.30K
|
|--00:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypes b]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> b.id
row-size=16B cardinality=7.30K
====
# IMPALA-5597: Runtime filter should be generated and assigned successfully when the
# source expr and target expr have different decimal types.
select *
from tpch_parquet.lineitem
left join tpch_parquet.part on if(l_orderkey % 2 = 0, NULL, l_partkey) = p_partkey
where l_orderkey = 965 and l_extendedprice * l_tax = p_retailprice;
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: p_partkey = if(l_orderkey % 2 = 0, NULL, l_partkey)
| other predicates: p_retailprice = l_extendedprice * l_tax
| runtime filters: RF000 <- if(l_orderkey % 2 = 0, NULL, l_partkey), RF001 <- l_extendedprice * l_tax
| row-size=419B cardinality=4
|
|--00:SCAN HDFS [tpch_parquet.lineitem]
| HDFS partitions=1/1 files=3 size=193.99MB
| predicates: l_orderkey = 965
| row-size=231B cardinality=4
|
01:SCAN HDFS [tpch_parquet.part]
HDFS partitions=1/1 files=1 size=6.24MB
runtime filters: RF000 -> p_partkey, RF001 -> p_retailprice
row-size=188B cardinality=200.00K
====
# IMPALA-6286: Runtime filter must not be assigned at scan 01 because that could
# alter the query results due to the coalesce() in the join condition of join 04.
select /* +straight_join */ 1 from functional.alltypestiny t1
left outer join functional.alltypestiny t2
on t1.int_col = t2.int_col
where coalesce(t2.id + 10, 100) in (select 100);
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: coalesce(t2.id + 10, 100) = `$a$1`.`$c$1`
| row-size=12B cardinality=32
|
|--02:UNION
| constant-operands=1
| row-size=1B cardinality=1
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.int_col = t2.int_col
| row-size=12B cardinality=32
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| HDFS partitions=4/4 files=4 size=460B
| row-size=8B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# IMPALA-6286: Same as above but with an inline view.
select /* +straight_join */ 1 from functional.alltypestiny t1
left outer join (select * from functional.alltypestiny t2) v
on t1.int_col = v.int_col
where coalesce(v.id + 10, 100) in (select 100);
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: coalesce(t2.id + 10, 100) = `$a$1`.`$c$1`
| row-size=12B cardinality=32
|
|--02:UNION
| constant-operands=1
| row-size=1B cardinality=1
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.int_col = t2.int_col
| row-size=12B cardinality=32
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| HDFS partitions=4/4 files=4 size=460B
| row-size=8B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# IMPALA-6286: The runtime filter produced by inner join 05 can safely be assigned
# at scan 01. It would also be safe to produce a runtime filter at join 06 and assign
# it to scan 00, but our check is too conservative to recognize the opportunity.
select /* +straight_join */ 1 from functional.alltypestiny t1
left outer join functional.alltypestiny t2
on t1.int_col = t2.int_col
inner join functional.alltypestiny t3
on t2.id = t3.id
where coalesce(t2.id + 10, 100) in (select 100)
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: coalesce(t2.id + 10, 100) = `$a$1`.`$c$1`
| row-size=16B cardinality=32
|
|--03:UNION
| constant-operands=1
| row-size=1B cardinality=1
|
05:HASH JOIN [INNER JOIN]
| hash predicates: t2.id = t3.id
| runtime filters: RF000 <- t3.id
| row-size=16B cardinality=32
|
|--02:SCAN HDFS [functional.alltypestiny t3]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.int_col = t2.int_col
| row-size=12B cardinality=32
|
|--01:SCAN HDFS [functional.alltypestiny t2]
| HDFS partitions=4/4 files=4 size=460B
| runtime filters: RF000 -> t2.id
| row-size=8B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====