blob: 79242f4d8e8871cd795fdb835a57585c4d0ac3c0 [file] [log] [blame]
# Q3 - Shipping Priority Query
# Modifications: Added round() calls
select
l_orderkey,
round(sum(l_extendedprice * (1 - l_discount)), 5) as revenue,
o_orderdate,
o_shippriority
from tpch.customer c,
tpch.orders o,
tpch.lineitem l
where
c.c_mktsegment = 'BUILDING'
and c.c_custkey = o.o_custkey
and l.l_orderkey = o.o_orderkey
and o_orderdate < '1995-03-15'
and l_shipdate > '1995-03-15'
group by
l_orderkey,
o_orderdate,
o_shippriority
order by
revenue desc,
o_orderdate
limit 10
---- PLAN
PLAN-ROOT SINK
|
06:TOP-N [LIMIT=10]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
| row-size=50B cardinality=10
|
05:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
| row-size=50B cardinality=17.56K
|
04:HASH JOIN [INNER JOIN]
| hash predicates: o.o_custkey = c.c_custkey
| runtime filters: RF000 <- c.c_custkey
| row-size=117B cardinality=17.56K
|
|--00:SCAN HDFS [tpch.customer c]
| partitions=1/1 files=1 size=23.08MB
| predicates: c.c_mktsegment = 'BUILDING'
| row-size=29B cardinality=30.00K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: l.l_orderkey = o.o_orderkey
| runtime filters: RF002 <- o.o_orderkey
| row-size=88B cardinality=57.58K
|
|--01:SCAN HDFS [tpch.orders o]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-03-15'
| runtime filters: RF000 -> o.o_custkey
| row-size=42B cardinality=150.00K
|
02:SCAN HDFS [tpch.lineitem l]
partitions=1/1 files=1 size=718.94MB
predicates: l_shipdate > '1995-03-15'
runtime filters: RF002 -> l.l_orderkey
row-size=46B cardinality=600.12K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
11:MERGING-EXCHANGE [UNPARTITIONED]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
| limit: 10
|
06:TOP-N [LIMIT=10]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
| row-size=50B cardinality=10
|
10:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
| row-size=50B cardinality=17.56K
|
09:EXCHANGE [HASH(l_orderkey,o_orderdate,o_shippriority)]
|
05:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
| row-size=50B cardinality=17.56K
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o.o_custkey = c.c_custkey
| runtime filters: RF000 <- c.c_custkey
| row-size=117B cardinality=17.56K
|
|--08:EXCHANGE [BROADCAST]
| |
| 00:SCAN HDFS [tpch.customer c]
| partitions=1/1 files=1 size=23.08MB
| predicates: c.c_mktsegment = 'BUILDING'
| row-size=29B cardinality=30.00K
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l.l_orderkey = o.o_orderkey
| runtime filters: RF002 <- o.o_orderkey
| row-size=88B cardinality=57.58K
|
|--07:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpch.orders o]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-03-15'
| runtime filters: RF000 -> o.o_custkey
| row-size=42B cardinality=150.00K
|
02:SCAN HDFS [tpch.lineitem l]
partitions=1/1 files=1 size=718.94MB
predicates: l_shipdate > '1995-03-15'
runtime filters: RF002 -> l.l_orderkey
row-size=46B cardinality=600.12K
====
# Q3 - Shipping Priority Query
# straight_join prevents join order optimization
select straight_join
l_orderkey,
round(sum(l_extendedprice * (1 - l_discount)), 5) as revenue,
o_orderdate,
o_shippriority
from tpch.customer c,
tpch.orders o,
tpch.lineitem l
where
c.c_mktsegment = 'BUILDING'
and c.c_custkey = o.o_custkey
and l.l_orderkey = o.o_orderkey
and o_orderdate < '1995-03-15'
and l_shipdate > '1995-03-15'
group by
l_orderkey,
o_orderdate,
o_shippriority
order by
revenue desc,
o_orderdate
limit 10
---- PLAN
PLAN-ROOT SINK
|
06:TOP-N [LIMIT=10]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
| row-size=50B cardinality=10
|
05:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
| row-size=50B cardinality=575.77K
|
04:HASH JOIN [INNER JOIN]
| hash predicates: o.o_orderkey = l.l_orderkey
| runtime filters: RF000 <- l.l_orderkey
| row-size=117B cardinality=575.77K
|
|--02:SCAN HDFS [tpch.lineitem l]
| partitions=1/1 files=1 size=718.94MB
| predicates: l_shipdate > '1995-03-15'
| row-size=46B cardinality=600.12K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: c.c_custkey = o.o_custkey
| runtime filters: RF002 <- o.o_custkey
| row-size=71B cardinality=150.00K
|
|--01:SCAN HDFS [tpch.orders o]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-03-15'
| runtime filters: RF000 -> o.o_orderkey
| row-size=42B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer c]
partitions=1/1 files=1 size=23.08MB
predicates: c.c_mktsegment = 'BUILDING'
runtime filters: RF002 -> c.c_custkey
row-size=29B cardinality=30.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
11:MERGING-EXCHANGE [UNPARTITIONED]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
| limit: 10
|
06:TOP-N [LIMIT=10]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC, o_orderdate ASC
| row-size=50B cardinality=10
|
10:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
| row-size=50B cardinality=575.77K
|
09:EXCHANGE [HASH(l_orderkey,o_orderdate,o_shippriority)]
|
05:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: l_orderkey, o_orderdate, o_shippriority
| row-size=50B cardinality=575.77K
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o.o_orderkey = l.l_orderkey
| runtime filters: RF000 <- l.l_orderkey
| row-size=117B cardinality=575.77K
|
|--08:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [tpch.lineitem l]
| partitions=1/1 files=1 size=718.94MB
| predicates: l_shipdate > '1995-03-15'
| row-size=46B cardinality=600.12K
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c.c_custkey = o.o_custkey
| runtime filters: RF002 <- o.o_custkey
| row-size=71B cardinality=150.00K
|
|--07:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpch.orders o]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-03-15'
| runtime filters: RF000 -> o.o_orderkey
| row-size=42B cardinality=150.00K
|
00:SCAN HDFS [tpch.customer c]
partitions=1/1 files=1 size=23.08MB
predicates: c.c_mktsegment = 'BUILDING'
runtime filters: RF002 -> c.c_custkey
row-size=29B cardinality=30.00K
====
# Q5 - Local Supplier Volume Query
# Modifications: Added round() call, converted selects from multiple tables
# to joins, added limit
select
n_name,
round(sum(l_extendedprice * (1 - l_discount)), 5) as revenue
from tpch.customer,
tpch.orders o,
tpch.lineitem l,
tpch.supplier s,
tpch.nation,
tpch.region
where l_orderkey = o_orderkey
and l_suppkey = s_suppkey
and c_nationkey = s_nationkey
and c_custkey = o_custkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'ASIA'
and o_orderdate >= '1994-01-01'
and o_orderdate < '1995-01-01'
group by
n_name
order by
revenue desc
limit 100
---- PLAN
PLAN-ROOT SINK
|
12:TOP-N [LIMIT=100]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC
| row-size=35B cardinality=25
|
11:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: n_name
| row-size=35B cardinality=25
|
10:HASH JOIN [INNER JOIN]
| hash predicates: n_regionkey = r_regionkey
| runtime filters: RF000 <- r_regionkey
| row-size=134B cardinality=115.16K
|
|--05:SCAN HDFS [tpch.region]
| partitions=1/1 files=1 size=384B
| predicates: r_name = 'ASIA'
| row-size=21B cardinality=1
|
09:HASH JOIN [INNER JOIN]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF002 <- n_nationkey
| row-size=113B cardinality=575.77K
|
|--04:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| runtime filters: RF000 -> n_regionkey
| row-size=23B cardinality=25
|
08:HASH JOIN [INNER JOIN]
| hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
| runtime filters: RF004 <- s_nationkey, RF005 <- s_suppkey
| row-size=90B cardinality=575.77K
|
|--03:SCAN HDFS [tpch.supplier s]
| partitions=1/1 files=1 size=1.33MB
| runtime filters: RF002 -> s_nationkey
| row-size=10B cardinality=10.00K
|
07:HASH JOIN [INNER JOIN]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF008 <- c_custkey
| row-size=80B cardinality=575.77K
|
|--00:SCAN HDFS [tpch.customer]
| partitions=1/1 files=1 size=23.08MB
| runtime filters: RF002 -> tpch.customer.c_nationkey, RF004 -> c_nationkey
| row-size=10B cardinality=150.00K
|
06:HASH JOIN [INNER JOIN]
| hash predicates: l_orderkey = o_orderkey
| runtime filters: RF010 <- o_orderkey
| row-size=70B cardinality=575.77K
|
|--01:SCAN HDFS [tpch.orders o]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01'
| runtime filters: RF008 -> o_custkey
| row-size=38B cardinality=150.00K
|
02:SCAN HDFS [tpch.lineitem l]
partitions=1/1 files=1 size=718.94MB
runtime filters: RF005 -> l_suppkey, RF010 -> l_orderkey
row-size=32B cardinality=6.00M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
20:MERGING-EXCHANGE [UNPARTITIONED]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC
| limit: 100
|
12:TOP-N [LIMIT=100]
| order by: round(sum(l_extendedprice * (1 - l_discount)), 5) DESC
| row-size=35B cardinality=25
|
19:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| group by: n_name
| row-size=35B cardinality=25
|
18:EXCHANGE [HASH(n_name)]
|
11:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: n_name
| row-size=35B cardinality=25
|
10:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: n_regionkey = r_regionkey
| runtime filters: RF000 <- r_regionkey
| row-size=134B cardinality=115.16K
|
|--17:EXCHANGE [BROADCAST]
| |
| 05:SCAN HDFS [tpch.region]
| partitions=1/1 files=1 size=384B
| predicates: r_name = 'ASIA'
| row-size=21B cardinality=1
|
09:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF002 <- n_nationkey
| row-size=113B cardinality=575.77K
|
|--16:EXCHANGE [BROADCAST]
| |
| 04:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| runtime filters: RF000 -> n_regionkey
| row-size=23B cardinality=25
|
08:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
| runtime filters: RF004 <- s_nationkey, RF005 <- s_suppkey
| row-size=90B cardinality=575.77K
|
|--15:EXCHANGE [BROADCAST]
| |
| 03:SCAN HDFS [tpch.supplier s]
| partitions=1/1 files=1 size=1.33MB
| runtime filters: RF002 -> s_nationkey
| row-size=10B cardinality=10.00K
|
07:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF008 <- c_custkey
| row-size=80B cardinality=575.77K
|
|--14:EXCHANGE [BROADCAST]
| |
| 00:SCAN HDFS [tpch.customer]
| partitions=1/1 files=1 size=23.08MB
| runtime filters: RF002 -> tpch.customer.c_nationkey, RF004 -> c_nationkey
| row-size=10B cardinality=150.00K
|
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_orderkey = o_orderkey
| runtime filters: RF010 <- o_orderkey
| row-size=70B cardinality=575.77K
|
|--13:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpch.orders o]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01'
| runtime filters: RF008 -> o_custkey
| row-size=38B cardinality=150.00K
|
02:SCAN HDFS [tpch.lineitem l]
partitions=1/1 files=1 size=718.94MB
runtime filters: RF005 -> l_suppkey, RF010 -> l_orderkey
row-size=32B cardinality=6.00M
====
# Q2 - Minimum Cost Supplier Query
select
s.s_acctbal,
s.s_name,
n.n_name,
p.p_partkey,
ps.ps_supplycost,
p.p_mfgr,
s.s_address,
s.s_phone,
s.s_comment
from
tpch.part p,
tpch.supplier s,
tpch.partsupp ps,
tpch.nation n,
tpch.region r
where
p.p_size = 15
and p.p_type like '%BRASS'
and r.r_name = 'EUROPE'
and p.p_partkey = ps.ps_partkey
and s.s_suppkey = ps.ps_suppkey
and s.s_nationkey = n.n_nationkey
and n.n_regionkey = r.r_regionkey
---- PLAN
PLAN-ROOT SINK
|
08:HASH JOIN [INNER JOIN]
| hash predicates: n.n_regionkey = r.r_regionkey
| runtime filters: RF000 <- r.r_regionkey
| row-size=325B cardinality=1.01K
|
|--04:SCAN HDFS [tpch.region r]
| partitions=1/1 files=1 size=384B
| predicates: r.r_name = 'EUROPE'
| row-size=21B cardinality=1
|
07:HASH JOIN [INNER JOIN]
| hash predicates: s.s_nationkey = n.n_nationkey
| runtime filters: RF002 <- n.n_nationkey
| row-size=304B cardinality=5.05K
|
|--03:SCAN HDFS [tpch.nation n]
| partitions=1/1 files=1 size=2.15KB
| runtime filters: RF000 -> n.n_regionkey
| row-size=23B cardinality=25
|
06:HASH JOIN [INNER JOIN]
| hash predicates: s.s_suppkey = ps.ps_suppkey
| runtime filters: RF004 <- ps.ps_suppkey
| row-size=281B cardinality=5.05K
|
|--05:HASH JOIN [INNER JOIN]
| | hash predicates: ps.ps_partkey = p.p_partkey
| | runtime filters: RF006 <- p.p_partkey
| | row-size=95B cardinality=5.05K
| |
| |--00:SCAN HDFS [tpch.part p]
| | partitions=1/1 files=1 size=22.83MB
| | predicates: p.p_size = 15, p.p_type LIKE '%BRASS'
| | row-size=71B cardinality=1.26K
| |
| 02:SCAN HDFS [tpch.partsupp ps]
| partitions=1/1 files=1 size=112.71MB
| runtime filters: RF006 -> ps.ps_partkey
| row-size=24B cardinality=800.00K
|
01:SCAN HDFS [tpch.supplier s]
partitions=1/1 files=1 size=1.33MB
runtime filters: RF002 -> s.s_nationkey, RF004 -> s.s_suppkey
row-size=187B cardinality=10.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
13:EXCHANGE [UNPARTITIONED]
|
08:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: n.n_regionkey = r.r_regionkey
| runtime filters: RF000 <- r.r_regionkey
| row-size=325B cardinality=1.01K
|
|--12:EXCHANGE [BROADCAST]
| |
| 04:SCAN HDFS [tpch.region r]
| partitions=1/1 files=1 size=384B
| predicates: r.r_name = 'EUROPE'
| row-size=21B cardinality=1
|
07:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s.s_nationkey = n.n_nationkey
| runtime filters: RF002 <- n.n_nationkey
| row-size=304B cardinality=5.05K
|
|--11:EXCHANGE [BROADCAST]
| |
| 03:SCAN HDFS [tpch.nation n]
| partitions=1/1 files=1 size=2.15KB
| runtime filters: RF000 -> n.n_regionkey
| row-size=23B cardinality=25
|
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s.s_suppkey = ps.ps_suppkey
| runtime filters: RF004 <- ps.ps_suppkey
| row-size=281B cardinality=5.05K
|
|--10:EXCHANGE [BROADCAST]
| |
| 05:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: ps.ps_partkey = p.p_partkey
| | runtime filters: RF006 <- p.p_partkey
| | row-size=95B cardinality=5.05K
| |
| |--09:EXCHANGE [BROADCAST]
| | |
| | 00:SCAN HDFS [tpch.part p]
| | partitions=1/1 files=1 size=22.83MB
| | predicates: p.p_size = 15, p.p_type LIKE '%BRASS'
| | row-size=71B cardinality=1.26K
| |
| 02:SCAN HDFS [tpch.partsupp ps]
| partitions=1/1 files=1 size=112.71MB
| runtime filters: RF006 -> ps.ps_partkey
| row-size=24B cardinality=800.00K
|
01:SCAN HDFS [tpch.supplier s]
partitions=1/1 files=1 size=1.33MB
runtime filters: RF002 -> s.s_nationkey, RF004 -> s.s_suppkey
row-size=187B cardinality=10.00K
====
# Q4 - Order Priority Checking Query
# the largest input is prevented from becoming the leftmost input by the semi-join
select
o_orderpriority,
count(*) as order_count
from tpch.orders
left semi join tpch.lineitem
on (o_orderkey = l_orderkey and
l_commitdate < l_receiptdate)
where
o_orderdate >= '1993-07-01' and
o_orderdate < '1993-10-01'
group by
o_orderpriority
order by
o_orderpriority
limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
| row-size=28B cardinality=5
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: o_orderpriority
| row-size=28B cardinality=5
|
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: l_orderkey = o_orderkey
| runtime filters: RF000 <- o_orderkey
| row-size=50B cardinality=150.00K
|
|--00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01'
| row-size=50B cardinality=150.00K
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
predicates: l_commitdate < l_receiptdate
runtime filters: RF000 -> l_orderkey
row-size=52B cardinality=600.12K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:MERGING-EXCHANGE [UNPARTITIONED]
| order by: o_orderpriority ASC
| limit: 10
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
| row-size=28B cardinality=5
|
08:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: o_orderpriority
| row-size=28B cardinality=5
|
07:EXCHANGE [HASH(o_orderpriority)]
|
03:AGGREGATE [STREAMING]
| output: count(*)
| group by: o_orderpriority
| row-size=28B cardinality=5
|
02:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
| hash predicates: l_orderkey = o_orderkey
| runtime filters: RF000 <- o_orderkey
| row-size=50B cardinality=150.00K
|
|--06:EXCHANGE [HASH(o_orderkey)]
| |
| 00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
| predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01'
| row-size=50B cardinality=150.00K
|
05:EXCHANGE [HASH(l_orderkey)]
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
predicates: l_commitdate < l_receiptdate
runtime filters: RF000 -> l_orderkey
row-size=52B cardinality=600.12K
====
select o_orderpriority, count(*) as order_count
from tpch.orders
full outer join tpch.lineitem
on (o_orderkey = l_orderkey)
group by o_orderpriority
order by o_orderpriority limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
| row-size=28B cardinality=5
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: o_orderpriority
| row-size=28B cardinality=5
|
02:HASH JOIN [FULL OUTER JOIN]
| hash predicates: l_orderkey = o_orderkey
| row-size=36B cardinality=7.50M
|
|--00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
| row-size=28B cardinality=1.50M
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
row-size=8B cardinality=6.00M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:MERGING-EXCHANGE [UNPARTITIONED]
| order by: o_orderpriority ASC
| limit: 10
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
| row-size=28B cardinality=5
|
08:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: o_orderpriority
| row-size=28B cardinality=5
|
07:EXCHANGE [HASH(o_orderpriority)]
|
03:AGGREGATE [STREAMING]
| output: count(*)
| group by: o_orderpriority
| row-size=28B cardinality=5
|
02:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
| hash predicates: l_orderkey = o_orderkey
| row-size=36B cardinality=7.50M
|
|--06:EXCHANGE [HASH(o_orderkey)]
| |
| 00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
| row-size=28B cardinality=1.50M
|
05:EXCHANGE [HASH(l_orderkey)]
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
row-size=8B cardinality=6.00M
====
select o_orderpriority, count(*) as order_count
from tpch.orders
right outer join tpch.lineitem
on (o_orderkey = l_orderkey)
group by o_orderpriority
order by o_orderpriority limit 10
---- PLAN
PLAN-ROOT SINK
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
| row-size=28B cardinality=5
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| group by: o_orderpriority
| row-size=28B cardinality=5
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: l_orderkey = o_orderkey
| row-size=36B cardinality=6.00M
|
|--00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
| row-size=28B cardinality=1.50M
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
row-size=8B cardinality=6.00M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:MERGING-EXCHANGE [UNPARTITIONED]
| order by: o_orderpriority ASC
| limit: 10
|
04:TOP-N [LIMIT=10]
| order by: o_orderpriority ASC
| row-size=28B cardinality=5
|
08:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: o_orderpriority
| row-size=28B cardinality=5
|
07:EXCHANGE [HASH(o_orderpriority)]
|
03:AGGREGATE [STREAMING]
| output: count(*)
| group by: o_orderpriority
| row-size=28B cardinality=5
|
02:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
| hash predicates: l_orderkey = o_orderkey
| row-size=36B cardinality=6.00M
|
|--06:EXCHANGE [HASH(o_orderkey)]
| |
| 00:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
| row-size=28B cardinality=1.50M
|
05:EXCHANGE [HASH(l_orderkey)]
|
01:SCAN HDFS [tpch.lineitem]
partitions=1/1 files=1 size=718.94MB
row-size=8B cardinality=6.00M
====
# order does not become the leftmost input because of the outer join;
# the join with nation is done first because it reduces the intermediate output
select count(*)
from tpch.customer
left outer join tpch.orders on (c_custkey = o_custkey)
join tpch.nation on (c_nationkey = n_nationkey)
where n_name = 'x'
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:HASH JOIN [INNER JOIN]
| hash predicates: c_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=39B cardinality=60.00K
|
|--02:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| predicates: n_name = 'x'
| row-size=21B cardinality=1
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF002 <- c_custkey
| row-size=18B cardinality=1.50M
|
|--00:SCAN HDFS [tpch.customer]
| partitions=1/1 files=1 size=23.08MB
| runtime filters: RF000 -> c_nationkey
| row-size=10B cardinality=150.00K
|
01:SCAN HDFS [tpch.orders]
partitions=1/1 files=1 size=162.56MB
runtime filters: RF002 -> o_custkey
row-size=8B cardinality=1.50M
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:AGGREGATE [FINALIZE]
| output: count:merge(*)
| row-size=8B cardinality=1
|
09:EXCHANGE [UNPARTITIONED]
|
05:AGGREGATE
| output: count(*)
| row-size=8B cardinality=1
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=39B cardinality=60.00K
|
|--08:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| predicates: n_name = 'x'
| row-size=21B cardinality=1
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: o_custkey = c_custkey
| runtime filters: RF002 <- c_custkey
| row-size=18B cardinality=1.50M
|
|--07:EXCHANGE [HASH(c_custkey)]
| |
| 00:SCAN HDFS [tpch.customer]
| partitions=1/1 files=1 size=23.08MB
| runtime filters: RF000 -> c_nationkey
| row-size=10B cardinality=150.00K
|
06:EXCHANGE [HASH(o_custkey)]
|
01:SCAN HDFS [tpch.orders]
partitions=1/1 files=1 size=162.56MB
runtime filters: RF002 -> o_custkey
row-size=8B cardinality=1.50M
====
# order does not become the leftmost input because of the cross join;
# the join with nation is done first because it reduces the intermediate output
select count(*)
from tpch.customer
cross join tpch.orders
join tpch.nation on (c_nationkey = n_nationkey)
where n_name = 'x'
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:NESTED LOOP JOIN [CROSS JOIN]
| row-size=23B cardinality=9.00G
|
|--01:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
| row-size=0B cardinality=1.50M
|
03:HASH JOIN [INNER JOIN]
| hash predicates: c_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=23B cardinality=6.00K
|
|--02:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| predicates: n_name = 'x'
| row-size=21B cardinality=1
|
00:SCAN HDFS [tpch.customer]
partitions=1/1 files=1 size=23.08MB
runtime filters: RF000 -> c_nationkey
row-size=2B cardinality=150.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:AGGREGATE [FINALIZE]
| output: count:merge(*)
| row-size=8B cardinality=1
|
08:EXCHANGE [UNPARTITIONED]
|
05:AGGREGATE
| output: count(*)
| row-size=8B cardinality=1
|
04:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=23B cardinality=9.00G
|
|--07:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpch.orders]
| partitions=1/1 files=1 size=162.56MB
| row-size=0B cardinality=1.50M
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=23B cardinality=6.00K
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [tpch.nation]
| partitions=1/1 files=1 size=2.15KB
| predicates: n_name = 'x'
| row-size=21B cardinality=1
|
00:SCAN HDFS [tpch.customer]
partitions=1/1 files=1 size=23.08MB
runtime filters: RF000 -> c_nationkey
row-size=2B cardinality=150.00K
====
# Do not consider 'c' a candidate for the leftmost table (IMPALA-1281),
# because doing so requires careful consideration of the joinOps of
# all table refs between 'a' and 'c'. Due to lhs/rhs flipping 'c'
# can still become the leftmost table though.
select c.int_col from functional.alltypestiny a
cross join functional.alltypestiny b
cross join functional.alltypes c
---- PLAN
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [CROSS JOIN]
| row-size=4B cardinality=467.20K
|
|--03:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=0B cardinality=64
| |
| |--01:SCAN HDFS [functional.alltypestiny b]
| | partitions=4/4 files=4 size=460B
| | row-size=0B cardinality=8
| |
| 00:SCAN HDFS [functional.alltypestiny a]
| partitions=4/4 files=4 size=460B
| row-size=0B cardinality=8
|
02:SCAN HDFS [functional.alltypes c]
partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
====
# Test that tables are not re-ordered across outer/semi joins (IMPALA-860),
# but the tables to the left/right of outer/semi joins are still re-ordered.
select count(*) from
functional.alltypestiny t1
inner join functional.alltypes t2 on (t1.id = t2.id)
inner join functional.alltypessmall t3 on (t2.id = t3.id)
right join functional.alltypesagg t4 on (t3.id = t4.id)
inner join functional.alltypes t5 on (t4.id = t5.id)
inner join functional.alltypestiny t6 on (t5.id = t6.id)
---- PLAN
PLAN-ROOT SINK
|
11:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
10:HASH JOIN [INNER JOIN]
| hash predicates: t5.id = t4.id
| runtime filters: RF000 <- t4.id
| row-size=24B cardinality=7
|
|--09:HASH JOIN [INNER JOIN]
| | hash predicates: t4.id = t6.id
| | runtime filters: RF002 <- t6.id
| | row-size=20B cardinality=9
| |
| |--05:SCAN HDFS [functional.alltypestiny t6]
| | partitions=4/4 files=4 size=460B
| | row-size=4B cardinality=8
| |
| 08:HASH JOIN [LEFT OUTER JOIN]
| | hash predicates: t4.id = t3.id
| | row-size=16B cardinality=11.00K
| |
| |--07:HASH JOIN [INNER JOIN]
| | | hash predicates: t3.id = t2.id
| | | runtime filters: RF004 <- t2.id
| | | row-size=12B cardinality=1
| | |
| | |--06:HASH JOIN [INNER JOIN]
| | | | hash predicates: t2.id = t1.id
| | | | runtime filters: RF006 <- t1.id
| | | | row-size=8B cardinality=8
| | | |
| | | |--00:SCAN HDFS [functional.alltypestiny t1]
| | | | partitions=4/4 files=4 size=460B
| | | | runtime filters: RF002 -> t1.id
| | | | row-size=4B cardinality=8
| | | |
| | | 01:SCAN HDFS [functional.alltypes t2]
| | | partitions=24/24 files=24 size=478.45KB
| | | runtime filters: RF002 -> t2.id, RF006 -> t2.id
| | | row-size=4B cardinality=7.30K
| | |
| | 02:SCAN HDFS [functional.alltypessmall t3]
| | partitions=4/4 files=4 size=6.32KB
| | runtime filters: RF002 -> t3.id, RF004 -> t3.id
| | row-size=4B cardinality=100
| |
| 03:SCAN HDFS [functional.alltypesagg t4]
| partitions=11/11 files=11 size=814.73KB
| runtime filters: RF002 -> t4.id
| row-size=4B cardinality=11.00K
|
04:SCAN HDFS [functional.alltypes t5]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t5.id
row-size=4B cardinality=7.30K
====
# No tables can be re-ordered because of semi and outer joins that must
# remain at a fixed position in the plan (IMPALA-860).
select count(*) from
functional.alltypestiny t1
left join functional.alltypes t2 on (t1.id = t2.id)
inner join functional.alltypessmall t3 on (t2.id = t3.id)
left semi join functional.alltypesagg t4 on (t3.id = t4.id)
inner join functional.alltypes t5 on (t3.id = t5.id)
right join functional.alltypestiny t6 on (t5.id = t6.id)
---- PLAN
PLAN-ROOT SINK
|
13:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
12:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t6.id = t5.id
| row-size=20B cardinality=8
|
|--11:HASH JOIN [INNER JOIN]
| | hash predicates: t5.id = t3.id
| | runtime filters: RF000 <- t3.id
| | row-size=16B cardinality=1
| |
| |--10:HASH JOIN [RIGHT SEMI JOIN]
| | | hash predicates: t4.id = t3.id
| | | runtime filters: RF002 <- t3.id
| | | row-size=12B cardinality=1
| | |
| | |--09:HASH JOIN [INNER JOIN]
| | | | hash predicates: t3.id = t2.id
| | | | runtime filters: RF004 <- t2.id
| | | | row-size=12B cardinality=1
| | | |
| | | |--08:HASH JOIN [RIGHT OUTER JOIN]
| | | | | hash predicates: t2.id = t1.id
| | | | | runtime filters: RF006 <- t1.id
| | | | | row-size=8B cardinality=8
| | | | |
| | | | |--00:SCAN HDFS [functional.alltypestiny t1]
| | | | | partitions=4/4 files=4 size=460B
| | | | | row-size=4B cardinality=8
| | | | |
| | | | 01:SCAN HDFS [functional.alltypes t2]
| | | | partitions=24/24 files=24 size=478.45KB
| | | | runtime filters: RF006 -> t2.id
| | | | row-size=4B cardinality=7.30K
| | | |
| | | 02:SCAN HDFS [functional.alltypessmall t3]
| | | partitions=4/4 files=4 size=6.32KB
| | | runtime filters: RF004 -> t3.id
| | | row-size=4B cardinality=100
| | |
| | 03:SCAN HDFS [functional.alltypesagg t4]
| | partitions=11/11 files=11 size=814.73KB
| | runtime filters: RF002 -> t4.id
| | row-size=4B cardinality=11.00K
| |
| 04:SCAN HDFS [functional.alltypes t5]
| partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> t5.id
| row-size=4B cardinality=7.30K
|
05:SCAN HDFS [functional.alltypestiny t6]
partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# Check that a join in between outer/semi joins is re-ordered correctly.
# We expect t4 is joined before t3.
select count(*) from
functional.alltypestiny t1
left join functional.alltypes t2 on (t1.id = t2.id)
inner join functional.alltypesagg t3 on (t2.id = t3.id)
inner join functional.alltypessmall t4 on (t3.id = t4.id)
left semi join functional.alltypes t5 on (t4.id = t5.id)
inner join functional.alltypestiny t6 on (t3.id = t6.id)
---- PLAN
PLAN-ROOT SINK
|
13:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
12:HASH JOIN [INNER JOIN]
| hash predicates: t6.id = t3.id
| runtime filters: RF000 <- t3.id
| row-size=20B cardinality=1
|
|--11:HASH JOIN [RIGHT SEMI JOIN]
| | hash predicates: t5.id = t4.id
| | runtime filters: RF002 <- t4.id
| | row-size=16B cardinality=1
| |
| |--10:HASH JOIN [INNER JOIN]
| | | hash predicates: t3.id = t2.id
| | | runtime filters: RF004 <- t2.id
| | | row-size=16B cardinality=1
| | |
| | |--09:HASH JOIN [INNER JOIN]
| | | | hash predicates: t4.id = t2.id
| | | | runtime filters: RF006 <- t2.id
| | | | row-size=12B cardinality=1
| | | |
| | | |--08:HASH JOIN [RIGHT OUTER JOIN]
| | | | | hash predicates: t2.id = t1.id
| | | | | runtime filters: RF008 <- t1.id
| | | | | row-size=8B cardinality=8
| | | | |
| | | | |--00:SCAN HDFS [functional.alltypestiny t1]
| | | | | partitions=4/4 files=4 size=460B
| | | | | row-size=4B cardinality=8
| | | | |
| | | | 01:SCAN HDFS [functional.alltypes t2]
| | | | partitions=24/24 files=24 size=478.45KB
| | | | runtime filters: RF008 -> t2.id
| | | | row-size=4B cardinality=7.30K
| | | |
| | | 03:SCAN HDFS [functional.alltypessmall t4]
| | | partitions=4/4 files=4 size=6.32KB
| | | runtime filters: RF006 -> t4.id
| | | row-size=4B cardinality=100
| | |
| | 02:SCAN HDFS [functional.alltypesagg t3]
| | partitions=11/11 files=11 size=814.73KB
| | runtime filters: RF004 -> t3.id
| | row-size=4B cardinality=11.00K
| |
| 04:SCAN HDFS [functional.alltypes t5]
| partitions=24/24 files=24 size=478.45KB
| runtime filters: RF002 -> t5.id
| row-size=4B cardinality=7.30K
|
05:SCAN HDFS [functional.alltypestiny t6]
partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t6.id
row-size=4B cardinality=8
====
# Same above but using an anti join instead of a semi join.
select count(*) from
functional.alltypestiny t1
left join functional.alltypes t2 on (t1.id = t2.id)
inner join functional.alltypesagg t3 on (t2.id = t3.id)
inner join functional.alltypessmall t4 on (t3.id = t4.id)
left anti join functional.alltypes t5 on (t4.id = t5.id)
inner join functional.alltypestiny t6 on (t3.id = t6.id)
---- PLAN
PLAN-ROOT SINK
|
13:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
12:HASH JOIN [INNER JOIN]
| hash predicates: t6.id = t3.id
| runtime filters: RF000 <- t3.id
| row-size=20B cardinality=1
|
|--11:HASH JOIN [RIGHT ANTI JOIN]
| | hash predicates: t5.id = t4.id
| | row-size=16B cardinality=1
| |
| |--10:HASH JOIN [INNER JOIN]
| | | hash predicates: t3.id = t2.id
| | | runtime filters: RF002 <- t2.id
| | | row-size=16B cardinality=1
| | |
| | |--09:HASH JOIN [INNER JOIN]
| | | | hash predicates: t4.id = t2.id
| | | | runtime filters: RF004 <- t2.id
| | | | row-size=12B cardinality=1
| | | |
| | | |--08:HASH JOIN [RIGHT OUTER JOIN]
| | | | | hash predicates: t2.id = t1.id
| | | | | runtime filters: RF006 <- t1.id
| | | | | row-size=8B cardinality=8
| | | | |
| | | | |--00:SCAN HDFS [functional.alltypestiny t1]
| | | | | partitions=4/4 files=4 size=460B
| | | | | row-size=4B cardinality=8
| | | | |
| | | | 01:SCAN HDFS [functional.alltypes t2]
| | | | partitions=24/24 files=24 size=478.45KB
| | | | runtime filters: RF006 -> t2.id
| | | | row-size=4B cardinality=7.30K
| | | |
| | | 03:SCAN HDFS [functional.alltypessmall t4]
| | | partitions=4/4 files=4 size=6.32KB
| | | runtime filters: RF004 -> t4.id
| | | row-size=4B cardinality=100
| | |
| | 02:SCAN HDFS [functional.alltypesagg t3]
| | partitions=11/11 files=11 size=814.73KB
| | runtime filters: RF002 -> t3.id
| | row-size=4B cardinality=11.00K
| |
| 04:SCAN HDFS [functional.alltypes t5]
| partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
05:SCAN HDFS [functional.alltypestiny t6]
partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t6.id
row-size=4B cardinality=8
====
# Test inverting outer joins in a complex query plan.
select 1 from
(select count(distinct a.id) as id from functional.alltypestiny a
inner join functional.alltypestiny b
on (a.id = b.id)) t1
inner join
(select a.* from functional.alltypes a
right outer join functional.alltypestiny b
on (a.id = b.id)
where a.year < 10) t2
on (t1.id = t2.id)
left outer join functional.alltypes t3
on (t2.id = t3.id)
inner join functional.alltypestiny t4
on (t3.id = t4.id)
---- PLAN
PLAN-ROOT SINK
|
12:HASH JOIN [INNER JOIN]
| hash predicates: t3.id = t4.id
| runtime filters: RF000 <- t4.id
| row-size=28B cardinality=1
|
|--09:SCAN HDFS [functional.alltypestiny t4]
| partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
11:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t3.id = a.id
| runtime filters: RF002 <- a.id
| row-size=24B cardinality=8
|
|--10:HASH JOIN [INNER JOIN]
| | hash predicates: a.id = count(a.id)
| | runtime filters: RF004 <- count(a.id)
| | row-size=20B cardinality=8
| |
| |--04:AGGREGATE [FINALIZE]
| | | output: count(a.id)
| | | row-size=8B cardinality=1
| | |
| | 03:AGGREGATE
| | | group by: a.id
| | | row-size=4B cardinality=8
| | |
| | 02:HASH JOIN [INNER JOIN]
| | | hash predicates: a.id = b.id
| | | runtime filters: RF006 <- b.id
| | | row-size=8B cardinality=8
| | |
| | |--01:SCAN HDFS [functional.alltypestiny b]
| | | partitions=4/4 files=4 size=460B
| | | row-size=4B cardinality=8
| | |
| | 00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF006 -> a.id
| | row-size=4B cardinality=8
| |
| 07:HASH JOIN [LEFT OUTER JOIN]
| | hash predicates: b.id = a.id
| | other predicates: a.`year` < 10
| | row-size=12B cardinality=8
| |
| |--05:SCAN HDFS [functional.alltypes a]
| | partition predicates: a.`year` < 10
| | partitions=0/24 files=0 size=0B
| | runtime filters: RF004 -> a.id
| | row-size=8B cardinality=0
| |
| 06:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
08:SCAN HDFS [functional.alltypes t3]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t3.id, RF002 -> t3.id
row-size=4B cardinality=7.30K
====
# Same as above but with full outer joins.
select 1 from
(select count(distinct a.id) as id from functional.alltypestiny a
inner join functional.alltypestiny b
on (a.id = b.id)) t1
inner join
(select a.* from functional.alltypes a
full outer join functional.alltypestiny b
on (a.id = b.id)
where a.year < 10) t2
on (t1.id = t2.id)
full outer join functional.alltypes t3
on (t2.id = t3.id)
inner join functional.alltypestiny t4
on (t3.id = t4.id)
---- PLAN
PLAN-ROOT SINK
|
12:HASH JOIN [INNER JOIN]
| hash predicates: t3.id = t4.id
| runtime filters: RF000 <- t4.id
| row-size=28B cardinality=9
|
|--09:SCAN HDFS [functional.alltypestiny t4]
| partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
11:HASH JOIN [FULL OUTER JOIN]
| hash predicates: t3.id = a.id
| row-size=24B cardinality=7.31K
|
|--10:HASH JOIN [INNER JOIN]
| | hash predicates: a.id = count(a.id)
| | runtime filters: RF002 <- count(a.id)
| | row-size=20B cardinality=8
| |
| |--04:AGGREGATE [FINALIZE]
| | | output: count(a.id)
| | | row-size=8B cardinality=1
| | |
| | 03:AGGREGATE
| | | group by: a.id
| | | row-size=4B cardinality=8
| | |
| | 02:HASH JOIN [INNER JOIN]
| | | hash predicates: a.id = b.id
| | | runtime filters: RF004 <- b.id
| | | row-size=8B cardinality=8
| | |
| | |--01:SCAN HDFS [functional.alltypestiny b]
| | | partitions=4/4 files=4 size=460B
| | | row-size=4B cardinality=8
| | |
| | 00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF004 -> a.id
| | row-size=4B cardinality=8
| |
| 07:HASH JOIN [FULL OUTER JOIN]
| | hash predicates: b.id = a.id
| | other predicates: a.`year` < 10
| | row-size=12B cardinality=8
| |
| |--05:SCAN HDFS [functional.alltypes a]
| | partition predicates: a.`year` < 10
| | partitions=0/24 files=0 size=0B
| | runtime filters: RF002 -> a.id
| | row-size=8B cardinality=0
| |
| 06:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
08:SCAN HDFS [functional.alltypes t3]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t3.id
row-size=4B cardinality=7.30K
====
# Test inverting semi joins in a complex query plan.
select 1 from
(select count(distinct a.id) as id from functional.alltypestiny a
inner join functional.alltypestiny b
on (a.id = b.id)) t1
inner join
(select b.* from functional.alltypestiny a
right semi join functional.alltypes b
on (a.id = b.id)) t2
on (t1.id = t2.id)
left semi join functional.alltypes t3
on (t2.id = t3.id)
inner join functional.alltypestiny t4
on (t2.id = t4.id)
where t2.month = 1
---- PLAN
PLAN-ROOT SINK
|
12:HASH JOIN [INNER JOIN]
| hash predicates: b.id = t4.id
| runtime filters: RF000 <- t4.id
| row-size=16B cardinality=1
|
|--09:SCAN HDFS [functional.alltypestiny t4]
| partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
11:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: t3.id = b.id
| runtime filters: RF002 <- b.id
| row-size=12B cardinality=8
|
|--10:HASH JOIN [INNER JOIN]
| | hash predicates: b.id = count(a.id)
| | runtime filters: RF004 <- count(a.id)
| | row-size=12B cardinality=8
| |
| |--04:AGGREGATE [FINALIZE]
| | | output: count(a.id)
| | | row-size=8B cardinality=1
| | |
| | 03:AGGREGATE
| | | group by: a.id
| | | row-size=4B cardinality=8
| | |
| | 02:HASH JOIN [INNER JOIN]
| | | hash predicates: a.id = b.id
| | | runtime filters: RF008 <- b.id
| | | row-size=8B cardinality=8
| | |
| | |--01:SCAN HDFS [functional.alltypestiny b]
| | | partitions=4/4 files=4 size=460B
| | | row-size=4B cardinality=8
| | |
| | 00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF008 -> a.id
| | row-size=4B cardinality=8
| |
| 07:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: b.id = a.id
| | runtime filters: RF006 <- a.id
| | row-size=4B cardinality=8
| |
| |--05:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF000 -> a.id, RF004 -> a.id
| | row-size=4B cardinality=8
| |
| 06:SCAN HDFS [functional.alltypes b]
| partition predicates: b.month = 1
| partitions=2/24 files=2 size=40.32KB
| runtime filters: RF000 -> b.id, RF004 -> b.id, RF006 -> b.id
| row-size=4B cardinality=620
|
08:SCAN HDFS [functional.alltypes t3]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t3.id, RF002 -> t3.id
row-size=4B cardinality=7.30K
====
# Same as above but with anti joins.
select 1 from
(select count(distinct a.id) as id from functional.alltypestiny a
inner join functional.alltypestiny b
on (a.id = b.id)) t1
inner join
(select b.* from functional.alltypestiny a
right anti join functional.alltypes b
on (a.id = b.id)) t2
on (t1.id = t2.id)
left anti join functional.alltypes t3
on (t2.id = t3.id)
inner join functional.alltypestiny t4
on (t2.id = t4.id)
where t2.month = 1
---- PLAN
PLAN-ROOT SINK
|
12:HASH JOIN [INNER JOIN]
| hash predicates: b.id = t4.id
| runtime filters: RF000 <- t4.id
| row-size=16B cardinality=1
|
|--09:SCAN HDFS [functional.alltypestiny t4]
| partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
11:HASH JOIN [RIGHT ANTI JOIN]
| hash predicates: t3.id = b.id
| row-size=12B cardinality=620
|
|--10:HASH JOIN [INNER JOIN]
| | hash predicates: b.id = count(a.id)
| | runtime filters: RF002 <- count(a.id)
| | row-size=12B cardinality=620
| |
| |--04:AGGREGATE [FINALIZE]
| | | output: count(a.id)
| | | row-size=8B cardinality=1
| | |
| | 03:AGGREGATE
| | | group by: a.id
| | | row-size=4B cardinality=8
| | |
| | 02:HASH JOIN [INNER JOIN]
| | | hash predicates: a.id = b.id
| | | runtime filters: RF004 <- b.id
| | | row-size=8B cardinality=8
| | |
| | |--01:SCAN HDFS [functional.alltypestiny b]
| | | partitions=4/4 files=4 size=460B
| | | row-size=4B cardinality=8
| | |
| | 00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF004 -> a.id
| | row-size=4B cardinality=8
| |
| 07:HASH JOIN [LEFT ANTI JOIN]
| | hash predicates: b.id = a.id
| | row-size=4B cardinality=620
| |
| |--05:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | runtime filters: RF000 -> a.id, RF002 -> a.id
| | row-size=4B cardinality=8
| |
| 06:SCAN HDFS [functional.alltypes b]
| partition predicates: b.month = 1
| partitions=2/24 files=2 size=40.32KB
| runtime filters: RF000 -> b.id, RF002 -> b.id
| row-size=4B cardinality=620
|
08:SCAN HDFS [functional.alltypes t3]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t3.id
row-size=4B cardinality=7.30K
====
# Regression test for IMPALA-1343.
SELECT sum(t4.tinyint_col)
FROM functional.alltypestiny t4
CROSS JOIN
(SELECT t2.smallint_col smallint_col_1
FROM functional.alltypes t1
INNER JOIN functional.alltypestiny t2
ON t2.smallint_col = t1.bigint_col LIMIT 1) `$a$1`
CROSS JOIN
(SELECT sum(t1.int_col) `$c$1`
FROM functional.alltypesagg t1 LIMIT 1) `$a$2`
LEFT SEMI JOIN
(SELECT tt1.int_col `$c$1`
FROM functional.alltypestiny tt1) `$a$3`
ON t4.bigint_col = `$a$3`.`$c$1`
WHERE `$a$2`.`$c$1` > t4.id
---- PLAN
PLAN-ROOT SINK
|
10:AGGREGATE [FINALIZE]
| output: sum(t4.tinyint_col)
| row-size=8B cardinality=1
|
09:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t4.bigint_col = tt1.int_col
| runtime filters: RF000 <- tt1.int_col
| row-size=31B cardinality=8
|
|--06:SCAN HDFS [functional.alltypestiny tt1]
| partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
08:NESTED LOOP JOIN [INNER JOIN]
| predicates: sum(t1.int_col) > t4.id
| row-size=31B cardinality=8
|
|--05:AGGREGATE [FINALIZE]
| | output: sum(t1.int_col)
| | limit: 1
| | row-size=8B cardinality=1
| |
| 04:SCAN HDFS [functional.alltypesagg t1]
| partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
07:NESTED LOOP JOIN [CROSS JOIN]
| row-size=23B cardinality=8
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: t1.bigint_col = t2.smallint_col
| | runtime filters: RF002 <- t2.smallint_col
| | limit: 1
| | row-size=10B cardinality=1
| |
| |--02:SCAN HDFS [functional.alltypestiny t2]
| | partitions=4/4 files=4 size=460B
| | row-size=2B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypes t1]
| partitions=24/24 files=24 size=478.45KB
| runtime filters: RF002 -> t1.bigint_col
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny t4]
partitions=4/4 files=4 size=460B
runtime filters: RF000 -> t4.bigint_col
row-size=13B cardinality=8
====
# Tests assignment of conjuncts to inverted outer joins (IMPALA-1342).
select 1
from functional.alltypestiny a
left outer join functional.alltypessmall b
on b.id = a.id
# both predicates should appear in the 'other predicates'
where a.int_col = b.int_col and b.bigint_col < a.tinyint_col
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: b.id = a.id
| other predicates: a.int_col = b.int_col, b.bigint_col < a.tinyint_col
| runtime filters: RF000 <- a.id, RF001 <- a.int_col
| row-size=25B cardinality=8
|
|--00:SCAN HDFS [functional.alltypestiny a]
| partitions=4/4 files=4 size=460B
| row-size=9B cardinality=8
|
01:SCAN HDFS [functional.alltypessmall b]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> b.id, RF001 -> b.int_col
row-size=16B cardinality=100
====
# Tests assignment of conjuncts to inverted outer joins (IMPALA-1342).
select 1
from functional.alltypestiny a
left outer join functional.alltypessmall b
on b.id = a.id
right outer join functional.alltypes c
on b.id = c.id
# all predicates should appear in the 'other predicates'
where a.int_col = b.int_col and b.bigint_col < a.tinyint_col
and b.tinyint_col = c.tinyint_col and b.bool_col != c.bool_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: c.id = b.id
| other predicates: a.int_col = b.int_col, b.bool_col != c.bool_col, b.tinyint_col = c.tinyint_col, b.bigint_col < a.tinyint_col
| runtime filters: RF000 <- b.tinyint_col
| row-size=33B cardinality=7.30K
|
|--03:HASH JOIN [RIGHT OUTER JOIN]
| | hash predicates: b.id = a.id
| | runtime filters: RF002 <- a.id
| | row-size=27B cardinality=8
| |
| |--00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | row-size=9B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
| runtime filters: RF002 -> b.id
| row-size=18B cardinality=100
|
02:SCAN HDFS [functional.alltypes c]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> c.tinyint_col
row-size=6B cardinality=7.30K
====
# Tests assignment of conjuncts to inverted outer joins (IMPALA-1342).
select 1
from functional.alltypestiny a
inner join functional.alltypessmall b
on b.id = a.id
right outer join functional.alltypes c
on b.id = c.id
# all predicates should appear in the 'other predicates'
where b.tinyint_col = c.tinyint_col and b.bool_col != c.bool_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: c.id = b.id
| other predicates: b.bool_col != c.bool_col, b.tinyint_col = c.tinyint_col
| runtime filters: RF000 <- b.tinyint_col
| row-size=16B cardinality=7.30K
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: b.id = a.id
| | runtime filters: RF002 <- a.id
| | row-size=10B cardinality=9
| |
| |--00:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | row-size=4B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
| runtime filters: RF002 -> b.id
| row-size=6B cardinality=100
|
02:SCAN HDFS [functional.alltypes c]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> c.tinyint_col
row-size=6B cardinality=7.30K
====
# Regression test for IMPALA-1342.
select count(1) from
(select t2.string_col AS string_col_1, t1.string_col as string_col_2
from functional.alltypesagg t1
left outer join functional.alltypes t2
on (t2.date_string_col = t1.string_col)
where t2.date_string_col = t1.string_col) t1
left outer join functional.alltypestiny t3
on (t3.string_col = t1.string_col_1 and t3.date_string_col = t1.string_col_2)
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t2.string_col = t3.string_col, t1.string_col = t3.date_string_col
| row-size=81B cardinality=83.39K
|
|--03:SCAN HDFS [functional.alltypestiny t3]
| partitions=4/4 files=4 size=460B
| row-size=33B cardinality=8
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t2.date_string_col = t1.string_col
| other predicates: t2.date_string_col = t1.string_col
| runtime filters: RF000 <- t1.string_col, RF001 <- t1.string_col
| row-size=48B cardinality=83.39K
|
|--00:SCAN HDFS [functional.alltypesagg t1]
| partitions=11/11 files=11 size=814.73KB
| row-size=15B cardinality=11.00K
|
01:SCAN HDFS [functional.alltypes t2]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t2.date_string_col, RF001 -> t2.date_string_col
row-size=33B cardinality=7.30K
====
# Test that filtering with "<=>" sets selectivity, just as "=" does. First, the
# base case: functional.alltypes.timestamp_col has more distinct vals than
# functional.alltypes.date_string_col. As a result, in a left semi join between
# functional.alltypes and itself, if one side of the join is filtered on timestamp_col and
# the other on date_string_col, the one filtered on timestamp_col is expected by the
# planner to be smaller and becomes the build side of the hash join.
select * from functional.alltypes a
left semi join
(select * from functional.alltypes
where timestamp_col = cast("2016-11-20" as timestamp)) b
on (a.id = b.id)
and a.date_string_col = ''
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = functional.alltypes.id
| runtime filters: RF000 <- functional.alltypes.id
| row-size=89B cardinality=1
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
| predicates: timestamp_col = TIMESTAMP '2016-11-20 00:00:00'
| row-size=20B cardinality=1
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.date_string_col = ''
runtime filters: RF000 -> a.id
row-size=89B cardinality=10
====
select * from functional.alltypes a
left semi join
(select * from functional.alltypes
where date_string_col = '') b
on (a.id = b.id)
and a.timestamp_col = cast("2016-11-20" as timestamp)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: functional.alltypes.id = a.id
| runtime filters: RF000 <- a.id
| row-size=89B cardinality=1
|
|--00:SCAN HDFS [functional.alltypes a]
| partitions=24/24 files=24 size=478.45KB
| predicates: a.timestamp_col = TIMESTAMP '2016-11-20 00:00:00'
| row-size=89B cardinality=1
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: date_string_col = ''
runtime filters: RF000 -> functional.alltypes.id
row-size=24B cardinality=10
====
# The same should hold true when the filtering is done with "<=>" rather than "=".
select * from functional.alltypes a
left semi join
(select * from functional.alltypes
where timestamp_col <=> cast("2016-11-20" as timestamp)) b
on (a.id = b.id)
and a.date_string_col <=> ''
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = functional.alltypes.id
| runtime filters: RF000 <- functional.alltypes.id
| row-size=89B cardinality=1
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
| predicates: timestamp_col IS NOT DISTINCT FROM TIMESTAMP '2016-11-20 00:00:00'
| row-size=20B cardinality=1
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.date_string_col IS NOT DISTINCT FROM ''
runtime filters: RF000 -> a.id
row-size=89B cardinality=10
====
select * from functional.alltypes a
left semi join
(select * from functional.alltypes
where date_string_col <=> '') b
on (a.id = b.id)
and a.timestamp_col <=> cast("2016-11-20" as timestamp)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: functional.alltypes.id = a.id
| runtime filters: RF000 <- a.id
| row-size=89B cardinality=1
|
|--00:SCAN HDFS [functional.alltypes a]
| partitions=24/24 files=24 size=478.45KB
| predicates: a.timestamp_col IS NOT DISTINCT FROM TIMESTAMP '2016-11-20 00:00:00'
| row-size=89B cardinality=1
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: date_string_col IS NOT DISTINCT FROM ''
runtime filters: RF000 -> functional.alltypes.id
row-size=24B cardinality=10
====