blob: 8e83a774e6b736f58a17483f49913d76feb56af5 [file] [log] [blame]
# IN predicate
select *
from functional.alltypes
where id in
(select id from functional.alltypesagg)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
| runtime filters: RF000 <- id
| row-size=89B cardinality=7.30K
|
|--01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> id
row-size=89B cardinality=7.30K
====
# NOT IN predicate rewritten into a null-aware anti join
select *
from functional.alltypes
where id not in
(select id from functional.alltypesagg)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
| hash predicates: id = id
| row-size=89B cardinality=7.30K
|
|--01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# Correlated NOT IN rewritten into a null-aware anti join
select *
from functional.alltypes a
where a.int_col not in
(select int_col
from functional.alltypesagg g
where g.id = a.id and g.bigint_col < a.bigint_col)
and a.int_col < 100
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
| hash predicates: a.int_col = int_col
| other join predicates: a.id = g.id, g.bigint_col < a.bigint_col
| row-size=89B cardinality=730
|
|--01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=16B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: a.int_col < 100
row-size=89B cardinality=730
====
# Correlated NOT IN subquery resulting in the same eq conjunct
# being used in both the hash and the other join predicate
select *
from functional.alltypes a
where a.id not in (select id from functional.alltypes b where a.id = b.id)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
| hash predicates: a.id = id
| other join predicates: a.id = b.id
| row-size=89B cardinality=7.30K
|
|--01:SCAN HDFS [functional.alltypes b]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# Subquery with predicate in the WHERE clause
select count(*)
from functional.alltypes a
where int_col in
(select int_col from functional.alltypesagg g where a.id = g.id and g.bigint_col < 10)
and bool_col = false
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = g.id, int_col = int_col
| runtime filters: RF000 <- g.id, RF001 <- int_col
| row-size=9B cardinality=1.10K
|
|--01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: g.bigint_col < 10
| row-size=16B cardinality=1.10K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: bool_col = FALSE
runtime filters: RF000 -> a.id, RF001 -> int_col
row-size=9B cardinality=3.65K
====
# IMPALA-4325: Preserve parenthesis of expressions when rewriting subqueries
select *
from functional.alltypes t1
where t1.int_col in
(select t2.int_col
from functional.alltypes t2
where (t2.int_col is not null and (t2.int_col < 0 or t2.int_col > 10)
or t2.bigint_col is not null and (t2.bigint_col < 0 or t2.bigint_col > 10)))
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.int_col = t2.int_col
| runtime filters: RF000 <- t2.int_col
| row-size=89B cardinality=7.30K
|
|--02:AGGREGATE [FINALIZE]
| | group by: t2.int_col
| | row-size=4B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: (t2.int_col IS NOT NULL AND (t2.int_col < 0 OR t2.int_col > 10) OR t2.bigint_col IS NOT NULL AND (t2.bigint_col < 0 OR t2.bigint_col > 10))
| row-size=12B cardinality=730
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t1.int_col
row-size=89B cardinality=7.30K
====
# Complex expression in the IN predicate
select *
from functional.alltypes t
where t.int_col + 1 in
(select int_col + bigint_col from functional.alltypesagg)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.int_col + 1 = int_col + bigint_col
| runtime filters: RF000 <- int_col + bigint_col
| row-size=89B cardinality=7.30K
|
|--01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=12B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t.int_col + 1
row-size=89B cardinality=7.30K
====
# Multiple subqueries in the WHERE clause
select *
from functional.alltypes t
where t.id in
(select id from functional.alltypesagg where bool_col = false)
and t.tinyint_col not in (select tinyint_col from functional.alltypestiny)
and t.bigint_col < 1000
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
| hash predicates: t.tinyint_col = functional.alltypestiny.tinyint_col
| row-size=89B cardinality=730
|
|--04:AGGREGATE [FINALIZE]
| | group by: functional.alltypestiny.tinyint_col
| | row-size=1B cardinality=2
| |
| 02:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=1B cardinality=8
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.id = id
| runtime filters: RF000 <- id
| row-size=89B cardinality=730
|
|--01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: bool_col = FALSE
| row-size=5B cardinality=5.50K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: t.bigint_col < 1000
runtime filters: RF000 -> t.id
row-size=89B cardinality=730
====
# Multiple tables in the FROM clause of the outer query block
select count(*)
from functional.alltypesagg a, functional.alltypes t
where a.id = t.id and a.int_col in
(select int_col from functional.alltypestiny where bool_col = false)
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.int_col = int_col
| runtime filters: RF000 <- int_col
| row-size=12B cardinality=16
|
|--02:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| predicates: bool_col = FALSE
| row-size=5B cardinality=4
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.id = t.id
| runtime filters: RF002 <- t.id
| row-size=12B cardinality=7.81K
|
|--01:SCAN HDFS [functional.alltypes t]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> a.int_col, RF002 -> a.id
row-size=8B cardinality=11.00K
====
# Multiple tables in the subquery
select count(*)
from functional.alltypesagg a
where a.id in
(select s.id
from functional.alltypessmall s, functional.alltypestiny t
where s.int_col = t.int_col and a.bool_col = s.bool_col)
and a.int_col < 10
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.bool_col = s.bool_col, a.id = s.id
| runtime filters: RF000 <- s.bool_col, RF001 <- s.id
| row-size=9B cardinality=80
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: s.int_col = t.int_col
| | runtime filters: RF004 <- t.int_col
| | row-size=13B cardinality=80
| |
| |--02:SCAN HDFS [functional.alltypestiny t]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=4B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| runtime filters: RF004 -> s.int_col
| row-size=9B cardinality=100
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: a.int_col < 10
runtime filters: RF000 -> a.bool_col, RF001 -> a.id
row-size=9B cardinality=1.10K
====
# Outer join between the tables in the outer query block
select count(*)
from functional.alltypesagg a left outer join functional.alltypes t
on a.int_col = t.int_col
where a.id in
(select id from functional.alltypestiny)
and t.bool_col = false
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = id
| runtime filters: RF000 <- id
| row-size=13B cardinality=33
|
|--02:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.int_col = t.int_col
| other predicates: t.bool_col = FALSE
| row-size=13B cardinality=41.95K
|
|--01:SCAN HDFS [functional.alltypes t]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: t.bool_col = FALSE
| row-size=5B cardinality=3.65K
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> a.id
row-size=8B cardinality=11.00K
====
# Subquery in the outer-joined table
select count(*)
from functional.alltypes a left outer join
(select * from functional.alltypesagg g where id in
(select id from functional.alltypestiny)) t
on a.int_col = t.int_col
where a.bool_col = false and t.bigint_col < 100
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.int_col = g.int_col
| other predicates: g.bigint_col < 100
| row-size=21B cardinality=5.84K
|
|--03:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: id = id
| | runtime filters: RF000 <- id
| | row-size=16B cardinality=8
| |
| |--02:SCAN HDFS [functional.alltypestiny]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=4B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: g.bigint_col < 100
| runtime filters: RF000 -> id
| row-size=16B cardinality=1.10K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: a.bool_col = FALSE
row-size=5B cardinality=3.65K
====
# Multiple tables in the FROM clause of the subquery
select count(distinct id)
from functional.alltypesagg a
where a.int_col in
(select t.int_col
from functional.alltypes t, functional.alltypessmall s, functional.alltypestiny n
where t.id = s.id and s.bigint_col = n.bigint_col and n.bool_col = false)
---- PLAN
PLAN-ROOT SINK
|
09:AGGREGATE [FINALIZE]
| output: count(id)
| row-size=8B cardinality=1
|
08:AGGREGATE
| group by: id
| row-size=4B cardinality=115
|
07:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.int_col = t.int_col
| runtime filters: RF000 <- t.int_col
| row-size=8B cardinality=115
|
|--06:AGGREGATE [FINALIZE]
| | group by: t.int_col
| | row-size=4B cardinality=10
| |
| 05:HASH JOIN [INNER JOIN]
| | hash predicates: s.bigint_col = n.bigint_col
| | runtime filters: RF002 <- n.bigint_col
| | row-size=29B cardinality=40
| |
| |--03:SCAN HDFS [functional.alltypestiny n]
| | HDFS partitions=4/4 files=4 size=460B
| | predicates: n.bool_col = FALSE
| | row-size=9B cardinality=4
| |
| 04:HASH JOIN [INNER JOIN]
| | hash predicates: t.id = s.id
| | runtime filters: RF004 <- s.id
| | row-size=20B cardinality=99
| |
| |--02:SCAN HDFS [functional.alltypessmall s]
| | HDFS partitions=4/4 files=4 size=6.32KB
| | runtime filters: RF002 -> s.bigint_col
| | row-size=12B cardinality=100
| |
| 01:SCAN HDFS [functional.alltypes t]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF004 -> t.id
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> a.int_col
row-size=8B cardinality=11.00K
====
# Subqueries with inline views
select *
from functional.alltypes t
where t.id in
(select a.id from functional.alltypestiny a,
(select id, count(*) as cnt from functional.alltypessmall group by id) s
where s.id = a.id and s.cnt = 10)
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.id = a.id
| runtime filters: RF000 <- a.id
| row-size=89B cardinality=8
|
|--04:HASH JOIN [INNER JOIN]
| | hash predicates: id = a.id
| | runtime filters: RF002 <- a.id
| | row-size=16B cardinality=8
| |
| |--01:SCAN HDFS [functional.alltypestiny a]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=4B cardinality=8
| |
| 03:AGGREGATE [FINALIZE]
| | output: count(*)
| | group by: id
| | having: count(*) = 10
| | row-size=12B cardinality=99
| |
| 02:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| runtime filters: RF002 -> functional.alltypessmall.id
| row-size=4B cardinality=100
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t.id
row-size=89B cardinality=7.30K
====
with t as (select a.* from functional.alltypes a where id in
(select id from functional.alltypestiny))
select * from t where t.bool_col = false and t.int_col = 10
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
| runtime filters: RF000 <- id
| row-size=89B cardinality=8
|
|--01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: a.int_col = 10, a.bool_col = FALSE
runtime filters: RF000 -> id
row-size=89B cardinality=516
====
# Subqueries in WITH, FROM and WHERE clauses
with t as (select a.* from functional.alltypes a
where id in (select id from functional.alltypestiny))
select t.*
from t, (select * from functional.alltypesagg g where g.id in
(select id from functional.alltypes)) s
where s.string_col = t.string_col and t.int_col in
(select int_col from functional.alltypessmall)
and s.bool_col = false
---- PLAN
PLAN-ROOT SINK
|
09:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.int_col = functional.alltypessmall.int_col
| runtime filters: RF000 <- functional.alltypessmall.int_col
| row-size=109B cardinality=91
|
|--08:AGGREGATE [FINALIZE]
| | group by: functional.alltypessmall.int_col
| | row-size=4B cardinality=10
| |
| 06:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=4B cardinality=100
|
07:HASH JOIN [INNER JOIN]
| hash predicates: g.string_col = a.string_col
| runtime filters: RF002 <- a.string_col
| row-size=109B cardinality=91
|
|--02:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: id = id
| | runtime filters: RF006 <- id
| | row-size=89B cardinality=8
| |
| |--01:SCAN HDFS [functional.alltypestiny]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=4B cardinality=8
| |
| 00:SCAN HDFS [functional.alltypes a]
| HDFS partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> a.int_col, RF006 -> id
| row-size=89B cardinality=7.30K
|
05:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: g.id = id
| runtime filters: RF004 <- id
| row-size=20B cardinality=5.50K
|
|--04:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
03:SCAN HDFS [functional.alltypesagg g]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: g.bool_col = FALSE
runtime filters: RF002 -> g.string_col, RF004 -> g.id
row-size=20B cardinality=5.50K
====
# Correlated subqueries
select *
from functional.alltypes t
where id in
(select id from functional.alltypesagg a where t.int_col = a.int_col)
and t.bool_col = false
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id, t.int_col = a.int_col
| runtime filters: RF000 <- id, RF001 <- a.int_col
| row-size=89B cardinality=3.65K
|
|--01:SCAN HDFS [functional.alltypesagg a]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=8B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: t.bool_col = FALSE
runtime filters: RF000 -> id, RF001 -> t.int_col
row-size=89B cardinality=3.65K
====
# Multiple nesting levels (uncorrelated queries)
select *
from functional.alltypes t
where id in
(select id from functional.alltypesagg where int_col in
(select int_col from functional.alltypestiny)
and bool_col = false)
and bigint_col < 1000
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
| runtime filters: RF000 <- id
| row-size=89B cardinality=11
|
|--04:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: int_col = functional.alltypestiny.int_col
| | runtime filters: RF002 <- functional.alltypestiny.int_col
| | row-size=9B cardinality=11
| |
| |--03:AGGREGATE [FINALIZE]
| | | group by: functional.alltypestiny.int_col
| | | row-size=4B cardinality=2
| | |
| | 02:SCAN HDFS [functional.alltypestiny]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=4B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: bool_col = FALSE
| runtime filters: RF002 -> int_col
| row-size=9B cardinality=5.50K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: bigint_col < 1000
runtime filters: RF000 -> id
row-size=89B cardinality=730
====
# Multiple nesting levels (correlated queries)
select *
from functional.alltypes t
where id in
(select id from functional.alltypesagg a where a.int_col = t.int_col
and a.tinyint_col in
(select tinyint_col from functional.alltypestiny s
where s.bigint_col = a.bigint_col))
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id, t.int_col = a.int_col
| runtime filters: RF000 <- id, RF001 <- a.int_col
| row-size=89B cardinality=22
|
|--03:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: a.bigint_col = s.bigint_col, a.tinyint_col = tinyint_col
| | runtime filters: RF004 <- s.bigint_col, RF005 <- tinyint_col
| | row-size=17B cardinality=22
| |
| |--02:SCAN HDFS [functional.alltypestiny s]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=9B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| HDFS partitions=11/11 files=11 size=814.73KB
| runtime filters: RF004 -> a.bigint_col, RF005 -> a.tinyint_col
| row-size=17B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> id, RF001 -> t.int_col
row-size=89B cardinality=7.30K
====
# Multiple nesting levels (correlated and uncorrelated queries)
select *
from functional.alltypes t
where id in
(select id from functional.alltypesagg a where a.int_col in
(select int_col from functional.alltypestiny s where a.bigint_col = s.bigint_col))
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
| runtime filters: RF000 <- id
| row-size=89B cardinality=22
|
|--03:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: a.bigint_col = s.bigint_col, a.int_col = int_col
| | runtime filters: RF002 <- s.bigint_col, RF003 <- int_col
| | row-size=16B cardinality=22
| |
| |--02:SCAN HDFS [functional.alltypestiny s]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=12B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| HDFS partitions=11/11 files=11 size=814.73KB
| runtime filters: RF002 -> a.bigint_col, RF003 -> a.int_col
| row-size=16B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> id
row-size=89B cardinality=7.30K
====
# Predicate propagation with uncorrelated subqueries
select *
from functional.alltypes
where id in
(select id from functional.alltypes where id < 10)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
| runtime filters: RF000 <- id
| row-size=89B cardinality=730
|
|--01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: id < 10
| row-size=4B cardinality=730
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.id < 10
runtime filters: RF000 -> id
row-size=89B cardinality=730
====
# Predicate propagation with correlated subqueries
select *
from functional.alltypesagg a inner join functional.alltypes t on t.id = a.id
where t.int_col < 10 and t.int_col in
(select int_col from functional.alltypessmall s where s.id = t.id)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.id = s.id, t.int_col = int_col
| runtime filters: RF000 <- s.id, RF001 <- int_col
| row-size=184B cardinality=10
|
|--02:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: s.int_col < 10
| row-size=8B cardinality=10
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.id = t.id
| runtime filters: RF004 <- t.id
| row-size=184B cardinality=782
|
|--01:SCAN HDFS [functional.alltypes t]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: t.int_col < 10
| runtime filters: RF000 -> t.id, RF001 -> t.int_col
| row-size=89B cardinality=730
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> a.id, RF004 -> a.id
row-size=95B cardinality=11.00K
====
# Correlated EXISTS
select count(*)
from functional.alltypes t
where exists
(select * from functional.alltypesagg a where a.id = t.id)
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: a.id = t.id
| runtime filters: RF000 <- t.id
| row-size=4B cardinality=7.30K
|
|--00:SCAN HDFS [functional.alltypes t]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> a.id
row-size=4B cardinality=11.00K
====
# Correlated EXISTS with an analytic function and a group by clause
select 1
from functional.alltypesagg a
where exists
(select id, count(int_col) over (partition by bool_col)
from functional.alltypestiny b
where a.tinyint_col = b.tinyint_col
group by id, int_col, bool_col)
and tinyint_col < 10
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.tinyint_col = b.tinyint_col
| runtime filters: RF000 <- b.tinyint_col
| row-size=1B cardinality=244
|
|--03:AGGREGATE [FINALIZE]
| | group by: b.tinyint_col
| | row-size=1B cardinality=2
| |
| 02:AGGREGATE [FINALIZE]
| | group by: id, int_col, bool_col, b.tinyint_col
| | row-size=10B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypestiny b]
| HDFS partitions=4/4 files=4 size=460B
| row-size=10B cardinality=8
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: tinyint_col < 10
runtime filters: RF000 -> a.tinyint_col
row-size=1B cardinality=1.10K
====
# Correlated NOT EXISTS
select count(*)
from functional.alltypes t
where not exists
(select id from functional.alltypesagg a where t.int_col = a.int_col)
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
03:HASH JOIN [LEFT ANTI JOIN]
| hash predicates: t.int_col = a.int_col
| row-size=4B cardinality=7.30K
|
|--02:AGGREGATE [FINALIZE]
| | group by: a.int_col
| | row-size=4B cardinality=957
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
====
# Correlated NOT EXISTS with an analytic function and a group by clause
select count(*)
from functional.alltypesagg a
where not exists
(select b.id, count(b.int_col) over (partition by b.bigint_col)
from functional.alltypessmall b inner join functional.alltypes c on b.id = c.id
where c.bool_col = false and a.int_col = b.int_col
group by b.id, b.int_col, b.bigint_col)
and bool_col = false
---- PLAN
PLAN-ROOT SINK
|
07:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
06:HASH JOIN [LEFT ANTI JOIN]
| hash predicates: a.int_col = b.int_col
| row-size=5B cardinality=5.50K
|
|--05:AGGREGATE [FINALIZE]
| | group by: b.int_col
| | row-size=4B cardinality=10
| |
| 04:AGGREGATE [FINALIZE]
| | group by: b.id, b.int_col, b.bigint_col
| | row-size=16B cardinality=50
| |
| 03:HASH JOIN [INNER JOIN]
| | hash predicates: c.id = b.id
| | runtime filters: RF000 <- b.id
| | row-size=21B cardinality=50
| |
| |--01:SCAN HDFS [functional.alltypessmall b]
| | HDFS partitions=4/4 files=4 size=6.32KB
| | row-size=16B cardinality=100
| |
| 02:SCAN HDFS [functional.alltypes c]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: c.bool_col = FALSE
| runtime filters: RF000 -> c.id
| row-size=5B cardinality=3.65K
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: bool_col = FALSE
row-size=5B cardinality=5.50K
====
# Uncorrelated EXISTS
select *
from functional.alltypestiny t
where exists (select * from functional.alltypessmall s where s.id < 5)
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=89B cardinality=8
|
|--01:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: s.id < 5
| limit: 1
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypestiny t]
HDFS partitions=4/4 files=4 size=460B
row-size=89B cardinality=8
====
# Uncorrelated EXISTS with an analytic function and a group by clause
select 1
from functional.alltypestiny t
where exists
(select id, max(int_col) over (partition by bigint_col)
from functional.alltypesagg where tinyint_col = 10
group by id, int_col, bigint_col)
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [RIGHT SEMI JOIN]
| row-size=0B cardinality=8
|
|--00:SCAN HDFS [functional.alltypestiny t]
| HDFS partitions=4/4 files=4 size=460B
| row-size=0B cardinality=8
|
02:AGGREGATE [FINALIZE]
| group by: id, int_col, bigint_col
| limit: 1
| row-size=16B cardinality=1
|
01:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: tinyint_col = 10
row-size=17B cardinality=1.22K
====
# Uncorrelated EXISTS with a LIMIT 0 clause
select 1
from functional.alltypestiny t
where exists (select * from functional.alltypessmall limit 0)
---- PLAN
PLAN-ROOT SINK
|
00:EMPTYSET
====
# Uncorrelated NOT EXISTS
select *
from functional.alltypestiny t
where not exists (select * from functional.alltypessmall s where s.id < 5)
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [LEFT ANTI JOIN]
| row-size=89B cardinality=8
|
|--01:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: s.id < 5
| limit: 1
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypestiny t]
HDFS partitions=4/4 files=4 size=460B
row-size=89B cardinality=8
====
# Uncorrelated NOT exists referencing a WITH clause
with
w1 as (select * from functional.alltypestiny t),
w2 as (select * from functional.alltypessmall s where s.id < 0)
select *
from w1 t
where not exists (select 1 from w2)
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [LEFT ANTI JOIN]
| row-size=89B cardinality=8
|
|--01:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: s.id < 0
| limit: 1
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypestiny t]
HDFS partitions=4/4 files=4 size=460B
row-size=89B cardinality=8
====
# Uncorrelated NOT EXISTS with an analytic function and a group by clause
select 1
from functional.alltypestiny t
where not exists
(select id, max(int_col) over (partition by bigint_col)
from functional.alltypesagg where tinyint_col = 10
group by id, int_col, bigint_col)
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [RIGHT ANTI JOIN]
| row-size=0B cardinality=8
|
|--00:SCAN HDFS [functional.alltypestiny t]
| HDFS partitions=4/4 files=4 size=460B
| row-size=0B cardinality=8
|
02:AGGREGATE [FINALIZE]
| group by: id, int_col, bigint_col
| limit: 1
| row-size=16B cardinality=1
|
01:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: tinyint_col = 10
row-size=17B cardinality=1.22K
====
# Uncorrelated NOT EXISTS with a LIMIT 0 clause
select 1
from functional.alltypestiny t
where not exists (select * from functional.alltypessmall limit 0)
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypestiny t]
HDFS partitions=4/4 files=4 size=460B
row-size=0B cardinality=8
====
# Multiple nesting levels
select count(*)
from functional.alltypes a
where exists
(select * from functional.alltypestiny t where a.id = t.id and exists
(select * from functional.alltypesagg g where g.int_col = t.int_col
and g.bool_col = false))
---- PLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
05:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = t.id
| runtime filters: RF000 <- t.id
| row-size=4B cardinality=8
|
|--04:HASH JOIN [RIGHT SEMI JOIN]
| | hash predicates: g.int_col = t.int_col
| | runtime filters: RF002 <- t.int_col
| | row-size=8B cardinality=8
| |
| |--01:SCAN HDFS [functional.alltypestiny t]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=8B cardinality=8
| |
| 03:AGGREGATE [FINALIZE]
| | group by: g.int_col
| | row-size=4B cardinality=957
| |
| 02:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: g.bool_col = FALSE
| runtime filters: RF002 -> g.int_col
| row-size=5B cardinality=5.50K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id
row-size=4B cardinality=7.30K
====
# Multiple subquery predicates
select g.int_col, count(*)
from functional.alltypesagg g left outer join functional.alltypes a
on g.id = a.id
where g.int_col < 100 and exists
(select *
from functional.alltypestiny t
where t.id = g.id and t.bool_col = false)
and g.bigint_col in
(select bigint_col
from functional.alltypessmall s
where s.id = g.id and s.int_col > 10)
and g.tinyint_col <
(select count(*)
from functional.alltypes t
where t.id = g.id and t.bool_col = true)
group by g.int_col
having count(*) < 100
---- PLAN
PLAN-ROOT SINK
|
10:AGGREGATE [FINALIZE]
| output: count(*)
| group by: g.int_col
| having: count(*) < 100
| row-size=12B cardinality=1
|
09:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t.id = g.id
| other predicates: g.tinyint_col < zeroifnull(count(*))
| runtime filters: RF000 <- g.id
| row-size=33B cardinality=4
|
|--08:HASH JOIN [RIGHT SEMI JOIN]
| | hash predicates: bigint_col = g.bigint_col, s.id = g.id
| | runtime filters: RF002 <- g.bigint_col, RF003 <- g.id
| | row-size=21B cardinality=4
| |
| |--07:HASH JOIN [LEFT SEMI JOIN]
| | | hash predicates: g.id = t.id
| | | runtime filters: RF006 <- t.id
| | | row-size=21B cardinality=4
| | |
| | |--02:SCAN HDFS [functional.alltypestiny t]
| | | HDFS partitions=4/4 files=4 size=460B
| | | predicates: t.bool_col = FALSE
| | | row-size=5B cardinality=4
| | |
| | 06:HASH JOIN [RIGHT OUTER JOIN]
| | | hash predicates: a.id = g.id
| | | runtime filters: RF008 <- g.id
| | | row-size=21B cardinality=1.10K
| | |
| | |--00:SCAN HDFS [functional.alltypesagg g]
| | | HDFS partitions=11/11 files=11 size=814.73KB
| | | predicates: g.int_col < 100
| | | runtime filters: RF006 -> g.id
| | | row-size=17B cardinality=1.10K
| | |
| | 01:SCAN HDFS [functional.alltypes a]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | runtime filters: RF006 -> a.id, RF008 -> a.id
| | row-size=4B cardinality=7.30K
| |
| 03:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: s.int_col > 10
| runtime filters: RF002 -> bigint_col, RF003 -> s.id
| row-size=16B cardinality=10
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| group by: t.id
| row-size=12B cardinality=3.65K
|
04:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: t.bool_col = TRUE
runtime filters: RF000 -> t.id
row-size=5B cardinality=3.65K
====
# Subqueries with aggregation
select *
from functional.alltypes a
where a.int_col in
(select count(int_col) from functional.alltypesagg g where g.bool_col
group by int_col)
and a.bigint_col < 10
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.int_col = count(int_col)
| runtime filters: RF000 <- count(int_col)
| row-size=89B cardinality=730
|
|--02:AGGREGATE [FINALIZE]
| | output: count(int_col)
| | group by: int_col
| | row-size=12B cardinality=957
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: g.bool_col
| row-size=5B cardinality=1.10K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: a.bigint_col < 10
runtime filters: RF000 -> a.int_col
row-size=89B cardinality=730
====
# Uncorrelated aggregation subquery
select *
from functional.alltypes a
where a.int_col <
(select max(int_col) from functional.alltypesagg g where g.bool_col = true)
and a.bigint_col > 10
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [INNER JOIN]
| predicates: a.int_col < max(int_col)
| row-size=93B cardinality=730
|
|--02:AGGREGATE [FINALIZE]
| | output: max(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: g.bool_col = TRUE
| row-size=5B cardinality=5.50K
|
00:SCAN HDFS [functional.alltypes a]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: a.bigint_col > 10
row-size=89B cardinality=730
====
# Aggregation subquery with constant comparison expr
select *
from functional.alltypesagg a
where (select max(id) from functional.alltypes t where t.bool_col = false) > 10
and a.int_col < 10
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [CROSS JOIN]
| row-size=99B cardinality=1.10K
|
|--02:AGGREGATE [FINALIZE]
| | output: max(id)
| | having: max(id) > 10
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes t]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: t.bool_col = FALSE
| row-size=5B cardinality=3.65K
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: a.int_col < 10
row-size=95B cardinality=1.10K
====
# Correlated aggregation subquery
select a.int_col, count(*)
from functional.alltypesagg a
where a.id =
(select min(id)
from functional.alltypes t
where t.int_col = a.int_col and t.tinyint_col < 10)
and a.bool_col = false
group by a.int_col
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: count(*)
| group by: a.int_col
| row-size=12B cardinality=10
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = min(id), a.int_col = t.int_col
| runtime filters: RF000 <- min(id), RF001 <- t.int_col
| row-size=9B cardinality=10
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: t.int_col
| | row-size=8B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes t]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: t.tinyint_col < 10
| row-size=9B cardinality=730
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: a.bool_col = FALSE
runtime filters: RF000 -> a.id, RF001 -> a.int_col
row-size=9B cardinality=5.50K
====
# Aggregation subquery with multiple tables
select t.tinyint_col, count(*)
from functional.alltypes t left outer join functional.alltypesagg a
on t.id = a.id
where t.int_col <
(select min(s.int_col)
from functional.alltypessmall s left outer join functional.alltypestiny p
on s.id = p.id where s.bool_col = false and s.bigint_col = t.bigint_col)
and a.bool_col = false
group by t.tinyint_col
---- PLAN
PLAN-ROOT SINK
|
08:AGGREGATE [FINALIZE]
| output: count(*)
| group by: t.tinyint_col
| row-size=9B cardinality=10
|
07:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.bigint_col = s.bigint_col
| other join predicates: t.int_col < min(s.int_col)
| runtime filters: RF000 <- s.bigint_col
| row-size=22B cardinality=7.30K
|
|--05:AGGREGATE [FINALIZE]
| | output: min(s.int_col)
| | group by: s.bigint_col
| | row-size=12B cardinality=10
| |
| 04:HASH JOIN [LEFT OUTER JOIN]
| | hash predicates: s.id = p.id
| | row-size=21B cardinality=50
| |
| |--03:SCAN HDFS [functional.alltypestiny p]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=4B cardinality=8
| |
| 02:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: s.bool_col = FALSE
| row-size=17B cardinality=50
|
06:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t.id = a.id
| other predicates: a.bool_col = FALSE
| row-size=22B cardinality=7.30K
|
|--01:SCAN HDFS [functional.alltypesagg a]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: a.bool_col = FALSE
| row-size=5B cardinality=5.50K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t.bigint_col
row-size=17B cardinality=7.30K
====
# Multiple aggregation subqueries
select *
from functional.alltypesagg a, functional.alltypes t
where a.id = t.id and a.int_col <
(select min(int_col)
from functional.alltypestiny g
where t.bigint_col = g.bigint_col and g.bool_col = false)
and a.tinyint_col >
(select max(tinyint_col) from functional.alltypessmall s where s.id < 10)
and t.bool_col = false
---- PLAN
PLAN-ROOT SINK
|
08:NESTED LOOP JOIN [INNER JOIN]
| predicates: a.tinyint_col > max(tinyint_col)
| row-size=185B cardinality=781
|
|--05:AGGREGATE [FINALIZE]
| | output: max(tinyint_col)
| | row-size=1B cardinality=1
| |
| 04:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: s.id < 10
| row-size=5B cardinality=10
|
07:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.bigint_col = g.bigint_col
| other join predicates: a.int_col < min(int_col)
| runtime filters: RF001 <- g.bigint_col
| row-size=184B cardinality=781
|
|--03:AGGREGATE [FINALIZE]
| | output: min(int_col)
| | group by: g.bigint_col
| | row-size=12B cardinality=2
| |
| 02:SCAN HDFS [functional.alltypestiny g]
| HDFS partitions=4/4 files=4 size=460B
| predicates: g.bool_col = FALSE
| row-size=13B cardinality=4
|
06:HASH JOIN [INNER JOIN]
| hash predicates: a.id = t.id
| runtime filters: RF003 <- t.id
| row-size=184B cardinality=3.91K
|
|--01:SCAN HDFS [functional.alltypes t]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: t.bool_col = FALSE
| runtime filters: RF001 -> t.bigint_col
| row-size=89B cardinality=3.65K
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF003 -> a.id
row-size=95B cardinality=11.00K
====
# Multiple nesting levels with aggregation subqueries
select *
from functional.alltypes t
where t.int_col <
(select avg(g.int_col)*2
from functional.alltypesagg g
where g.id = t.id and g.bigint_col <
(select count(*)
from functional.alltypestiny a
where a.id = g.id
and a.bool_col = false))
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.id = g.id
| other join predicates: t.int_col < avg(g.int_col) * 2
| runtime filters: RF000 <- g.id
| row-size=89B cardinality=7.30K
|
|--05:AGGREGATE [FINALIZE]
| | output: avg(g.int_col)
| | group by: g.id
| | row-size=12B cardinality=10.28K
| |
| 04:HASH JOIN [LEFT OUTER JOIN]
| | hash predicates: g.id = a.id
| | other predicates: g.bigint_col < zeroifnull(count(*))
| | row-size=28B cardinality=11.00K
| |
| |--03:AGGREGATE [FINALIZE]
| | | output: count(*)
| | | group by: a.id
| | | row-size=12B cardinality=4
| | |
| | 02:SCAN HDFS [functional.alltypestiny a]
| | HDFS partitions=4/4 files=4 size=460B
| | predicates: a.bool_col = FALSE
| | row-size=5B cardinality=4
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=16B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t.id
row-size=89B cardinality=7.30K
====
# Multiple nesting of aggregate subquery predicates with count
select *
from functional.alltypesagg a left outer join functional.alltypes t
on a.id = t.id
where a.int_col <
(select count(*)
from functional.alltypessmall s
where s.id = a.id and s.tinyint_col >
(select count(*) from functional.alltypestiny where bool_col = false))
---- PLAN
PLAN-ROOT SINK
|
08:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.id = s.id
| other predicates: a.int_col < zeroifnull(count(*))
| row-size=196B cardinality=11.00K
|
|--06:AGGREGATE [FINALIZE]
| | output: count(*)
| | group by: s.id
| | row-size=12B cardinality=99
| |
| 05:NESTED LOOP JOIN [INNER JOIN]
| | predicates: s.tinyint_col > count(*)
| | row-size=13B cardinality=100
| |
| |--04:AGGREGATE [FINALIZE]
| | | output: count(*)
| | | row-size=8B cardinality=1
| | |
| | 03:SCAN HDFS [functional.alltypestiny]
| | HDFS partitions=4/4 files=4 size=460B
| | predicates: bool_col = FALSE
| | row-size=1B cardinality=4
| |
| 02:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=5B cardinality=100
|
07:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.id = t.id
| row-size=184B cardinality=11.00K
|
|--01:SCAN HDFS [functional.alltypes t]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=95B cardinality=11.00K
====
# Distinct in the outer select block
select distinct id, bool_col
from functional.alltypesagg g
where 100 < (select count(*) from functional.alltypes where bool_col = false and id < 5)
and bool_col = false
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| group by: id, bool_col
| row-size=5B cardinality=5.50K
|
03:NESTED LOOP JOIN [CROSS JOIN]
| row-size=13B cardinality=5.50K
|
|--02:AGGREGATE [FINALIZE]
| | output: count(*)
| | having: count(*) > 100
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: id < 5, bool_col = FALSE
| row-size=5B cardinality=516
|
00:SCAN HDFS [functional.alltypesagg g]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: bool_col = FALSE
row-size=5B cardinality=5.50K
====
# Distinct with an unqualified star in the outer select block
select distinct *
from functional.alltypesagg g
where 100 > (select count(distinct id) from functional.alltypestiny where int_col < 5)
and g.bigint_col < 1000 and g.bigint_col = true
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| group by: g.id, g.bool_col, g.tinyint_col, g.smallint_col, g.int_col, g.bigint_col, g.float_col, g.double_col, g.date_string_col, g.string_col, g.timestamp_col, g.year, g.month, g.day
| row-size=95B cardinality=11
|
04:NESTED LOOP JOIN [CROSS JOIN]
| row-size=103B cardinality=11
|
|--03:AGGREGATE [FINALIZE]
| | output: count(id)
| | having: count(id) < 100
| | row-size=8B cardinality=1
| |
| 02:AGGREGATE
| | group by: id
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| predicates: int_col < 5
| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypesagg g]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: g.bigint_col = 1
row-size=95B cardinality=11
====
# Aggregate subquery in an IS NULL predicate
select *
from functional.alltypestiny t
where (select max(int_col) from functional.alltypesagg where int_col is null) is null
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [CROSS JOIN]
| row-size=93B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: max(int_col)
| | having: max(int_col) IS NULL
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: int_col IS NULL
| row-size=4B cardinality=20
|
00:SCAN HDFS [functional.alltypestiny t]
HDFS partitions=4/4 files=4 size=460B
row-size=89B cardinality=8
====
# Correlated aggregate subquery with a count in an IS NULL predicate
select int_col, count(*)
from functional.alltypestiny t
where (select count(*) from functional.alltypesagg g where t.id = g.id) is null
and bool_col = false
group by int_col
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: count(*)
| group by: int_col
| row-size=12B cardinality=2
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: g.id = t.id
| other predicates: zeroifnull(count(*)) IS NULL
| runtime filters: RF000 <- t.id
| row-size=21B cardinality=4
|
|--00:SCAN HDFS [functional.alltypestiny t]
| HDFS partitions=4/4 files=4 size=460B
| predicates: bool_col = FALSE
| row-size=9B cardinality=4
|
02:AGGREGATE [FINALIZE]
| output: count(*)
| group by: g.id
| having: zeroifnull(count(*)) IS NULL
| row-size=12B cardinality=1.03K
|
01:SCAN HDFS [functional.alltypesagg g]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> g.id
row-size=4B cardinality=11.00K
====
# Correlated aggregate subquery in an IS NULL predicate
select *
from functional.alltypestiny t
where
(select max(int_col)
from functional.alltypesagg g
where g.id = t.id and g.int_col is null) is null
and t.bool_col = false
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.id = g.id
| runtime filters: RF000 <- g.id
| row-size=89B cardinality=2
|
|--02:AGGREGATE [FINALIZE]
| | output: max(int_col)
| | group by: g.id
| | having: max(int_col) IS NULL
| | row-size=8B cardinality=2
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: g.int_col IS NULL
| row-size=8B cardinality=20
|
00:SCAN HDFS [functional.alltypestiny t]
HDFS partitions=4/4 files=4 size=460B
predicates: t.bool_col = FALSE
runtime filters: RF000 -> t.id
row-size=89B cardinality=4
====
# Complex expr with a scalar subquery
select *
from functional.alltypestiny t
where 1 +
(select count(*) from functional.alltypesagg where bool_col = false) = t.int_col + 2
and t.bigint_col < 100
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.int_col + 2 = 1 + count(*)
| runtime filters: RF000 <- 1 + count(*)
| row-size=89B cardinality=1
|
|--02:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: bool_col = FALSE
| row-size=1B cardinality=5.50K
|
00:SCAN HDFS [functional.alltypestiny t]
HDFS partitions=4/4 files=4 size=460B
predicates: t.bigint_col < 100
runtime filters: RF000 -> t.int_col + 2
row-size=89B cardinality=1
====
# Scalar subquery in a function
select *
from functional.alltypestiny t
where nullifzero((select min(id) from functional.alltypessmall s where s.bool_col = false))
is null
and t.id < 10
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [CROSS JOIN]
| row-size=93B cardinality=1
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | having: nullifzero(min(id)) IS NULL
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: s.bool_col = FALSE
| row-size=5B cardinality=50
|
00:SCAN HDFS [functional.alltypestiny t]
HDFS partitions=4/4 files=4 size=460B
predicates: t.id < 10
row-size=89B cardinality=1
====
# Correlated aggregate subquery with a LIMIT clause that is removed during the rewrite
select min(t.id)
from functional.alltypes t
where t.int_col <
(select sum(s.int_col)
from functional.alltypessmall s
where s.id = t.id
limit 1)
group by t.bool_col
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: min(t.id)
| group by: t.bool_col
| row-size=5B cardinality=2
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.id = s.id
| other join predicates: t.int_col < sum(s.int_col)
| runtime filters: RF000 <- s.id
| row-size=9B cardinality=99
|
|--02:AGGREGATE [FINALIZE]
| | output: sum(s.int_col)
| | group by: s.id
| | row-size=12B cardinality=99
| |
| 01:SCAN HDFS [functional.alltypessmall s]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=8B cardinality=100
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t.id
row-size=9B cardinality=7.30K
====
# Between predicate with subqueries
select *
from functional.alltypestiny t
where int_col between
(select min(int_col) from functional.alltypessmall where bool_col = false) and
(select max(int_col) from functional.alltypessmall where bool_col = true)
---- PLAN
PLAN-ROOT SINK
|
06:NESTED LOOP JOIN [INNER JOIN]
| predicates: int_col <= max(int_col)
| row-size=97B cardinality=8
|
|--04:AGGREGATE [FINALIZE]
| | output: max(int_col)
| | row-size=4B cardinality=1
| |
| 03:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: bool_col = TRUE
| row-size=5B cardinality=50
|
05:NESTED LOOP JOIN [INNER JOIN]
| predicates: int_col >= min(int_col)
| row-size=93B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: min(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: bool_col = FALSE
| row-size=5B cardinality=50
|
00:SCAN HDFS [functional.alltypestiny t]
HDFS partitions=4/4 files=4 size=460B
row-size=89B cardinality=8
====
# Aggregate subquery with count (subquery op slotRef)
select t1.id
from functional.alltypestiny t1
where
(select count(tt1.smallint_col) as int_col_1
from functional.alltypestiny tt1
where t1.id = tt1.month) < t1.id
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: tt1.`month` = t1.id
| other predicates: t1.id > zeroifnull(count(tt1.smallint_col))
| runtime filters: RF000 <- t1.id
| row-size=16B cardinality=8
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
02:AGGREGATE [FINALIZE]
| output: count(tt1.smallint_col)
| group by: tt1.`month`
| row-size=12B cardinality=4
|
01:SCAN HDFS [functional.alltypestiny tt1]
HDFS partitions=4/4 files=4 size=460B
runtime filters: RF000 -> tt1.month
row-size=6B cardinality=8
====
# Correlated aggregate subquery with count in a function participating in
# a complex arithmetic expr
select int_col, count(*)
from functional.alltypestiny t
where
1 + log(abs((select count(int_col) from functional.alltypes s where s.id = t.id)), 2)
< 10
group by int_col
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: count(*)
| group by: int_col
| row-size=12B cardinality=2
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: s.id = t.id
| other predicates: 1 + log(abs(zeroifnull(count(int_col))), 2) < 10
| runtime filters: RF000 <- t.id
| row-size=20B cardinality=8
|
|--00:SCAN HDFS [functional.alltypestiny t]
| HDFS partitions=4/4 files=4 size=460B
| row-size=8B cardinality=8
|
02:AGGREGATE [FINALIZE]
| output: count(int_col)
| group by: s.id
| row-size=12B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypes s]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> s.id
row-size=8B cardinality=7.30K
====
# Correlated scalar subquery with an aggregate function that returns a
# non-numeric type on empty input
select int_col, count(*)
from functional.alltypestiny t
where
(select sample(int_col) from functional.alltypes s where s.id = t.id) = t.string_col
and bool_col = false
group by int_col
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: count(*)
| group by: int_col
| row-size=12B cardinality=2
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: s.id = t.id
| other predicates: t.string_col = ifnull(sample(int_col), '')
| runtime filters: RF000 <- t.id
| row-size=38B cardinality=4
|
|--00:SCAN HDFS [functional.alltypestiny t]
| HDFS partitions=4/4 files=4 size=460B
| predicates: bool_col = FALSE
| row-size=22B cardinality=4
|
02:AGGREGATE [FINALIZE]
| output: sample(int_col)
| group by: s.id
| row-size=16B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypes s]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> s.id
row-size=8B cardinality=7.30K
====
# Uncorrelated scalar subquery where columns from the outer appear in both sides
# of the binary predicate
select 1
from functional.alltypestiny t1
where (select count(*) from functional.alltypessmall) + t1.int_col = t1.bigint_col - 1
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [INNER JOIN]
| predicates: count(*) + t1.int_col = t1.bigint_col - 1
| row-size=20B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypessmall]
| HDFS partitions=4/4 files=4 size=6.32KB
| row-size=0B cardinality=100
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
row-size=12B cardinality=8
====
# Uncorrelated scalar subquery in complex binary predicate that contains columns
# from two tables of the outer
select 1
from functional.alltypestiny t1 join functional.alltypessmall t2 on t1.id = t2.id
where (select count(*) from functional.alltypes) + 1 = t1.int_col + t2.int_col
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.int_col + t2.int_col = count(*) + 1
| row-size=16B cardinality=9
|
|--03:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=0B cardinality=7.30K
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t2.id = t1.id
| runtime filters: RF000 <- t1.id
| row-size=16B cardinality=9
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| HDFS partitions=4/4 files=4 size=460B
| row-size=8B cardinality=8
|
01:SCAN HDFS [functional.alltypessmall t2]
HDFS partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> t2.id
row-size=8B cardinality=100
====
# Uncorrelated scalar subquery in complex binary predicate that contains columns
# from two tables of the outer that appear in both sides of the predicate
select 1
from functional.alltypestiny t1 join functional.alltypessmall t2 on t1.id = t2.id
where
(select count(*) from functional.alltypes) + t2.bigint_col = t1.int_col + t2.int_col
---- PLAN
PLAN-ROOT SINK
|
05:NESTED LOOP JOIN [INNER JOIN]
| predicates: count(*) + t2.bigint_col = t1.int_col + t2.int_col
| row-size=32B cardinality=9
|
|--03:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=0B cardinality=7.30K
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t2.id = t1.id
| runtime filters: RF000 <- t1.id
| row-size=24B cardinality=9
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| HDFS partitions=4/4 files=4 size=460B
| row-size=8B cardinality=8
|
01:SCAN HDFS [functional.alltypessmall t2]
HDFS partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> t2.id
row-size=16B cardinality=100
====
# Correlated scalar subquery with complex correlated predicate (IMPALA-1335)
select 1
from functional.alltypestiny t
where
(select sum(t1.id)
from functional.alltypesagg t1 inner join functional.alltypes t2 on t1.id = t2.id
where t1.id + t2.id = t.int_col) = t.int_col
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: t1.id + t2.id = t.int_col
| row-size=4B cardinality=8
|
|--00:SCAN HDFS [functional.alltypestiny t]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
04:AGGREGATE [FINALIZE]
| output: sum(t1.id)
| group by: t1.id + t2.id
| having: sum(t1.id) = t1.id + t2.id
| row-size=16B cardinality=781
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF002 <- t2.id
| row-size=8B cardinality=7.81K
|
|--02:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF002 -> t1.id
row-size=4B cardinality=11.00K
====
# Correlated scalar subquery with complex correlared predicate (IMPALA-1335)
select 1
from functional.alltypestiny t
where
(select sum(t1.id)
from functional.alltypesagg t1 inner join functional.alltypes t2 on t1.id = t2.id
where t1.id + t2.id = t.bigint_col) = t.int_col
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: t1.id + t2.id = t.bigint_col, sum(t1.id) = t.int_col
| row-size=12B cardinality=8
|
|--00:SCAN HDFS [functional.alltypestiny t]
| HDFS partitions=4/4 files=4 size=460B
| row-size=12B cardinality=8
|
04:AGGREGATE [FINALIZE]
| output: sum(t1.id)
| group by: t1.id + t2.id
| row-size=16B cardinality=7.81K
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF004 <- t2.id
| row-size=8B cardinality=7.81K
|
|--02:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF004 -> t1.id
row-size=4B cardinality=11.00K
====
# Outer query block with multiple tables and a correlated scalar subquery with
# complex correlated predicate that references multiple subquery tables and multiple
# tables from the outer query block (IMPALA-1335)
select 1
from functional.alltypestiny t1 inner join functional.alltypessmall t2 on t1.id = t2.id
where
(select sum(tt1.id)
from functional.alltypesagg tt1 inner join functional.alltypes tt2
on tt1.int_col = tt2.int_col
where tt1.id + tt2.id = t1.int_col - t2.int_col) = t1.bigint_col
---- PLAN
PLAN-ROOT SINK
|
07:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: sum(tt1.id) = t1.bigint_col, tt1.id + tt2.id = t1.int_col - t2.int_col
| row-size=24B cardinality=9
|
|--06:HASH JOIN [INNER JOIN]
| | hash predicates: t2.id = t1.id
| | runtime filters: RF004 <- t1.id
| | row-size=24B cardinality=9
| |
| |--00:SCAN HDFS [functional.alltypestiny t1]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=16B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypessmall t2]
| HDFS partitions=4/4 files=4 size=6.32KB
| runtime filters: RF004 -> t2.id
| row-size=8B cardinality=100
|
05:AGGREGATE [FINALIZE]
| output: sum(tt1.id)
| group by: tt1.id + tt2.id
| row-size=16B cardinality=10.28K
|
04:HASH JOIN [INNER JOIN]
| hash predicates: tt1.int_col = tt2.int_col
| runtime filters: RF002 <- tt2.int_col
| row-size=16B cardinality=83.91K
|
|--03:SCAN HDFS [functional.alltypes tt2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
02:SCAN HDFS [functional.alltypesagg tt1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF002 -> tt1.int_col
row-size=8B cardinality=11.00K
====
# IMPALA-1550/IMPALA-4423: Correlated EXISTS and NOT EXISTS subqueries with aggregates
# that can be evaluated at query compile time. All predicates evaluate to FALSE.
select 1
from functional.alltypestiny t1
where exists
(select id
from functional.alltypes t2
where t1.int_col = t2.int_col limit 0)
and not exists
(select count(distinct int_col)
from functional.alltypesagg t3
where t1.id = t3.id)
and not exists
(select min(int_col)
from functional.alltypestiny t5
where t1.id = t5.id and false)
---- PLAN
PLAN-ROOT SINK
|
00:EMPTYSET
====
# IMPALA-1550/IMPALA-4423: Correlated EXISTS and NOT EXISTS subqueries with aggregates
# that can be evaluated at query compile time. All predicates evaluate to TRUE.
select 1
from functional.alltypestiny t1
where not exists
(select id
from functional.alltypes t2
where t1.int_col = t2.int_col limit 0)
and exists
(select count(distinct int_col), sum(distinct int_col)
from functional.alltypesagg t3
where t1.id = t3.id)
and not exists
(select sum(int_col)
from functional.alltypessmall t4
where t1.id = t4.id limit 0)
and not exists
(select min(int_col)
from functional.alltypestiny t5
where t1.id = t5.id having false)
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypestiny t1]
HDFS partitions=4/4 files=4 size=460B
row-size=0B cardinality=8
====
# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and
# aggregates. Some predicates evaluate to TRUE while others need to
# be evaluated at run-time. (IMPALA-1550)
select 1
from functional.alltypestiny t1
where not exists
(select id
from functional.alltypes t2
where t1.int_col = t2.int_col limit 0)
and exists
(select distinct int_col
from functional.alltypesagg t3
where t3.id > 100 and t1.id = t3.id)
and not exists
(select count(id)
from functional.alltypestiny t4
where t4.int_col = t1.tinyint_col
having count(id) > 200)
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [RIGHT ANTI JOIN]
| hash predicates: t4.int_col = t1.tinyint_col
| row-size=5B cardinality=1
|
|--05:HASH JOIN [RIGHT SEMI JOIN]
| | hash predicates: t3.id = t1.id
| | runtime filters: RF000 <- t1.id
| | row-size=5B cardinality=1
| |
| |--00:SCAN HDFS [functional.alltypestiny t1]
| | HDFS partitions=4/4 files=4 size=460B
| | predicates: t1.id > 100
| | row-size=5B cardinality=1
| |
| 02:AGGREGATE [FINALIZE]
| | group by: int_col, t3.id
| | row-size=8B cardinality=1.10K
| |
| 01:SCAN HDFS [functional.alltypesagg t3]
| HDFS partitions=11/11 files=11 size=814.73KB
| predicates: t3.id > 100
| runtime filters: RF000 -> t3.id
| row-size=8B cardinality=1.10K
|
04:AGGREGATE [FINALIZE]
| output: count(id)
| group by: t4.int_col
| having: count(id) > 200
| row-size=12B cardinality=1
|
03:SCAN HDFS [functional.alltypestiny t4]
HDFS partitions=4/4 files=4 size=460B
row-size=8B cardinality=8
====
# Tests for <=> (aka IS NOT DISTINCT FROM) and IS DISTINCT FROM
select * from functional.alltypesagg t1
where t1.id is not distinct from
(select min(id) from functional.alltypes t2
where t1.int_col is not distinct from t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.id IS NOT DISTINCT FROM min(id), t1.int_col IS NOT DISTINCT FROM t2.int_col
| runtime filters: RF000 <- min(id), RF001 <- t2.int_col
| row-size=95B cardinality=11
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: t2.int_col
| | row-size=8B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.id, RF001 -> t1.int_col
row-size=95B cardinality=11.00K
====
select * from functional.alltypesagg t1
where t1.id is distinct from
(select min(id) from functional.alltypes t2
where t1.int_col is not distinct from t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.int_col IS NOT DISTINCT FROM t2.int_col
| other join predicates: t1.id IS DISTINCT FROM min(id)
| runtime filters: RF000 <- t2.int_col
| row-size=95B cardinality=115
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: t2.int_col
| | row-size=8B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.int_col
row-size=95B cardinality=11.00K
====
select * from functional.alltypesagg t1
where t1.id =
(select min(id) from functional.alltypes t2
where t1.int_col is not distinct from t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.id = min(id), t1.int_col IS NOT DISTINCT FROM t2.int_col
| runtime filters: RF000 <- min(id), RF001 <- t2.int_col
| row-size=95B cardinality=11
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: t2.int_col
| | row-size=8B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.id, RF001 -> t1.int_col
row-size=95B cardinality=11.00K
====
select * from functional.alltypesagg t1
where t1.id !=
(select min(id) from functional.alltypes t2
where t1.int_col is not distinct from t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.int_col IS NOT DISTINCT FROM t2.int_col
| other join predicates: t1.id != min(id)
| runtime filters: RF000 <- t2.int_col
| row-size=95B cardinality=115
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: t2.int_col
| | row-size=8B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.int_col
row-size=95B cardinality=11.00K
====
select * from functional.alltypesagg t1
where t1.id is not distinct from
(select min(id) from functional.alltypes t2
where t1.int_col = t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.id IS NOT DISTINCT FROM min(id), t1.int_col = t2.int_col
| runtime filters: RF000 <- min(id), RF001 <- t2.int_col
| row-size=95B cardinality=11
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: t2.int_col
| | row-size=8B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.id, RF001 -> t1.int_col
row-size=95B cardinality=11.00K
====
select * from functional.alltypesagg t1
where t1.id is distinct from
(select min(id) from functional.alltypes t2
where t1.int_col = t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.int_col = t2.int_col
| other join predicates: t1.id IS DISTINCT FROM min(id)
| runtime filters: RF000 <- t2.int_col
| row-size=95B cardinality=115
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: t2.int_col
| | row-size=8B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.int_col
row-size=95B cardinality=11.00K
====
select * from functional.alltypesagg t1
where t1.id =
(select min(id) from functional.alltypes t2
where t1.int_col = t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.id = min(id), t1.int_col = t2.int_col
| runtime filters: RF000 <- min(id), RF001 <- t2.int_col
| row-size=95B cardinality=11
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: t2.int_col
| | row-size=8B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.id, RF001 -> t1.int_col
row-size=95B cardinality=11.00K
====
select * from functional.alltypesagg t1
where t1.id !=
(select min(id) from functional.alltypes t2
where t1.int_col = t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.int_col = t2.int_col
| other join predicates: t1.id != min(id)
| runtime filters: RF000 <- t2.int_col
| row-size=95B cardinality=115
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: t2.int_col
| | row-size=8B cardinality=10
| |
| 01:SCAN HDFS [functional.alltypes t2]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypesagg t1]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> t1.int_col
row-size=95B cardinality=11.00K
====
# IMPALA-3861: Test that IN subqueries with correlated BETWEEN predicates work.
select 1 from functional.alltypes t where id in
(select id from functional.alltypesagg a where
a.tinyint_col between t.tinyint_col and t.smallint_col and
a.smallint_col between 10 and t.int_col and
20 between t.bigint_col and a.int_col and
t.float_col between a.float_col and a.double_col and
t.string_col between a.string_col and t.date_string_col and
a.double_col between round(acos(t.float_col), 2)
and cast(t.string_col as int))
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: id = id
| other join predicates: a.tinyint_col >= t.tinyint_col, t.float_col >= a.float_col, a.smallint_col <= t.int_col, a.tinyint_col <= t.smallint_col, t.float_col <= a.double_col, t.string_col >= a.string_col, a.double_col <= CAST(t.string_col AS INT), a.double_col >= round(acos(t.float_col), 2)
| runtime filters: RF000 <- id
| row-size=56B cardinality=730
|
|--00:SCAN HDFS [functional.alltypes t]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: t.bigint_col <= 20, t.string_col <= t.date_string_col
| row-size=56B cardinality=730
|
01:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: a.int_col >= 20, a.smallint_col >= 10
runtime filters: RF000 -> id
row-size=38B cardinality=1.10K
====
# IMPALA-4423: Correlated EXISTS and NOT EXISTS subqueries with aggregates. Both
# subqueries can be evaluated at query compile time. The first one evaluates to
# TRUE and the second one to FALSE.
select 1
from functional.alltypestiny t1
where not exists
(select id
from functional.alltypes t2
where t1.int_col = t2.int_col limit 0)
and not exists
(select min(int_col)
from functional.alltypestiny t5
where t1.id = t5.id and false)
---- PLAN
PLAN-ROOT SINK
|
00:EMPTYSET
====
# IMPALA-4303: Test subquery rewriting with nested unions.
select * from functional.alltypestiny
where exists (select 1 from functional.alltypes where int_col < 10)
union all
(select * from functional.alltypestiny where year=2009 and month=1
union all
select * from functional.alltypestiny where year=2009 and month=2)
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
| row-size=89B cardinality=12
|
|--05:SCAN HDFS [functional.alltypestiny]
| partition predicates: `year` = 2009, `month` = 2
| HDFS partitions=1/4 files=1 size=115B
| row-size=89B cardinality=2
|
|--04:SCAN HDFS [functional.alltypestiny]
| partition predicates: `year` = 2009, `month` = 1
| HDFS partitions=1/4 files=1 size=115B
| row-size=89B cardinality=2
|
03:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=89B cardinality=8
|
|--02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: int_col < 10
| limit: 1
| row-size=4B cardinality=1
|
01:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=89B cardinality=8
====
# IMPALA-4303: Test subquery rewriting with nested unions.
select * from functional.alltypestiny
where exists (select 1 from functional.alltypes where int_col < 10)
union distinct
(select * from functional.alltypestiny where year=2009 and month=1
union distinct
select * from functional.alltypestiny where year=2009 and month=2)
---- PLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| group by: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month
| row-size=89B cardinality=12
|
00:UNION
| pass-through-operands: all
| row-size=89B cardinality=12
|
|--05:SCAN HDFS [functional.alltypestiny]
| partition predicates: `year` = 2009, `month` = 2
| HDFS partitions=1/4 files=1 size=115B
| row-size=89B cardinality=2
|
|--04:SCAN HDFS [functional.alltypestiny]
| partition predicates: `year` = 2009, `month` = 1
| HDFS partitions=1/4 files=1 size=115B
| row-size=89B cardinality=2
|
03:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=89B cardinality=8
|
|--02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: int_col < 10
| limit: 1
| row-size=4B cardinality=1
|
01:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=89B cardinality=8
====
# Constant on LHS of IN, uncorrelated subquery
select * from functional.alltypessmall where
1 in (select int_col from functional.alltypestiny)
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=89B cardinality=100
|
|--01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| predicates: 1 = functional.alltypestiny.int_col
| limit: 1
| row-size=4B cardinality=4
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=89B cardinality=100
====
# Constant on LHS of NOT IN, uncorrelated subquery
select * from functional.alltypessmall where
1 not in (select int_col from functional.alltypestiny)
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [LEFT ANTI JOIN]
| row-size=89B cardinality=100
|
|--01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| predicates: 1 IS NULL OR functional.alltypestiny.int_col IS NULL OR functional.alltypestiny.int_col = 1
| limit: 1
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=89B cardinality=100
====
# Constant on LHS of IN, correlated subquery
select * from functional.alltypessmall a where
1 in (select int_col from functional.alltypestiny b where b.id = a.id)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = b.id
| runtime filters: RF000 <- b.id
| row-size=89B cardinality=4
|
|--01:SCAN HDFS [functional.alltypestiny b]
| HDFS partitions=4/4 files=4 size=460B
| predicates: 1 = b.int_col
| row-size=8B cardinality=4
|
00:SCAN HDFS [functional.alltypessmall a]
HDFS partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> a.id
row-size=89B cardinality=100
====
# Constant on LHS of IN, subquery with group by
select * from functional.alltypessmall where
1 in (select int_col from functional.alltypestiny group by int_col)
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=89B cardinality=100
|
|--02:AGGREGATE [FINALIZE]
| | group by: int_col
| | limit: 1
| | row-size=4B cardinality=2
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| predicates: 1 = functional.alltypestiny.int_col
| row-size=4B cardinality=4
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=89B cardinality=100
====
# Constant on LHS of NOT IN, subquery with group by
select * from functional.alltypessmall where
1 not in (select int_col from functional.alltypestiny group by int_col)
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [LEFT ANTI JOIN]
| row-size=89B cardinality=100
|
|--02:AGGREGATE [FINALIZE]
| | group by: int_col
| | limit: 1
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| predicates: 1 IS NULL OR functional.alltypestiny.int_col IS NULL OR functional.alltypestiny.int_col = 1
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=89B cardinality=100
====
# Constant on LHS of IN, subquery with aggregate
select * from functional.alltypessmall where
1 in (select max(int_col) from functional.alltypestiny)
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=89B cardinality=100
|
|--02:AGGREGATE [FINALIZE]
| | output: max(int_col)
| | having: 1 = max(int_col)
| | limit: 1
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=89B cardinality=100
====
# Constant on LHS of NOT IN, subquery with aggregate
select * from functional.alltypessmall where
1 not in (select max(int_col) from functional.alltypestiny)
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [CROSS JOIN]
| row-size=93B cardinality=100
|
|--02:AGGREGATE [FINALIZE]
| | output: max(int_col)
| | having: 1 != max(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=89B cardinality=100
====
# Constant on LHS of IN, subquery with limit
select * from functional.alltypessmall where
1 in (select int_col from functional.alltypestiny limit 1)
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=89B cardinality=100
|
|--02:SELECT
| | predicates: 1 = int_col
| | limit: 1
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| limit: 1
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=89B cardinality=100
====
# Constant on LHS of NOT IN, subquery with limit
select * from functional.alltypessmall where
1 not in (select int_col from functional.alltypestiny limit 1)
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [LEFT ANTI JOIN]
| row-size=89B cardinality=100
|
|--02:SELECT
| | predicates: 1 IS NULL OR int_col IS NULL OR int_col = 1
| | limit: 1
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| limit: 1
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypessmall]
HDFS partitions=4/4 files=4 size=6.32KB
row-size=89B cardinality=100
====
# Constant on LHS of IN for nested subqueries (no correlation)
select * from functional.alltypes t where 1 in
(select int_col from functional.tinyinttable where
1 in (select int_col from functional.alltypestiny))
---- PLAN
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=89B cardinality=7.30K
|
|--03:NESTED LOOP JOIN [LEFT SEMI JOIN]
| | limit: 1
| | row-size=4B cardinality=unavailable
| |
| |--02:SCAN HDFS [functional.alltypestiny]
| | HDFS partitions=4/4 files=4 size=460B
| | predicates: 1 = functional.alltypestiny.int_col
| | limit: 1
| | row-size=4B cardinality=4
| |
| 01:SCAN HDFS [functional.tinyinttable]
| HDFS partitions=1/1 files=1 size=20B
| predicates: 1 = functional.tinyinttable.int_col
| row-size=4B cardinality=unavailable
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# Constant on LHS of IN for nested subqueries (correlation)
select * from functional.alltypes t where 1 in
(select int_col from functional.alltypessmall t where
bigint_col in (select bigint_col from functional.alltypestiny where id = t.id))
---- PLAN
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=89B cardinality=7.30K
|
|--03:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: bigint_col = bigint_col, t.id = id
| | runtime filters: RF000 <- bigint_col, RF001 <- id
| | limit: 1
| | row-size=16B cardinality=2
| |
| |--02:SCAN HDFS [functional.alltypestiny]
| | HDFS partitions=4/4 files=4 size=460B
| | row-size=12B cardinality=8
| |
| 01:SCAN HDFS [functional.alltypessmall t]
| HDFS partitions=4/4 files=4 size=6.32KB
| predicates: 1 = t.int_col
| runtime filters: RF000 -> bigint_col, RF001 -> t.id
| row-size=16B cardinality=10
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# EXISTS subquery containing ORDER BY, LIMIT, and OFFSET (IMPALA-6934)
select count(*) from functional.alltypestiny t where exists
(select id from functional.alltypestiny where id < 5 order by id limit 10 offset 6)
---- PLAN
PLAN-ROOT SINK
|
04:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
03:NESTED LOOP JOIN [RIGHT SEMI JOIN]
| row-size=0B cardinality=8
|
|--00:SCAN HDFS [functional.alltypestiny t]
| HDFS partitions=4/4 files=4 size=460B
| row-size=0B cardinality=8
|
02:TOP-N [LIMIT=1 OFFSET=6]
| order by: id ASC
| row-size=4B cardinality=1
|
01:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
predicates: id < 5
row-size=4B cardinality=1
====
# Subquery in binary predicate that needs cardinality check at runtime
select bigint_col from functional.alltypes where id =
(select id
from functional.alltypes where id = 1
)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
| runtime filters: RF000 <- id
| row-size=12B cardinality=1
|
|--02:CARDINALITY CHECK
| | limit: 1
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: id = 1
| limit: 2
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.id = 1
runtime filters: RF000 -> id
row-size=12B cardinality=1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED]
| hash predicates: id = id
| runtime filters: RF000 <- id
| row-size=12B cardinality=1
|
|--06:EXCHANGE [HASH(id)]
| |
| 02:CARDINALITY CHECK
| | limit: 1
| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| | limit: 2
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: id = 1
| limit: 2
| row-size=4B cardinality=1
|
05:EXCHANGE [HASH(id)]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.id = 1
runtime filters: RF000 -> id
row-size=12B cardinality=1
====
# Subquery in arithmetic expression that needs cardinality check at runtime
select bigint_col from functional.alltypes where id =
3 * (select id
from functional.alltypes where id = 1
)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = 3 * id
| runtime filters: RF000 <- 3 * id
| row-size=12B cardinality=7.30K
|
|--02:CARDINALITY CHECK
| | limit: 1
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: id = 1
| limit: 2
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> id
row-size=12B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: id = 3 * id
| runtime filters: RF000 <- 3 * id
| row-size=12B cardinality=7.30K
|
|--05:EXCHANGE [BROADCAST]
| |
| 02:CARDINALITY CHECK
| | limit: 1
| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| | limit: 2
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: id = 1
| limit: 2
| row-size=4B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> id
row-size=12B cardinality=7.30K
====
# Subquery that contains union and needs cardinality check at runtime
select * from functional.alltypes where id =
(select i from (select bigint_col as i from functional.alltypes
union
select smallint_col as i from functional.alltypes) t)
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = i
| runtime filters: RF000 <- i
| row-size=89B cardinality=1
|
|--05:CARDINALITY CHECK
| | limit: 1
| | row-size=8B cardinality=1
| |
| 04:AGGREGATE [FINALIZE]
| | group by: i
| | limit: 2
| | row-size=8B cardinality=2
| |
| 01:UNION
| | pass-through-operands: 02
| | row-size=8B cardinality=14.60K
| |
| |--03:SCAN HDFS [functional.alltypes]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | row-size=2B cardinality=7.30K
| |
| 02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> id
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
11:EXCHANGE [UNPARTITIONED]
|
06:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: id = i
| runtime filters: RF000 <- i
| row-size=89B cardinality=1
|
|--10:EXCHANGE [BROADCAST]
| |
| 05:CARDINALITY CHECK
| | limit: 1
| | row-size=8B cardinality=1
| |
| 09:EXCHANGE [UNPARTITIONED]
| | limit: 2
| |
| 08:AGGREGATE [FINALIZE]
| | group by: i
| | limit: 2
| | row-size=8B cardinality=2
| |
| 07:EXCHANGE [HASH(i)]
| |
| 04:AGGREGATE [STREAMING]
| | group by: i
| | limit: 2
| | row-size=8B cardinality=2
| |
| 01:UNION
| | pass-through-operands: 02
| | row-size=8B cardinality=14.60K
| |
| |--03:SCAN HDFS [functional.alltypes]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | row-size=2B cardinality=7.30K
| |
| 02:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> id
row-size=89B cardinality=7.30K
====
# Subquery that contains join and GROUP BY and needs cardinality check at runtime
select * from functional.alltypes where id =
(select max(allt.smallint_col) from functional.alltypes allt, functional.alltypesagg ata
where allt.id = ata.id and ata.month = 1 group by ata.month)
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = max(allt.smallint_col)
| runtime filters: RF000 <- max(allt.smallint_col)
| row-size=89B cardinality=1
|
|--05:CARDINALITY CHECK
| | limit: 1
| | row-size=6B cardinality=1
| |
| 04:AGGREGATE [FINALIZE]
| | output: max(allt.smallint_col)
| | group by: ata.`month`
| | limit: 2
| | row-size=6B cardinality=1
| |
| 03:HASH JOIN [INNER JOIN]
| | hash predicates: ata.id = allt.id
| | runtime filters: RF002 <- allt.id
| | row-size=14B cardinality=7.81K
| |
| |--01:SCAN HDFS [functional.alltypes allt]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | row-size=6B cardinality=7.30K
| |
| 02:SCAN HDFS [functional.alltypesagg ata]
| partition predicates: ata.`month` = 1
| HDFS partitions=11/11 files=11 size=814.73KB
| runtime filters: RF002 -> ata.id
| row-size=8B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> id
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
13:EXCHANGE [UNPARTITIONED]
|
06:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: id = max(allt.smallint_col)
| runtime filters: RF000 <- max(allt.smallint_col)
| row-size=89B cardinality=1
|
|--12:EXCHANGE [BROADCAST]
| |
| 05:CARDINALITY CHECK
| | limit: 1
| | row-size=6B cardinality=1
| |
| 11:EXCHANGE [UNPARTITIONED]
| | limit: 2
| |
| 10:AGGREGATE [FINALIZE]
| | output: max:merge(allt.smallint_col)
| | group by: ata.`month`
| | limit: 2
| | row-size=6B cardinality=1
| |
| 09:EXCHANGE [HASH(ata.`month`)]
| |
| 04:AGGREGATE [STREAMING]
| | output: max(allt.smallint_col)
| | group by: ata.`month`
| | row-size=6B cardinality=1
| |
| 03:HASH JOIN [INNER JOIN, PARTITIONED]
| | hash predicates: ata.id = allt.id
| | runtime filters: RF002 <- allt.id
| | row-size=14B cardinality=7.81K
| |
| |--08:EXCHANGE [HASH(allt.id)]
| | |
| | 01:SCAN HDFS [functional.alltypes allt]
| | HDFS partitions=24/24 files=24 size=478.45KB
| | row-size=6B cardinality=7.30K
| |
| 07:EXCHANGE [HASH(ata.id)]
| |
| 02:SCAN HDFS [functional.alltypesagg ata]
| partition predicates: ata.`month` = 1
| HDFS partitions=11/11 files=11 size=814.73KB
| runtime filters: RF002 -> ata.id
| row-size=8B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> id
row-size=89B cardinality=7.30K
====
# IS NULL predicate must not be pushed down to the scan node of the inline view.
select count(1) from functional.alltypes
where (select int_col from functional.alltypes) is null
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:NESTED LOOP JOIN [CROSS JOIN]
| row-size=4B cardinality=7.30K
|
|--00:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=0B cardinality=7.30K
|
03:SELECT
| predicates: int_col IS NULL
| row-size=4B cardinality=1
|
02:CARDINALITY CHECK
| limit: 1
| row-size=4B cardinality=1
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
limit: 2
row-size=4B cardinality=2
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=4B cardinality=7.30K
|
|--07:EXCHANGE [UNPARTITIONED]
| |
| 00:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=0B cardinality=7.30K
|
03:SELECT
| predicates: int_col IS NULL
| row-size=4B cardinality=1
|
02:CARDINALITY CHECK
| limit: 1
| row-size=4B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
| limit: 2
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
limit: 2
row-size=4B cardinality=2
====
# Binary predicate with constant must not be pushed down
# to the scan node of the inline view.
select count(1) from functional.alltypes
where (select int_col from functional.alltypes) > 10
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:NESTED LOOP JOIN [CROSS JOIN]
| row-size=4B cardinality=7.30K
|
|--00:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=0B cardinality=7.30K
|
03:SELECT
| predicates: int_col > 10
| row-size=4B cardinality=1
|
02:CARDINALITY CHECK
| limit: 1
| row-size=4B cardinality=1
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
limit: 2
row-size=4B cardinality=2
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
04:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=4B cardinality=7.30K
|
|--07:EXCHANGE [UNPARTITIONED]
| |
| 00:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=0B cardinality=7.30K
|
03:SELECT
| predicates: int_col > 10
| row-size=4B cardinality=1
|
02:CARDINALITY CHECK
| limit: 1
| row-size=4B cardinality=1
|
06:EXCHANGE [UNPARTITIONED]
| limit: 2
|
01:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
limit: 2
row-size=4B cardinality=2
====
# Runtime scalar subquery with offset.
select count(*) from functional.alltypes
where 7 = (select id from functional.alltypestiny
order by id limit 8 offset 7)
---- PLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
05:NESTED LOOP JOIN [CROSS JOIN]
| row-size=4B cardinality=7.30K
|
|--00:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=0B cardinality=7.30K
|
04:SELECT
| predicates: id = 7
| row-size=4B cardinality=1
|
03:CARDINALITY CHECK
| limit: 1
| row-size=4B cardinality=1
|
02:TOP-N [LIMIT=2 OFFSET=7]
| order by: id ASC
| row-size=4B cardinality=2
|
01:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
05:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=4B cardinality=7.30K
|
|--08:EXCHANGE [UNPARTITIONED]
| |
| 00:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=0B cardinality=7.30K
|
04:SELECT
| predicates: id = 7
| row-size=4B cardinality=1
|
03:CARDINALITY CHECK
| limit: 1
| row-size=4B cardinality=1
|
07:MERGING-EXCHANGE [UNPARTITIONED]
| offset: 7
| order by: id ASC
| limit: 2
|
02:TOP-N [LIMIT=9]
| order by: id ASC
| row-size=4B cardinality=8
|
01:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# IMPALA-7108: Select from an inline view that returns a single row.
select * from functional.alltypes
where int_col = (select f.id from (
select * from functional.alltypes order by 1 limit 1) f)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: int_col = id
| runtime filters: RF000 <- id
| row-size=89B cardinality=730
|
|--02:TOP-N [LIMIT=1]
| | order by: id ASC
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> int_col
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: int_col = id
| runtime filters: RF000 <- id
| row-size=89B cardinality=730
|
|--05:EXCHANGE [BROADCAST]
| |
| 04:MERGING-EXCHANGE [UNPARTITIONED]
| | order by: id ASC
| | limit: 1
| |
| 02:TOP-N [LIMIT=1]
| | order by: id ASC
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> int_col
row-size=89B cardinality=7.30K
====
# Scalar subquery in select list
SELECT alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name,
SUM(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END) / CAST((SELECT COUNT(*) FROM functional.dimtbl WHERE dimtbl.zip < 94613) AS decimal(10, 6)) AS close,
SUM(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END) / CAST((SELECT COUNT(*) FROM functional.dimtbl WHERE dimtbl.zip >= 94613) AS decimal(10, 6)) AS remote
FROM functional.alltypesagg JOIN functional.dimtbl ON (alltypesagg.id = dimtbl.id)
GROUP BY 1, 2, 3
ORDER BY alltypesagg.string_col DESC
---- PLAN
PLAN-ROOT SINK
|
10:SORT
| order by: alltypesagg.string_col DESC
| row-size=84B cardinality=11
|
09:AGGREGATE [FINALIZE]
| output: sum(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max(count(*)), sum(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max(count(*))
| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
| row-size=84B cardinality=11
|
08:NESTED LOOP JOIN [CROSS JOIN]
| row-size=84B cardinality=11
|
|--05:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 04:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| predicates: dimtbl.zip >= 94613
| row-size=4B cardinality=1
|
07:NESTED LOOP JOIN [CROSS JOIN]
| row-size=76B cardinality=11
|
|--03:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 02:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| predicates: dimtbl.zip < 94613
| row-size=4B cardinality=1
|
06:HASH JOIN [INNER JOIN]
| hash predicates: alltypesagg.id = dimtbl.id
| runtime filters: RF000 <- dimtbl.id
| row-size=68B cardinality=11
|
|--01:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| row-size=29B cardinality=10
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> alltypesagg.id
row-size=39B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
20:MERGING-EXCHANGE [UNPARTITIONED]
| order by: alltypesagg.string_col DESC
|
10:SORT
| order by: alltypesagg.string_col DESC
| row-size=84B cardinality=11
|
19:AGGREGATE [FINALIZE]
| output: sum:merge(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max:merge(`$a$1`.`$c$1`), sum:merge(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max:merge(`$a$2`.`$c$1`)
| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
| row-size=84B cardinality=11
|
18:EXCHANGE [HASH(alltypesagg.string_col,alltypesagg.date_string_col,dimtbl.name)]
|
09:AGGREGATE [STREAMING]
| output: sum(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max(count(*)), sum(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max(count(*))
| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
| row-size=84B cardinality=11
|
08:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=84B cardinality=11
|
|--17:EXCHANGE [BROADCAST]
| |
| 16:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 15:EXCHANGE [UNPARTITIONED]
| |
| 05:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 04:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| predicates: dimtbl.zip >= 94613
| row-size=4B cardinality=1
|
07:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=76B cardinality=11
|
|--14:EXCHANGE [BROADCAST]
| |
| 13:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 12:EXCHANGE [UNPARTITIONED]
| |
| 03:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 02:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| predicates: dimtbl.zip < 94613
| row-size=4B cardinality=1
|
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: alltypesagg.id = dimtbl.id
| runtime filters: RF000 <- dimtbl.id
| row-size=68B cardinality=11
|
|--11:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| row-size=29B cardinality=10
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> alltypesagg.id
row-size=39B cardinality=11.00K
====
# IMPALA-9905 Cardinality Check for subqueries with grouping expressions and
# scalar return types
select bigint_col from functional.alltypes where int_col >=
3 * (select max(int_col)
from functional.alltypestiny where id = 1 group by id
)
---- PLAN
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [INNER JOIN]
| predicates: int_col >= 3 * max(int_col)
| row-size=20B cardinality=7.30K
|
|--03:CARDINALITY CHECK
| | limit: 1
| | row-size=8B cardinality=1
| |
| 02:AGGREGATE [FINALIZE]
| | output: max(int_col)
| | group by: id
| | limit: 2
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| predicates: id = 1
| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=12B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
04:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
| predicates: int_col >= 3 * max(int_col)
| row-size=20B cardinality=7.30K
|
|--08:EXCHANGE [BROADCAST]
| |
| 03:CARDINALITY CHECK
| | limit: 1
| | row-size=8B cardinality=1
| |
| 07:EXCHANGE [UNPARTITIONED]
| | limit: 2
| |
| 06:AGGREGATE [FINALIZE]
| | output: max:merge(int_col)
| | group by: id
| | limit: 2
| | row-size=8B cardinality=1
| |
| 05:EXCHANGE [HASH(id)]
| |
| 02:AGGREGATE [STREAMING]
| | output: max(int_col)
| | group by: id
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| predicates: id = 1
| row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=12B cardinality=7.30K
====
# Basic IN subquery in disjunct.
select * from functional.alltypes t
where t.id IN (select id from functional.alltypesagg) or t.bool_col = false
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t.id = id
| other predicates: id IS NOT NULL OR t.bool_col = FALSE
| row-size=93B cardinality=7.81K
|
|--02:AGGREGATE [FINALIZE]
| | group by: id
| | row-size=4B cardinality=10.28K
| |
| 01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: t.id = id
| other predicates: id IS NOT NULL OR t.bool_col = FALSE
| row-size=93B cardinality=7.81K
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | group by: id
| | row-size=4B cardinality=10.28K
| |
| 04:EXCHANGE [HASH(id)]
| |
| 02:AGGREGATE [STREAMING]
| | group by: id
| | row-size=4B cardinality=10.28K
| |
| 01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# IN subquery with unnesting of array in disjunct.
select id from functional_parquet.complextypestbl t
where id in (7, 9, 10) or
id + 1 IN (select item from t.int_array a where t.nested_struct.a < a.item)
---- PLAN
PLAN-ROOT SINK
|
01:SUBPLAN
| row-size=28B cardinality=unavailable
|
|--05:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| | join predicates: t.nested_struct.a < item, item = id + 1
| | predicates: id IN (7, 9, 10) OR item IS NOT NULL
| | row-size=28B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=24B cardinality=1
| |
| 04:AGGREGATE [FINALIZE]
| | group by: item
| | row-size=4B cardinality=10
| |
| 03:UNNEST [t.int_array a]
| row-size=0B cardinality=10
|
00:SCAN HDFS [functional_parquet.complextypestbl t]
HDFS partitions=1/1 files=2 size=6.92KB
row-size=24B cardinality=unavailable
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
06:EXCHANGE [UNPARTITIONED]
|
01:SUBPLAN
| row-size=28B cardinality=unavailable
|
|--05:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| | join predicates: t.nested_struct.a < item, item = id + 1
| | predicates: id IN (7, 9, 10) OR item IS NOT NULL
| | row-size=28B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=24B cardinality=1
| |
| 04:AGGREGATE [FINALIZE]
| | group by: item
| | row-size=4B cardinality=10
| |
| 03:UNNEST [t.int_array a]
| row-size=0B cardinality=10
|
00:SCAN HDFS [functional_parquet.complextypestbl t]
HDFS partitions=1/1 files=2 size=6.92KB
row-size=24B cardinality=unavailable
====
# IN scalar subquery in disjunct.
select * from functional.alltypes
where int_col > 10 or
string_col in (
select min(string_col) from functional.alltypestiny
having min(string_col) > 'abc')
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: string_col = min(string_col)
| other predicates: int_col > 10 OR min(string_col) IS NOT NULL
| row-size=101B cardinality=7.30K
|
|--02:AGGREGATE [FINALIZE]
| | output: min(string_col)
| | having: min(string_col) > 'abc'
| | row-size=12B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=13B cardinality=8
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: string_col = min(string_col)
| other predicates: int_col > 10 OR min(string_col) IS NOT NULL
| row-size=101B cardinality=7.30K
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(string_col)
| | having: min(string_col) > 'abc'
| | row-size=12B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(string_col)
| | row-size=12B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=13B cardinality=8
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# Comparator-based subquery in disjunction.
select * from functional.alltypes t
where t.id = (select min(id) from functional.alltypesagg g) or t.id = 10
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t.id = min(id)
| other predicates: min(id) IS NOT NULL OR t.id = 10
| row-size=93B cardinality=7.30K
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: t.id = min(id)
| other predicates: min(id) IS NOT NULL OR t.id = 10
| row-size=93B cardinality=7.30K
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(id)
| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(id)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# Comparator-based subquery with correlated predicate in disjunction.
select * from functional.alltypes t
where int_col % 2 = 0 or
t.id <= (select min(id) from functional.alltypesagg g where t.int_col = g.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t.int_col = g.int_col
| other join predicates: t.id <= min(id)
| other predicates: int_col % 2 = 0 OR min(id) IS NOT NULL
| row-size=97B cardinality=83.91K
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: g.int_col
| | row-size=8B cardinality=957
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=8B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: t.int_col = g.int_col
| other join predicates: t.id <= min(id)
| other predicates: int_col % 2 = 0 OR min(id) IS NOT NULL
| row-size=97B cardinality=83.91K
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(id)
| | group by: g.int_col
| | row-size=8B cardinality=957
| |
| 04:EXCHANGE [HASH(g.int_col)]
| |
| 02:AGGREGATE [STREAMING]
| | output: min(id)
| | group by: g.int_col
| | row-size=8B cardinality=957
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=8B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# Subquery nested in a more complex expression that has an OR at the top level.
# The subquery select lists includes an alias to test alias handling.
select * from functional.alltypes
where int_col > 10 or (
string_col in ('2', '4') and int_col > 0 and string_col in (
select min(string_col) the_min from functional.alltypestiny))
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: string_col = min(string_col)
| other predicates: int_col > 10 OR (string_col IN ('2', '4') AND int_col > 0 AND min(string_col) IS NOT NULL)
| row-size=101B cardinality=7.30K
|
|--02:AGGREGATE [FINALIZE]
| | output: min(string_col)
| | row-size=12B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=13B cardinality=8
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: string_col = min(string_col)
| other predicates: int_col > 10 OR (string_col IN ('2', '4') AND int_col > 0 AND min(string_col) IS NOT NULL)
| row-size=101B cardinality=7.30K
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(string_col)
| | row-size=12B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(string_col)
| | row-size=12B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=13B cardinality=8
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# Subquery in OR in having clause.
select year, month, min(id)
from functional.alltypes
group by year, month
having (year = 2010 and month > 11) or
min(id) in (select int_col from functional.alltypestiny)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: min(id) = int_col
| other predicates: (`year` = 2010 AND `month` > 11) OR int_col IS NOT NULL
| row-size=16B cardinality=24
|
|--03:AGGREGATE [FINALIZE]
| | group by: int_col
| | row-size=4B cardinality=2
| |
| 02:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
01:AGGREGATE [FINALIZE]
| output: min(id)
| group by: `year`, `month`
| row-size=12B cardinality=24
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=12B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
10:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: min(id) = int_col
| other predicates: (`year` = 2010 AND `month` > 11) OR int_col IS NOT NULL
| row-size=16B cardinality=24
|
|--09:EXCHANGE [BROADCAST]
| |
| 08:AGGREGATE [FINALIZE]
| | group by: int_col
| | row-size=4B cardinality=2
| |
| 07:EXCHANGE [HASH(int_col)]
| |
| 03:AGGREGATE [STREAMING]
| | group by: int_col
| | row-size=4B cardinality=2
| |
| 02:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
06:AGGREGATE [FINALIZE]
| output: min:merge(id)
| group by: `year`, `month`
| row-size=12B cardinality=24
|
05:EXCHANGE [HASH(`year`,`month`)]
|
01:AGGREGATE [STREAMING]
| output: min(id)
| group by: `year`, `month`
| row-size=12B cardinality=24
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=12B cardinality=7.30K
====
# Multiple conjuncts with subqueries with OR.
select id from functional.alltypes t
where (id % 1234 = 0 or
t.id <= (select min(id) from functional.alltypesagg g where t.int_col = g.int_col)) and
(id % 4321 = 0 or
t.id >= (select max(id) from functional.alltypesagg g where t.int_col = g.int_col))
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t.int_col = g.int_col
| other join predicates: t.id >= max(id)
| other predicates: (id % 4321 = 0 OR max(id) IS NOT NULL)
| row-size=24B cardinality=964.46K
|
|--04:AGGREGATE [FINALIZE]
| | output: max(id)
| | group by: g.int_col
| | row-size=8B cardinality=957
| |
| 03:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=8B cardinality=11.00K
|
05:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t.int_col = g.int_col
| other join predicates: t.id <= min(id)
| other predicates: (id % 1234 = 0 OR min(id) IS NOT NULL)
| row-size=16B cardinality=83.91K
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: g.int_col
| | row-size=8B cardinality=957
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=8B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=8B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
13:EXCHANGE [UNPARTITIONED]
|
06:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: t.int_col = g.int_col
| other join predicates: t.id >= max(id)
| other predicates: (id % 4321 = 0 OR max(id) IS NOT NULL)
| row-size=24B cardinality=964.46K
|
|--12:EXCHANGE [BROADCAST]
| |
| 11:AGGREGATE [FINALIZE]
| | output: max:merge(id)
| | group by: g.int_col
| | row-size=8B cardinality=957
| |
| 10:EXCHANGE [HASH(g.int_col)]
| |
| 04:AGGREGATE [STREAMING]
| | output: max(id)
| | group by: g.int_col
| | row-size=8B cardinality=957
| |
| 03:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=8B cardinality=11.00K
|
05:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: t.int_col = g.int_col
| other join predicates: t.id <= min(id)
| other predicates: (id % 1234 = 0 OR min(id) IS NOT NULL)
| row-size=16B cardinality=83.91K
|
|--09:EXCHANGE [BROADCAST]
| |
| 08:AGGREGATE [FINALIZE]
| | output: min:merge(id)
| | group by: g.int_col
| | row-size=8B cardinality=957
| |
| 07:EXCHANGE [HASH(g.int_col)]
| |
| 02:AGGREGATE [STREAMING]
| | output: min(id)
| | group by: g.int_col
| | row-size=8B cardinality=957
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=8B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=8B cardinality=7.30K
====
# Correlated subquery in BETWEEN in OR rewritten to LOJ.
select int_col from functional.alltypes t
where (t.int_col is null or
(t.int_col between
(select min(int_col) from functional.alltypesagg t2 where t.year = t2.year)
and 2))
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t.`year` = t2.`year`
| other join predicates: t.int_col >= min(int_col)
| other predicates: (t.int_col IS NULL OR min(int_col) IS NOT NULL AND t.int_col <= 2)
| row-size=16B cardinality=7.30K
|
|--02:AGGREGATE [FINALIZE]
| | output: min(int_col)
| | group by: t2.`year`
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypesagg t2]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=8B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=8B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: t.`year` = t2.`year`
| other join predicates: t.int_col >= min(int_col)
| other predicates: (t.int_col IS NULL OR min(int_col) IS NOT NULL AND t.int_col <= 2)
| row-size=16B cardinality=7.30K
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(int_col)
| | group by: t2.`year`
| | row-size=8B cardinality=1
| |
| 04:EXCHANGE [HASH(t2.`year`)]
| |
| 02:AGGREGATE [STREAMING]
| | output: min(int_col)
| | group by: t2.`year`
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypesagg t2]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=8B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=8B cardinality=7.30K
====
# Uncorrelated subquery in BETWEEN in OR rewritten to INNER JOIN.
select int_col from functional.alltypes t
where (t.int_col is null or
(t.int_col between
(select min(int_col) from functional.alltypesagg t2)
and 2))
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [INNER JOIN]
| predicates: t.int_col >= min(int_col), (t.int_col IS NULL OR min(int_col) IS NOT NULL AND t.int_col <= 2)
| row-size=8B cardinality=7.30K
|
|--02:AGGREGATE [FINALIZE]
| | output: min(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypesagg t2]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
| predicates: t.int_col >= min(int_col), (t.int_col IS NULL OR min(int_col) IS NOT NULL AND t.int_col <= 2)
| row-size=8B cardinality=7.30K
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(int_col)
| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypesagg t2]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
====
# LIKE predicate with subquerycan be rewritten into LEFT OUTER JOIN
select * from functional.alltypes t1
where id = 2 or string_col like (
select min(string_col)
from functional.alltypesagg t2
where t1.int_col = t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.int_col = t2.int_col
| other join predicates: string_col LIKE min(string_col)
| other predicates: id = 2 OR min(string_col) IS NOT NULL
| row-size=105B cardinality=83.91K
|
|--02:AGGREGATE [FINALIZE]
| | output: min(string_col)
| | group by: t2.int_col
| | row-size=16B cardinality=957
| |
| 01:SCAN HDFS [functional.alltypesagg t2]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=19B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t1]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# IN Subquery inside function call (NVL) can be rewritten into LEFT OUTER JOIN
select * from functional.alltypes t
where nvl(int_col in (select id from functional.alltypesagg), false) or id = 2
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: int_col = id
| other predicates: nvl(id IS NOT NULL, FALSE) OR id = 2
| row-size=93B cardinality=7.81K
|
|--02:AGGREGATE [FINALIZE]
| | group by: id
| | row-size=4B cardinality=10.28K
| |
| 01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# IN Subquery inside case expr can be rewritten into LEFT OUTER JOIN
select * from functional.alltypes t
where bigint_col = 2 or
case when bigint_col % 2 = 0
then int_col in (select id from functional.alltypesagg)
end
---- PLAN
PLAN-ROOT SINK
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: int_col = id
| other predicates: bigint_col = 2 OR CASE WHEN bigint_col % 2 = 0 THEN id IS NOT NULL END
| row-size=93B cardinality=7.81K
|
|--02:AGGREGATE [FINALIZE]
| | group by: id
| | row-size=4B cardinality=10.28K
| |
| 01:SCAN HDFS [functional.alltypesagg]
| HDFS partitions=11/11 files=11 size=814.73KB
| row-size=4B cardinality=11.00K
|
00:SCAN HDFS [functional.alltypes t]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# IMPALA-9949: uncorrelated subquery that can return 0 rows must be rewritten as a
# LEFT OUTER JOIN. LIMIT 0 queries obviously return 0 rows.
select id, (select min(int_col) from functional.alltypes limit 0)
from functional.alltypestiny
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [LEFT OUTER JOIN]
| row-size=8B cardinality=8
|
|--01:EMPTYSET
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:NESTED LOOP JOIN [LEFT OUTER JOIN, BROADCAST]
| row-size=8B cardinality=8
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:EMPTYSET
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# An aggregation with LIMIT 1 returns exactly one row so can be rewritten as a CROSS JOIN.
select id, (select min(int_col) from functional.alltypes limit 1)
from functional.alltypestiny
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [CROSS JOIN]
| row-size=8B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: min(int_col)
| | limit: 1
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=8B cardinality=8
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(int_col)
| | limit: 1
| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# IMPALA-9949: uncorrelated subquery that can return 0 rows must be rewritten as a
# LEFT OUTER JOIN. The HAVING clause can eliminate the single row returned from
# the aggregation.
select id, (select min(int_col) from functional.alltypes having min(int_col) > 10)
from functional.alltypestiny
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [LEFT OUTER JOIN]
| row-size=8B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: min(int_col)
| | having: min(int_col) > 10
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:NESTED LOOP JOIN [LEFT OUTER JOIN, BROADCAST]
| row-size=8B cardinality=8
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(int_col)
| | having: min(int_col) > 10
| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# A non-grouping aggregation without a HAVING or LIMIT always returns a single row, so
# this can be safely rewritten as a CROSS JOIN.
select id, (select min(int_col) from functional.alltypes where int_col > 10)
from functional.alltypestiny
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [CROSS JOIN]
| row-size=8B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: min(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: int_col > 10
| row-size=4B cardinality=730
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=8B cardinality=8
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(int_col)
| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| predicates: int_col > 10
| row-size=4B cardinality=730
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# IMPALA-9949: uncorrelated subquery that can return 0 rows must be rewritten as a
# LEFT OUTER JOIN. The WHERE clause can eliminate the single row returned from
# the inner select.
select id, (
select int_col
from (select min(id) as id, min(int_col) as int_col
from functional.alltypes) v
where id > 99999)
from functional.alltypestiny
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [LEFT OUTER JOIN]
| row-size=12B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id), min(int_col)
| | having: min(id) > 99999
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:NESTED LOOP JOIN [LEFT OUTER JOIN, BROADCAST]
| row-size=12B cardinality=8
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(id), min:merge(int_col)
| | having: min(id) > 99999
| | row-size=8B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(id), min(int_col)
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=8B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# IMPALA-9949: uncorrelated subquery that can return 0 rows must be rewritten as a
# LEFT OUTER JOIN. The subquery has a single subquery in its from list, but that
# subquery can return 0 or 1 rows.
select id, (
select int_col
from (select min(id) as id, min(int_col) as int_col
from functional.alltypes
having min(int_col) > 10) v)
from functional.alltypestiny
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [LEFT OUTER JOIN]
| row-size=8B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: min(int_col)
| | having: min(int_col) > 10
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:NESTED LOOP JOIN [LEFT OUTER JOIN, BROADCAST]
| row-size=8B cardinality=8
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(int_col)
| | having: min(int_col) > 10
| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# Subquery with a single subquery in the FROM list that returns exactly one row and
# a trivially true WHERE clause. This can be rewritten as a cross join because it
# always returns exactly one row.
select id, (
select int_col
from (select min(id) as id, min(int_col) as int_col
from functional.alltypes) v
where TRUE)
from functional.alltypestiny
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [CROSS JOIN]
| row-size=8B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: min(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
03:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=8B cardinality=8
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: min:merge(int_col)
| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: min(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# Uncorrelated EXISTS with a group by clause with rollup.
select 1
from functional.alltypesagg a
where exists
(select id
from functional.alltypestiny b
group by rollup(id, int_col, bool_col))
and tinyint_col < 10
---- PLAN
PLAN-ROOT SINK
|
04:NESTED LOOP JOIN [LEFT SEMI JOIN]
| row-size=1B cardinality=1.10K
|
|--03:AGGREGATE [FINALIZE]
| | group by: CASE valid_tid(2,3,4,5) WHEN 2 THEN id WHEN 3 THEN id WHEN 4 THEN id WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN int_col WHEN 3 THEN int_col WHEN 4 THEN NULL WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN bool_col WHEN 3 THEN NULL WHEN 4 THEN NULL WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN 2 WHEN 3 THEN 3 WHEN 4 THEN 4 WHEN 5 THEN 5 END
| | limit: 1
| | row-size=13B cardinality=1
| |
| 02:AGGREGATE [FINALIZE]
| | Class 0
| | group by: id, int_col, bool_col
| | Class 1
| | group by: id, int_col, NULL
| | Class 2
| | group by: id, NULL, NULL
| | Class 3
| | group by: NULL, NULL, NULL
| | row-size=36B cardinality=25
| |
| 01:SCAN HDFS [functional.alltypestiny b]
| HDFS partitions=4/4 files=4 size=460B
| row-size=9B cardinality=8
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: tinyint_col < 10
row-size=1B cardinality=1.10K
====
# Correlated EXISTS with a group by clause with rollup.
# The correlated columns are pushed into the GROUP BY.
# TODO: This plan could be improved by removing the ROLLUP in the subquery or by
# reordering the JOIN and AGGREGATE.
select 1
from functional.alltypesagg a
where exists
(select id
from functional.alltypestiny b
where a.tinyint_col = b.tinyint_col and a.string_col = b.string_col
group by rollup(id, int_col, bool_col))
and tinyint_col < 10
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.tinyint_col = CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END, a.string_col = CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END
| runtime filters: RF000 <- CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END, RF001 <- CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END
| row-size=16B cardinality=3
|
|--03:AGGREGATE [FINALIZE]
| | group by: CASE valid_tid(2,3,4,5) WHEN 2 THEN id WHEN 3 THEN id WHEN 4 THEN id WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN int_col WHEN 3 THEN int_col WHEN 4 THEN NULL WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN bool_col WHEN 3 THEN NULL WHEN 4 THEN NULL WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END, CASE valid_tid(2,3,4,5) WHEN 2 THEN 2 WHEN 3 THEN 3 WHEN 4 THEN 4 WHEN 5 THEN 5 END
| | row-size=26B cardinality=28
| |
| 02:AGGREGATE [FINALIZE]
| | Class 0
| | group by: id, int_col, bool_col, b.tinyint_col, b.string_col
| | Class 1
| | group by: id, int_col, NULL, b.tinyint_col, b.string_col
| | Class 2
| | group by: id, NULL, NULL, b.tinyint_col, b.string_col
| | Class 3
| | group by: NULL, NULL, NULL, b.tinyint_col, b.string_col
| | row-size=92B cardinality=28
| |
| 01:SCAN HDFS [functional.alltypestiny b]
| HDFS partitions=4/4 files=4 size=460B
| row-size=23B cardinality=8
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: tinyint_col < 10
runtime filters: RF000 -> a.tinyint_col, RF001 -> a.string_col
row-size=16B cardinality=1.10K
====
# Correlated EXISTS with a group by clause with rollup and having.
# The correlated columns are pushed into the GROUP BY.
# This plan could be improved by reordering the JOIN and AGGREGATE
# or by transforming the ROLLUP (the HAVING predicate interacts with
# the ROLLUP since it is always true for some aggregation classes).
select 1
from functional.alltypesagg a
where exists
(select id
from functional.alltypestiny b
where a.tinyint_col = b.tinyint_col and a.string_col = b.string_col
group by rollup(id, int_col, bool_col)
having int_col is null)
and tinyint_col < 10
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.tinyint_col = CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END, a.string_col = CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END
| runtime filters: RF000 <- CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END, RF001 <- CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END
| row-size=16B cardinality=1
|
|--03:AGGREGATE [FINALIZE]
| | group by: CASE valid_tid(2,3,4,5) WHEN 2 THEN id WHEN 3 THEN id WHEN 4 THEN id WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN int_col WHEN 3 THEN int_col WHEN 4 THEN NULL WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN bool_col WHEN 3 THEN NULL WHEN 4 THEN NULL WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END, CASE valid_tid(2,3,4,5) WHEN 2 THEN 2 WHEN 3 THEN 3 WHEN 4 THEN 4 WHEN 5 THEN 5 END
| | having: CASE valid_tid(2,3,4,5) WHEN 2 THEN int_col WHEN 3 THEN int_col WHEN 4 THEN NULL WHEN 5 THEN NULL END IS NULL
| | row-size=26B cardinality=1
| |
| 02:AGGREGATE [FINALIZE]
| | Class 0
| | group by: id, int_col, bool_col, b.tinyint_col, b.string_col
| | Class 1
| | group by: id, int_col, NULL, b.tinyint_col, b.string_col
| | Class 2
| | group by: id, NULL, NULL, b.tinyint_col, b.string_col
| | Class 3
| | group by: NULL, NULL, NULL, b.tinyint_col, b.string_col
| | row-size=92B cardinality=28
| |
| 01:SCAN HDFS [functional.alltypestiny b]
| HDFS partitions=4/4 files=4 size=460B
| row-size=23B cardinality=8
|
00:SCAN HDFS [functional.alltypesagg a]
HDFS partitions=11/11 files=11 size=814.73KB
predicates: tinyint_col < 10
runtime filters: RF000 -> a.tinyint_col, RF001 -> a.string_col
row-size=16B cardinality=1.10K
====
# Correlated EXISTS with a group by clause with rollup.
# Same query as above but with tables flipped so that the join is transformed into a
# RIGHT OUTER JOIN and the aggregation ends up on the left side of of the join. Runtime
# filters cannot be pushed to the scan because the rollup aggregation introduces NULLs.
select 1
from functional.alltypestiny a
where exists
(select id
from functional.alltypesagg b
where a.tinyint_col = b.tinyint_col and a.string_col = b.string_col
group by rollup(id, int_col, bool_col))
and tinyint_col < 10
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END = a.tinyint_col, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END = a.string_col
| row-size=14B cardinality=1
|
|--00:SCAN HDFS [functional.alltypestiny a]
| HDFS partitions=4/4 files=4 size=460B
| predicates: tinyint_col < 10
| row-size=14B cardinality=1
|
04:AGGREGATE [FINALIZE]
| group by: CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END
| row-size=13B cardinality=9.64K
|
03:AGGREGATE [FINALIZE]
| group by: CASE valid_tid(2,3,4,5) WHEN 2 THEN id WHEN 3 THEN id WHEN 4 THEN id WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN int_col WHEN 3 THEN int_col WHEN 4 THEN NULL WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN bool_col WHEN 3 THEN NULL WHEN 4 THEN NULL WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END, CASE valid_tid(2,3,4,5) WHEN 2 THEN 2 WHEN 3 THEN 3 WHEN 4 THEN 4 WHEN 5 THEN 5 END
| row-size=26B cardinality=41.67K
|
02:AGGREGATE [FINALIZE]
| Class 0
| group by: id, int_col, bool_col, b.tinyint_col, b.string_col
| Class 1
| group by: id, int_col, NULL, b.tinyint_col, b.string_col
| Class 2
| group by: id, NULL, NULL, b.tinyint_col, b.string_col
| Class 3
| group by: NULL, NULL, NULL, b.tinyint_col, b.string_col
| row-size=100B cardinality=41.67K
|
01:SCAN HDFS [functional.alltypesagg b]
HDFS partitions=11/11 files=11 size=814.73KB
row-size=25B cardinality=11.00K
====
# IMPALA-7782: incorrect results for subquery with HAVING predicate.
# Subqueries with HAVING clauses need to be rewritten to an outer join
# because they may return an empty result set.
SELECT id
FROM functional.alltypestiny
WHERE -1 NOT IN (SELECT COUNT(id) FROM functional.alltypestiny HAVING false)
---- PLAN
PLAN-ROOT SINK
|
02:NESTED LOOP JOIN [LEFT ANTI JOIN]
| row-size=4B cardinality=8
|
|--01:EMPTYSET
| limit: 1
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====
# IMPALA-7782: incorrect results for subquery with HAVING predicate.
# Subqueries with HAVING clauses need to be rewritten to an outer join
# because they may return an empty result set.
SELECT id
FROM functional.alltypestiny
WHERE -1 NOT IN (SELECT COUNT(id) FROM functional.alltypestiny HAVING count(id) > 100)
---- PLAN
PLAN-ROOT SINK
|
03:NESTED LOOP JOIN [LEFT ANTI JOIN]
| row-size=4B cardinality=8
|
|--02:AGGREGATE [FINALIZE]
| | output: count(id)
| | having: count(id) > 100, -1 IS NULL OR count(id) IS NULL OR count(id) = -1
| | limit: 1
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypestiny]
| HDFS partitions=4/4 files=4 size=460B
| row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=4B cardinality=8
====