blob: 96ed1bae41883fa068829b1846006cddb7530f7f [file] [log] [blame]
--! qt:dataset:src
--! qt:dataset:part
--! qt:dataset:lineitem
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
set hive.optimize.topnkey=false;
-- SORT_QUERY_RESULTS
-- non agg, non corr
explain
select *
from src
where src.key in (select key from src s1 where s1.key > '9')
;
select *
from src
where src.key in (select key from src s1 where s1.key > '9')
;
-- non agg, corr
explain
select *
from src b
where b.key in
(select a.key
from src a
where b.value = a.value and a.key > '9'
)
;
select *
from src b
where b.key in
(select a.key
from src a
where b.value = a.value and a.key > '9'
)
;
-- agg, non corr
explain
select p_name, p_size
from
part where part.p_size in
(select avg(p_size)
from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
where r <= 2
)
;
select p_name, p_size
from
part where part.p_size in
(select avg(p_size)
from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
where r <= 2
)
;
-- agg, corr
explain
select p_mfgr, p_name, p_size
from part b where b.p_size in
(select min(p_size)
from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
where r <= 2 and b.p_mfgr = a.p_mfgr
)
;
select p_mfgr, p_name, p_size
from part b where b.p_size in
(select min(p_size)
from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
where r <= 2 and b.p_mfgr = a.p_mfgr
)
;
-- distinct, corr
explain
select *
from src b
where b.key in
(select distinct a.key
from src a
where b.value = a.value and a.key > '9'
)
;
select *
from src b
where b.key in
(select distinct a.key
from src a
where b.value = a.value and a.key > '9'
)
;
-- corr, non equi predicate, should not have a join with outer to generate
-- corr values
explain
select *
from src b
where b.key in
(select distinct a.key
from src a
where b.value <> a.key and a.key > '9'
)
;
select *
from src b
where b.key in
(select distinct a.key
from src a
where b.value <> a.key and a.key > '9'
)
;
-- Right side shouldn't have aggregate
explain select * from src b where b.key in (select distinct key from src a where a.value > b.value);
select * from src b where b.key in (select distinct key from src a where a.value > b.value);
-- non agg, non corr, windowing
select p_mfgr, p_name, p_size
from part
where part.p_size in
(select first_value(p_size) over(partition by p_mfgr order by p_size) from part)
;
-- non agg, non corr, with join in Parent Query
explain
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
;
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
;
-- non agg, corr, with join in Parent Query
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
;
-- corr, agg in outer and inner
explain select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey);
select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey);
--where has multiple conjuction
explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select (p_size) from part p where p.p_type = part.p_type group by p_size) AND p_size <> 340;
select * from part where p_brand <> 'Brand#14' AND p_size IN (select (p_size) from part p where p.p_type = part.p_type group by p_size) AND p_size <> 340;
--lhs contains non-simple expression
explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type);
select * from part where (p_size-1) IN (select min(p_size) from part group by p_type);
explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type);
select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type);
--lhs contains non-simple expression, corr
explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name);
select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name);
-- lhs contains udf expression
explain select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type);
select * from part where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type);
explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
-- correlated query, multiple correlated variables referring to different outer var
explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
-- correlated var refers to outer table alias
explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand);
select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand);
-- correlated var refers to outer table alias which is an expression
explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size);
select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size);
-- where plus having
explain select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
-- where with having, correlated
explain select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
-- subquery with order by
explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand;
select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand;
--order by with limit
explain select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
select * from part where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
-- union, uncorr
explain select * from src where key IN (select p_name from part UNION ALL select p_brand from part);
select * from src where key IN (select p_name from part UNION ALL select p_brand from part);
-- corr, subquery has another subquery in from
explain select p_mfgr, b.p_name, p_size from part b where b.p_name in
(select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size;
select p_mfgr, b.p_name, p_size from part b where b.p_name in
(select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size;
-- join in subquery, correlated predicate with only one table
explain select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size);
select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size);
-- join in subquery, correlated predicate with both inner tables, same outer var
explain select p_partkey from part where p_name in
(select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size);
select p_partkey from part where p_name in
(select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size);
-- join in subquery, correlated predicate with both inner tables, different_n22 outer var
explain select p_partkey from part where p_name in
(select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_type=part.p_type);
-- subquery within from
explain select p_partkey from
(select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq;
select p_partkey from
(select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq;
-- corr IN with COUNT aggregate
explain select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type);
select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type);
-- corr IN with aggregate other than COUNT
explain select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey);
select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey);
-- corr IN with aggregate other than COUNT (MIN) with non-equi join
explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey);
select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey);
-- corr IN with COUNT aggregate
explain select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type);
select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type);
-- corr IN with aggregate other than COUNT
explain select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey);
select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey);
create table t_n22(i int);
insert into t_n22 values(1);
insert into t_n22 values(0);
create table tempty_n2(i int);
-- uncorr sub with aggregate which produces result irrespective of zero rows
explain select * from t_n22 where i IN (select count(*) from tempty_n2);
select * from t_n22 where i IN (select count(*) from tempty_n2);
drop table t_n22;
create table tnull_n2(i int);
insert into tnull_n2 values(NULL) , (NULL);
-- empty inner table, non-null sq key, expected empty result
select * from part where p_size IN (select i from tempty_n2);
-- empty inner table, null sq key, expected empty result
select * from tnull_n2 where i IN (select i from tempty_n2);
-- null inner table, non-null sq key
select * from part where p_size IN (select i from tnull_n2);
-- null inner table, null sq key
select * from tnull_n2 where i IN (select i from tnull_n2);
drop table tempty_n2;
create table t_n22(i int, j int);
insert into t_n22 values(0,1), (0,2);
create table tt_n2(i int, j int);
insert into tt_n2 values(0,3);
-- corr IN with aggregate other than COUNT return zero rows
explain select * from t_n22 where i IN (select sum(i) from tt_n2 where tt_n2.j = t_n22.j);
select * from t_n22 where i IN (select sum(i) from tt_n2 where tt_n2.j = t_n22.j);
drop table t_n22;
drop table tt_n2;
-- since inner query has aggregate it will be joined with outer to get all possible corrrelated values
explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name);
select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name);
-- inner query has join so should have a join with outer query to fetch all corr values
explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);
select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);