--! qt:dataset:src
--! qt:dataset:part
--! qt:dataset:lineitem
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;

-- SORT_QUERY_RESULTS

-- non agg, non corr
explain
 select * 
from src 
where src.key in (select key from src s1 where s1.key > '9')
;

select * 
from src 
where src.key in (select key from src s1 where s1.key > '9')
;

-- non agg, corr
explain 
select * 
from src b 
where b.key in
        (select a.key 
         from src a 
         where b.value = a.value and a.key > '9'
        )
;

select * 
from src b 
where b.key in
        (select a.key 
         from src a 
         where b.value = a.value and a.key > '9'
        )
;


-- agg, non corr
explain
select p_name, p_size 
from 
part where part.p_size in 
	(select avg(p_size) 
	 from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a 
	 where r <= 2
	)
;
select p_name, p_size 
from 
part where part.p_size in 
	(select avg(p_size) 
	 from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a 
	 where r <= 2
	)
;

-- agg, corr
explain
select p_mfgr, p_name, p_size
from part b where b.p_size in
	(select min(p_size)
	 from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
	 where r <= 2 and b.p_mfgr = a.p_mfgr
	)
;

select p_mfgr, p_name, p_size
from part b where b.p_size in
	(select min(p_size)
	 from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
	 where r <= 2 and b.p_mfgr = a.p_mfgr
	)
;

-- distinct, corr
explain
select *
from src b
where b.key in
        (select distinct a.key
         from src a
         where b.value = a.value and a.key > '9'
        )
;

select *
from src b
where b.key in
        (select distinct a.key
         from src a
         where b.value = a.value and a.key > '9'
        )
;

-- corr, non equi predicate, should not have a join with outer to generate
-- corr values
explain
select *
from src b
where b.key in
        (select distinct a.key
         from src a
         where b.value <> a.key and a.key > '9'
        )
;

select *
from src b
where b.key in
        (select distinct a.key
         from src a
         where b.value <> a.key and a.key > '9'
        )
;

-- Right side shouldn't have aggregate
explain select * from src b where b.key in (select distinct key from src a where a.value > b.value);
select * from src b where b.key in (select distinct key from src a where a.value > b.value);


-- non agg, non corr, windowing
select p_mfgr, p_name, p_size 
from part 
where part.p_size in 
  (select first_value(p_size) over(partition by p_mfgr order by p_size) from part)
;

-- non agg, non corr, with join in Parent Query
explain
select p.p_partkey, li.l_suppkey 
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
where li.l_linenumber = 1 and
 li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
;

select p.p_partkey, li.l_suppkey 
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
where li.l_linenumber = 1 and
 li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
;

-- non agg, corr, with join in Parent Query
select p.p_partkey, li.l_suppkey 
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
where li.l_linenumber = 1 and
 li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
;

-- corr, agg in outer and inner
explain select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey);
select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey);


--where has multiple conjuction
explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select (p_size) from part p where p.p_type = part.p_type group by p_size) AND p_size <> 340;
select * from part where p_brand <> 'Brand#14' AND p_size IN (select (p_size) from part p where p.p_type = part.p_type group by p_size) AND p_size <> 340;

--lhs contains non-simple expression
explain select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type);
select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type);

explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type);
select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type);

--lhs contains non-simple expression, corr
explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name);
select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name);

-- lhs contains udf expression
explain select * from part  where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type);
select * from part  where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type);

explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );

-- correlated query, multiple correlated variables referring to different outer var
explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );

-- correlated var refers to outer table alias
explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand);
select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand);
 
-- correlated var refers to outer table alias which is an expression 
explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size);
select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size);

-- where plus having
explain select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );

-- where with having, correlated
explain select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );

-- subquery with order by
explain select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand;
select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand;

--order by with limit
explain select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4;

-- union, uncorr
explain select * from src where key IN (select p_name from part UNION ALL select p_brand from part);
select * from src where key IN (select p_name from part UNION ALL select p_brand from part);

-- corr, subquery has another subquery in from
explain select p_mfgr, b.p_name, p_size from part b where b.p_name in 
  (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size;
select p_mfgr, b.p_name, p_size from part b where b.p_name in 
  (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size;

-- join in subquery, correlated predicate with only one table
explain select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size);
select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size);

-- join in subquery, correlated predicate with both inner tables, same outer var
explain select p_partkey from part where p_name in 
	(select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size);
select p_partkey from part where p_name in 
	(select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size);

-- join in subquery, correlated predicate with both inner tables, different_n22 outer var
explain select p_partkey from part where p_name in 
	(select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_type=part.p_type);

-- subquery within from 
explain select p_partkey from 
	(select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq;
select p_partkey from 
	(select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq;

-- corr IN with COUNT aggregate
explain select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type);
select * from part where p_size IN (select count(*) from part pp where pp.p_type = part.p_type);

-- corr IN with aggregate other than COUNT
explain select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey);
select * from part where p_size in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey);

-- corr IN with aggregate other than COUNT (MIN) with non-equi join
explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey);
select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey);

-- corr IN with COUNT aggregate
explain select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type);
select * from part where p_size NOT IN (select count(*) from part pp where pp.p_type = part.p_type);

-- corr IN with aggregate other than COUNT
explain select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey);
select * from part where p_size not in (select avg(pp.p_size) from part pp where pp.p_partkey = part.p_partkey);

create table t_n22(i int);
insert into t_n22 values(1);
insert into t_n22 values(0);

create table tempty_n2(i int);

-- uncorr sub with aggregate which produces result irrespective of zero rows
explain select * from t_n22 where i IN (select count(*) from tempty_n2);
select * from t_n22 where i IN (select count(*) from tempty_n2);

drop table t_n22;

create table tnull_n2(i int);
insert into tnull_n2 values(NULL) , (NULL);

-- empty inner table, non-null sq key, expected empty result
select * from part where p_size IN (select i from tempty_n2);

-- empty inner table, null sq key, expected empty result
select * from tnull_n2 where i IN (select i from tempty_n2);

-- null inner table, non-null sq key
select * from part where p_size IN (select i from tnull_n2);

-- null inner table, null sq key
select * from tnull_n2 where i IN (select i from tnull_n2);

drop table tempty_n2;

create table t_n22(i int, j int);
insert into t_n22 values(0,1), (0,2);

create table tt_n2(i int, j int);
insert into tt_n2 values(0,3);

-- corr IN with aggregate other than COUNT return zero rows
explain select * from t_n22 where i IN (select sum(i) from tt_n2 where tt_n2.j = t_n22.j);
select * from t_n22 where i IN (select sum(i) from tt_n2 where tt_n2.j = t_n22.j);

drop table t_n22;
drop table tt_n2;

-- since inner query has aggregate it will be joined with outer to get all possible corrrelated values
explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name);
select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name);

-- inner query has join so should have a join with outer query to fetch all corr values
explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);
select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);


