blob: 598b67150140f91d394c2d72a6ac815b7710d62b [file] [log] [blame]
--! qt:dataset:src
--! qt:dataset:part
--! qt:dataset:lineitem
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
-- SORT_QUERY_RESULTS
create table tnull_n0(i int, c char(2));
insert into tnull_n0 values(NULL, NULL), (NULL, NULL);
create table tempty_n0(c char(2));
CREATE TABLE part_null_n0(
p_partkey INT,
p_name STRING,
p_mfgr STRING,
p_brand STRING,
p_type STRING,
p_size INT,
p_container STRING,
p_retailprice DOUBLE,
p_comment_n11 STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
;
LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null_n0;
insert into part_null_n0 values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith');
-- non corr, simple less than
explain select * from part where p_size > (select avg(p_size) from part_null_n0);
select * from part where p_size > (select avg(p_size) from part_null_n0);
-- non corr, empty
select * from part where p_size > (select * from tempty_n0);
explain select * from part where p_size > (select * from tempty_n0);
-- non corr, null comparison
explain select * from part where p_name = (select p_name from part_null_n0 where p_name is null);
select * from part where p_name = (select p_name from part_null_n0 where p_name is null);
-- non corr, is null
explain select * from part where (select i from tnull_n0 limit 1) is null;
select * from part where (select i from tnull_n0 limit 1) is null;
-- non corr, is not null
explain select * from part where (select max(p_name) from part_null_n0) is not null;
select * from part where (select max(p_name) from part_null_n0) is not null;
-- non corr, between
explain select * from part where p_size between (select min(p_size) from part) and (select avg(p_size) from part);
select * from part where p_size between (select min(p_size) from part) and (select avg(p_size) from part);
-- non corr, windowing
explain select p_mfgr, p_name, p_size from part
where part.p_size >
(select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1);
select p_mfgr, p_name, p_size from part
where part.p_size >
(select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1);
-- lhs contain complex expressions
explain select * from part where (p_partkey*p_size) <> (select min(p_partkey) from part);
select * from part where (p_partkey*p_size) <> (select min(p_partkey) from part);
-- corr, lhs contain complex expressions
explain select count(*) as c from part as e where p_size + 100 < (select max(p_partkey) from part where p_name = e.p_name);
select count(*) as c from part as e where p_size + 100 < (select max(p_partkey) from part where p_name = e.p_name);
-- corr, lhs contain constant expressions (HIVE-16689)
explain select count(*) as c from part as e where 100 < (select max(p_partkey) from part where p_name = e.p_name);
select count(*) as c from part as e where 100 < (select max(p_partkey) from part where p_name = e.p_name);
-- corr, equi-join predicate
explain select * from part where p_size > (select avg(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type);
select * from part where p_size > (select avg(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type);
-- mix of corr and uncorr
explain select * from part where p_size BETWEEN (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND (select max(p_size) from part_null_n0);
select * from part where p_size BETWEEN (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND (select max(p_size) from part_null_n0);
-- mix of corr and uncorr
explain select * from part where p_size >= (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null_n0);
select * from part where p_size >= (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null_n0);
-- mix of scalar and IN corr
explain select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340;
select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340;
-- multiple corr var with scalar query
explain select * from part where p_size <> (select count(p_name) from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
select * from part where p_size <> (select count(p_name) from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
-- where + having
explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key );
select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key );
explain select sum(p_retailprice) from part group by p_type having sum(p_retailprice) > (select max(pp.p_retailprice) from part pp);
select sum(p_retailprice) from part group by p_type having sum(p_retailprice) > (select max(pp.p_retailprice) from part pp);
-- scalar subquery with INTERSECT
explain select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part);
select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part);
-- join in subquery
explain select p_partkey from part where p_name like (select max(p.p_name) from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size);
select p_partkey from part where p_name like (select max(p.p_name) from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size);
-- mix of NOT IN and scalar
explain select * from part_null_n0 where p_name NOT LIKE (select min(p_name) from part_null_n0) AND p_brand NOT IN (select p_name from part);
select * from part_null_n0 where p_name NOT LIKE (select min(p_name) from part_null_n0) AND p_brand NOT IN (select p_name from part);
-- mix of NOT IN and corr scalar
explain select * from part_null_n0 where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null_n0 pp where part_null_n0.p_type = pp.p_type);
select * from part_null_n0 where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null_n0 pp where part_null_n0.p_type = pp.p_type);
-- non corr, with join in parent query
explain select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey <> (select min(l_orderkey) from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
;
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey <> (select min(l_orderkey) from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
;
-- corr, with join in outer query
explain select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey <> (select min(l_orderkey) from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber);
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey <> (select min(l_orderkey) from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber);
-- corr, aggregate in outer
explain select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity > (select avg(l_quantity) from lineitem where l_partkey = p_partkey);
select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity > (select avg(l_quantity) from lineitem where l_partkey = p_partkey);
-- nested with scalar
explain select * from part_null_n0 where p_name IN (select p_name from part where part.p_type = part_null_n0.p_type AND p_brand NOT LIKE (select min(p_brand) from part pp where part.p_type = pp.p_type));
select * from part_null_n0 where p_name IN (select p_name from part where part.p_type = part_null_n0.p_type AND p_brand NOT LIKE (select min(p_brand) from part pp where part.p_type = pp.p_type));
-- non corr, is null , is not converted to anti join.
explain select * from part where (select i from tnull_n0 limit 1) is null;
select * from part where (select i from tnull_n0 limit 1) is null;
drop table tnull_n0;
drop table part_null_n0;
drop table tempty_n0;
create table EMPS_n4(EMPNO int,NAME string,DEPTNO int,GENDER string,CITY string,EMPID int,AGE int,SLACKER boolean,MANAGER boolean,JOINEDAT date);
insert into EMPS_n4 values (100,'Fred',10,NULL,NULL,30,25,true,false,'1996-08-03');
insert into EMPS_n4 values (110,'Eric',20,'M','San Francisco',3,80,NULL,false,'2001-01-01') ;
insert into EMPS_n4 values (110,'John',40,'M','Vancouver',2,NULL,false,true,'2002-05-03');
insert into EMPS_n4 values (120,'Wilma',20,'F',NULL,1,5,NULL,true,'2005-09-07');
insert into EMPS_n4 values (130,'Alice',40,'F','Vancouver',2,NULL,false,true,'2007-01-01');
create table DEPTS_n3(deptno int, name string);
insert into DEPTS_n3 values( 10,'Sales');
insert into DEPTS_n3 values( 20,'Marketing');
insert into DEPTS_n3 values( 30,'Accounts');
-- corr, scalar, with count aggregate
explain select * from emps_n4 where deptno <> (select count(deptno) from depts_n3 where depts_n3.name = emps_n4.name);
select * from emps_n4 where deptno <> (select count(deptno) from depts_n3 where depts_n3.name = emps_n4.name);
explain select * from emps_n4 where name > (select min(name) from depts_n3 where depts_n3.deptno=emps_n4.deptno);
select * from emps_n4 where name > (select min(name) from depts_n3 where depts_n3.deptno=emps_n4.deptno);
-- corr, scalar multiple subq with count aggregate
explain select * from emps_n4 where deptno <> (select count(deptno) from depts_n3 where depts_n3.name = emps_n4.name) and empno > (select count(name) from depts_n3 where depts_n3.deptno = emps_n4.deptno);
select * from emps_n4 where deptno <> (select count(deptno) from depts_n3 where depts_n3.name = emps_n4.name) and empno > (select count(name) from depts_n3 where depts_n3.deptno = emps_n4.deptno);
-- mix of corr, uncorr with aggregate
explain select * from emps_n4 where deptno <> (select sum(deptno) from depts_n3 where depts_n3.name = emps_n4.name) and empno > (select count(name) from depts_n3);
select * from emps_n4 where deptno <> (select count(deptno) from depts_n3 where depts_n3.name = emps_n4.name) and empno > (select count(name) from depts_n3);
drop table DEPTS_n3;
drop table EMPS_n4;
-- having
explain
select key, count(*)
from src
group by key
having count(*) > (select count(*) from src s1 where s1.key > '9' )
;
select key, count(*)
from src
group by key
having count(*) > (select count(*) from src s1 where s1.key = '90')
;
explain
select key, value, count(*)
from src b
where b.key in (select key from src where src.key > '8')
group by key, value
having count(*) > (select count(*) from src s1 where s1.key > '9' )
;
select key, value, count(*)
from src b
where b.key in (select key from src where src.key > '8')
group by key, value
having count(*) > (select count(*) from src s1 where s1.key > '9' )
;
-- since subquery has implicit group by this should have sq_count_check (HIVE-16793)
explain select * from part where p_size > (select max(p_size) from part group by p_type);
-- same as above, for correlated columns
explain select * from part where p_size > (select max(p_size) from part p where p.p_type = part.p_type group by p_type);
-- corr scalar subquery with aggregate, having non-equi corr predicate
explain select * from part where p_size <>
(select count(p_size) from part pp where part.p_type <> pp.p_type);
select * from part where p_size <>
(select count(p_size) from part pp where part.p_type <> pp.p_type);
create table t_n11(i int, j int);
insert into t_n11 values(3,1), (1,1);
-- for t_n11.i=1 inner query will result empty result, making count(*) = 0
-- therefore where predicate will be true
explain select * from t_n11 where 0 = (select count(*) from t_n11 tt_n11 where tt_n11.j <> t_n11.i);
select * from t_n11 where 0 = (select count(*) from t_n11 tt_n11 where tt_n11.j <> t_n11.i);
-- same as above but with avg aggregate, avg(tt_n11.i) will be null therefore
-- empty result set
explain select * from t_n11 where 0 = (select avg(tt_n11.i) from t_n11 tt_n11 where tt_n11.j <> t_n11.i);
select * from t_n11 where 0 = (select avg(tt_n11.i) from t_n11 tt_n11 where tt_n11.j <> t_n11.i);
create table tempty_n0(i int, j int);
-- following query has subquery on empty making count(*) to zero and where predicate
-- to true for all rows in outer query
explain select * from t_n11 where 0 = (select count(*) from tempty_n0 tt_n11 where t_n11.i=tt_n11.i);
select * from t_n11 where 0 = (select count(*) from tempty_n0 tt_n11 where t_n11.i=tt_n11.i);
-- same as above but with min aggregate, since min on empty will return null
-- making where predicate false for all
explain select * from t_n11 where 0 = (select min(tt_n11.j) from tempty_n0 tt_n11 where t_n11.i=tt_n11.i);
select * from t_n11 where 0 = (select min(tt_n11.j) from tempty_n0 tt_n11 where t_n11.i=tt_n11.i);
drop table t_n11;
drop table tempty_n0;
-- following queries shouldn't have a join with sq_count_check
explain select key, count(*) from src group by key having count(*) >
(select count(*) from src s1 group by 4);
explain select key, count(*) from src group by key having count(*) >
(select count(*) from src s1 where s1.key = '90' group by s1.key );
CREATE TABLE `store_sales`(
`ss_sold_date_sk` int,
`ss_quantity` int,
`ss_list_price` decimal(7,2));
CREATE TABLE `date_dim`(
`d_date_sk` int,
`d_year` int);
explain cbo with avg_sales as
(select avg(quantity*list_price) average_sales
from (select ss_quantity quantity
,ss_list_price list_price
from store_sales
,date_dim
where ss_sold_date_sk = d_date_sk
and d_year between 1999 and 2001 ) x)
select * from store_sales where ss_list_price > (select average_sales from avg_sales);
-- this one should have sq_count_check branch because it contains windowing function
explain cbo with avg_sales as
(select avg(quantity*list_price) over( partition by list_price) average_sales
from (select ss_quantity quantity
,ss_list_price list_price
from store_sales
,date_dim
where ss_sold_date_sk = d_date_sk
and d_year between 1999 and 2001 ) x)
select * from store_sales where ss_list_price > (select average_sales from avg_sales);
DROP TABLE store_sales;
DROP TABLE date_dim;