blob: acb3c23da8f01c624b10daafc35ac52a462f39b7 [file] [log] [blame]
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
set hive.auto.convert.anti.join=true;
-- SORT_QUERY_RESULTS
-- data setup
DROP TABLE IF EXISTS part_subq;
CREATE TABLE part_subq(
p_partkey INT,
p_name STRING,
p_mfgr STRING,
p_brand STRING,
p_type STRING,
p_size INT,
p_container STRING,
p_retailprice DOUBLE,
p_comment STRING
);
LOAD DATA LOCAL INPATH '../../data/files/tpch/tiny/part.tbl.bz2' overwrite into table part_subq;
-- non agg, non corr
explain
select key, count(*)
from src
group by key
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key;
select key, count(*)
from src
group by key
having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key )
;
set hive.optimize.correlation=false;
-- agg, non corr
explain
select p_mfgr, avg(p_size)
from part_subq b
group by b.p_mfgr
having b.p_mfgr in
(select p_mfgr
from part_subq
group by p_mfgr
having max(p_size) - min(p_size) < 20
)
;
set hive.optimize.correlation=true;
-- agg, non corr
explain
select p_mfgr, avg(p_size)
from part_subq b
group by b.p_mfgr
having b.p_mfgr in
(select p_mfgr
from part_subq
group by p_mfgr
having max(p_size) - min(p_size) < 20
)
;
-- join on agg
select b.key, min(b.value)
from src b
group by b.key
having b.key in ( select a.key
from src a
where a.value > 'val_9' and a.value = min(b.value)
)
;
-- where and having
-- Plan is:
-- Stage 1: b semijoin sq1:src (subquery in where)
-- Stage 2: group by Stage 1 o/p
-- Stage 5: group by on sq2:src (subquery in having)
-- Stage 6: Stage 2 o/p semijoin Stage 5
explain
select key, value, count(*)
from src b
where b.key in (select key from src where src.key > '8')
group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
select key, value, count(*)
from src b
where b.key in (select key from src where src.key > '8')
group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
set hive.auto.convert.join=true;
-- Plan is:
-- Stage 5: group by on sq2:src (subquery in having)
-- Stage 10: hashtable for sq1:src (subquery in where)
-- Stage 2: b map-side semijoin Stage 10 o/p
-- Stage 3: Stage 2 semijoin Stage 5
-- Stage 9: construct hastable for Stage 5 o/p
-- Stage 6: Stage 2 map-side semijoin Stage 9
explain
select key, value, count(*)
from src b
where b.key in (select key from src where src.key > '8')
group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
-- both having and where corr
explain
select key, value, count(*)
from src b
where b.key in (select key from src where src.value = b.value)
group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
select key, value, count(*)
from src b
where b.key in (select key from src where src.value = b.value)
group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
-- non agg, non corr, windowing
explain
select p_mfgr, p_name, avg(p_size)
from part_subq
group by p_mfgr, p_name
having p_name in
(select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq)
;
CREATE TABLE src_null_n4 (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE src_null_n4;
INSERT INTO src_null_n4 values('5444', null);
explain
select key, value, count(*)
from src_null_n4 b
where NOT EXISTS (select key from src_null_n4 where src_null_n4.value <> b.value)
group by key, value
having count(*) not in (select count(*) from src_null_n4 s1 where s1.key > '9' and s1.value <> b.value group by s1.key );
set hive.auto.convert.anti.join=false;
select key, value, count(*)
from src_null_n4 b
where NOT EXISTS (select key from src_null_n4 where src_null_n4.value <> b.value)
group by key, value
having count(*) not in (select count(*) from src_null_n4 s1 where s1.key > '9' and s1.value <> b.value group by s1.key );
explain
select key, value, count(*)
from src_null_n4 b
where NOT EXISTS (select key from src_null_n4 where src_null_n4.value <> b.value)
group by key, value
having count(*) not in (select count(*) from src_null_n4 s1 where s1.key > '9' and s1.value <> b.value group by s1.key );
select key, value, count(*)
from src_null_n4 b
where NOT EXISTS (select key from src_null_n4 where src_null_n4.value <> b.value)
group by key, value
having count(*) not in (select count(*) from src_null_n4 s1 where s1.key > '9' and s1.value <> b.value group by s1.key );
DROP TABLE src_null_n4;
DROP TABLE part_subq;