blob: 0aeec0e50fa3dcff0e20f77cf9541d67c141d4db [file] [log] [blame]
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
;
set hive.exec.reducers.max = 1;
set hive.explain.user=false;
CREATE TABLE tbl1_n4(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE tbl2_n3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE tbl3_n0(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
CREATE TABLE tbl4(key int, value string) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS;
insert overwrite table tbl1_n4 select * from src;
insert overwrite table tbl2_n3 select * from src;
insert overwrite table tbl3_n0 select * from src;
insert overwrite table tbl4 select * from src;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=200;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
-- A SMB join is being followed by a regular join on a non-bucketed table on a different key
-- Three tests below are all the same query with different alias, which changes dispatch order of GenMapRedWalker
-- This is dependent to iteration order of HashMap, so can be meaningless in non-sun jdk
-- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10]
-- c = TS[1]-RS[7]-JOIN[8]
-- a = TS[2]-MAPJOIN[11]
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value;
-- d = TS[0]-RS[7]-JOIN[8]-SEL[9]-FS[10]
-- b = TS[1]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]
-- a = TS[2]-MAPJOIN[11]
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src d on d.value = a.value;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src d on d.value = a.value;
-- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10]
-- a = TS[1]-MAPJOIN[11]
-- h = TS[2]-RS[7]-JOIN[8]
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src h on h.value = a.value;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src h on h.value = a.value;
-- A SMB join is being followed by a regular join on a non-bucketed table on the same key
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.key = a.key;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.key = a.key;
-- A SMB join is being followed by a regular join on a bucketed table on the same key
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key;
-- The join order ensures there is no SMB
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value;
set hive.auto.convert.sortmerge.join.to.mapjoin=true;
-- A SMB join is being followed by a regular join on a non-bucketed table on a different key
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value;
-- A SMB join is being followed by a regular join on a non-bucketed table on the same key
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.key = a.key;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.key = a.key;
-- A SMB join is being followed by a regular join on a bucketed table on the same key
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key;
-- The join order ensures there is no SMB
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value;