| --! qt:dataset:src |
| set hive.mapred.mode=nonstrict; |
| ; |
| |
| set hive.exec.reducers.max = 1; |
| set hive.explain.user=false; |
| |
| CREATE TABLE tbl1_n4(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; |
| CREATE TABLE tbl2_n3(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; |
| CREATE TABLE tbl3_n0(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; |
| CREATE TABLE tbl4(key int, value string) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS; |
| |
| insert overwrite table tbl1_n4 select * from src; |
| insert overwrite table tbl2_n3 select * from src; |
| insert overwrite table tbl3_n0 select * from src; |
| insert overwrite table tbl4 select * from src; |
| |
| set hive.auto.convert.sortmerge.join=true; |
| set hive.optimize.bucketmapjoin = true; |
| set hive.optimize.bucketmapjoin.sortedmerge = true; |
| set hive.auto.convert.join=true; |
| set hive.auto.convert.join.noconditionaltask=true; |
| set hive.auto.convert.join.noconditionaltask.size=200; |
| set hive.auto.convert.sortmerge.join.to.mapjoin=false; |
| |
| -- A SMB join is being followed by a regular join on a non-bucketed table on a different key |
| |
| -- Three tests below are all the same query with different alias, which changes dispatch order of GenMapRedWalker |
| -- This is dependent to iteration order of HashMap, so can be meaningless in non-sun jdk |
| -- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] |
| -- c = TS[1]-RS[7]-JOIN[8] |
| -- a = TS[2]-MAPJOIN[11] |
| explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value; |
| select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value; |
| |
| -- d = TS[0]-RS[7]-JOIN[8]-SEL[9]-FS[10] |
| -- b = TS[1]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8] |
| -- a = TS[2]-MAPJOIN[11] |
| explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src d on d.value = a.value; |
| select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src d on d.value = a.value; |
| |
| -- b = TS[0]-OP[13]-MAPJOIN[11]-RS[6]-JOIN[8]-SEL[9]-FS[10] |
| -- a = TS[1]-MAPJOIN[11] |
| -- h = TS[2]-RS[7]-JOIN[8] |
| explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src h on h.value = a.value; |
| select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src h on h.value = a.value; |
| |
| -- A SMB join is being followed by a regular join on a non-bucketed table on the same key |
| explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.key = a.key; |
| select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.key = a.key; |
| |
| -- A SMB join is being followed by a regular join on a bucketed table on the same key |
| explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key; |
| select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key; |
| |
| -- The join order ensures there is no SMB |
| explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value; |
| select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value; |
| |
| set hive.auto.convert.sortmerge.join.to.mapjoin=true; |
| |
| -- A SMB join is being followed by a regular join on a non-bucketed table on a different key |
| explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value; |
| select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value; |
| |
| -- A SMB join is being followed by a regular join on a non-bucketed table on the same key |
| explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.key = a.key; |
| select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.key = a.key; |
| |
| -- A SMB join is being followed by a regular join on a bucketed table on the same key |
| explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key; |
| select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key; |
| |
| -- The join order ensures there is no SMB |
| explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value; |
| select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value; |