blob: 02309982b70028b07474c6ef05af2643e75dce1d [file] [log] [blame]
--! qt:disabled:Disabled in HIVE-19509
set hive.strict.checks.bucketing=false;
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000;
CREATE TABLE srcbucket_mapjoin_n14(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
CREATE TABLE tab_part_n9 (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
CREATE TABLE srcbucket_mapjoin_part_n15 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n14 partition(ds='2008-04-08');
load data local inpath '../../data/files/bmj1/000001_0' INTO TABLE srcbucket_mapjoin_n14 partition(ds='2008-04-08');
load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08');
load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08');
load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08');
load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08');
-- SORT_QUERY_RESULTS
set hive.optimize.bucketingsorting=false;
insert overwrite table tab_part_n9 partition (ds='2008-04-08')
select key,value from srcbucket_mapjoin_part_n15;
CREATE TABLE tab_n8(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
insert overwrite table tab_n8 partition (ds='2008-04-08')
select key,value from srcbucket_mapjoin_n14;
analyze table srcbucket_mapjoin_n14 compute statistics for columns;
analyze table srcbucket_mapjoin_part_n15 compute statistics for columns;
analyze table tab_n8 compute statistics for columns;
analyze table tab_part_n9 compute statistics for columns;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.key = b.key order by a.key, a.value, b.value;
select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.key = b.key order by a.key, a.value, b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.key = b.key order by a.key, a.value, b.value;
select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.key = b.key order by a.key, a.value, b.value;
set hive.auto.convert.join.noconditionaltask.size=900;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select count(*)
from
(select distinct key from tab_part_n9) a join tab_n8 b on a.key = b.key;
select count(*)
from
(select distinct key from tab_part_n9) a join tab_n8 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select count(*)
from
(select distinct key from tab_part_n9) a join tab_n8 b on a.key = b.key;
select count(*)
from
(select distinct key from tab_part_n9) a join tab_n8 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select count(*)
from
(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c
join
tab_part_n9 d on c.key = d.key;
select count(*)
from
(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c
join
tab_part_n9 d on c.key = d.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select count(*)
from
(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c
join
tab_part_n9 d on c.key = d.key;
select count(*)
from
(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c
join
tab_part_n9 d on c.key = d.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select count(*)
from
tab_part_n9 d
join
(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c on c.key = d.key;
select count(*)
from
tab_part_n9 d
join
(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c on c.key = d.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select count(*)
from
tab_part_n9 d
join
(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c on c.key = d.key;
select count(*)
from
tab_part_n9 d
join
(select a.key as key, a.value as value from tab_n8 a join tab_part_n9 b on a.key = b.key) c on c.key = d.key;
-- one side is really bucketed. srcbucket_mapjoin_n14 is not really a bucketed table.
-- In this case the sub-query is chosen as the big table.
set hive.convert.join.bucket.mapjoin.tez = false;
set hive.auto.convert.join.noconditionaltask.size=1000;
explain
select a.k1, a.v1, b.value
from (select sum(substr(srcbucket_mapjoin_n14.value,5)) as v1, key as k1 from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
join tab_n8 b on a.k1 = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.k1, a.v1, b.value
from (select sum(substr(srcbucket_mapjoin_n14.value,5)) as v1, key as k1 from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
join tab_n8 b on a.k1 = b.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.k1, a.v1, b.value
from (select sum(substr(tab_n8.value,5)) as v1, key as k1 from tab_part_n9 join tab_n8 on tab_part_n9.key = tab_n8.key GROUP BY tab_n8.key) a
join tab_n8 b on a.k1 = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.k1, a.v1, b.value
from (select sum(substr(tab_n8.value,5)) as v1, key as k1 from tab_part_n9 join tab_n8 on tab_part_n9.key = tab_n8.key GROUP BY tab_n8.key) a
join tab_n8 b on a.k1 = b.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.k1, a.v1, b.value
from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab_n8 x join tab_n8 y on x.key = y.key GROUP BY x.key) a
join tab_part_n9 b on a.k1 = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.k1, a.v1, b.value
from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab_n8 x join tab_n8 y on x.key = y.key GROUP BY x.key) a
join tab_part_n9 b on a.k1 = b.key;
-- multi-way join
set hive.convert.join.bucket.mapjoin.tez = false;
set hive.auto.convert.join.noconditionaltask.size=20000;
explain
select a.key, a.value, b.value
from tab_part_n9 a join tab_n8 b on a.key = b.key join tab_n8 c on a.key = c.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
from tab_part_n9 a join tab_n8 b on a.key = b.key join tab_n8 c on a.key = c.key;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, c.value
from (select x.key, x.value from tab_part_n9 x join tab_n8 y on x.key = y.key) a join tab_n8 c on a.key = c.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, c.value
from (select x.key, x.value from tab_part_n9 x join tab_n8 y on x.key = y.key) a join tab_n8 c on a.key = c.key;
-- in this case sub-query is the small table
set hive.convert.join.bucket.mapjoin.tez = false;
set hive.auto.convert.join.noconditionaltask.size=900;
explain
select a.key, a.value, b.value
from (select key, sum(substr(srcbucket_mapjoin_n14.value,5)) as value from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
join tab_part_n9 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
from (select key, sum(substr(srcbucket_mapjoin_n14.value,5)) as value from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
join tab_part_n9 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = false;
set hive.map.aggr=false;
explain
select a.key, a.value, b.value
from (select key, sum(substr(srcbucket_mapjoin_n14.value,5)) as value from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
join tab_part_n9 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
from (select key, sum(substr(srcbucket_mapjoin_n14.value,5)) as value from srcbucket_mapjoin_n14 GROUP BY srcbucket_mapjoin_n14.key) a
join tab_part_n9 b on a.key = b.key;
-- join on non-bucketed column results in shuffle join.
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.value = b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.value = b.value;
CREATE TABLE tab1_n4(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
insert overwrite table tab1_n4
select key,value from srcbucket_mapjoin_n14;
set hive.auto.convert.join.noconditionaltask.size=20000;
set hive.convert.join.bucket.mapjoin.tez = false;
explain
select a.key, a.value, b.value
from tab1_n4 a join tab_part_n9 b on a.key = b.key;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
from tab1_n4 a join tab_part_n9 b on a.key = b.key;
-- No map joins should be created.
set hive.convert.join.bucket.mapjoin.tez = false;
set hive.auto.convert.join.noconditionaltask.size=15000;
explain select a.key, b.key from tab_part_n9 a join tab_part_n9 c on a.key = c.key join tab_part_n9 b on a.value = b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
explain select a.key, b.key from tab_part_n9 a join tab_part_n9 c on a.key = c.key join tab_part_n9 b on a.value = b.value;
set hive.convert.join.bucket.mapjoin.tez = false;
-- This wont have any effect as the column ds is partition column which is not bucketed.
explain
select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.key = b.key and a.ds = b.ds;
set hive.convert.join.bucket.mapjoin.tez = true;
explain
select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.key = b.key and a.ds = b.ds;
-- HIVE-17792 : Enable Bucket Map Join when there are extra keys other than bucketed columns
set hive.auto.convert.join.noconditionaltask.size=20000;
set hive.convert.join.bucket.mapjoin.tez = false;
explain select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.key = b.key and a.value = b.value;
select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.key = b.key and a.value = b.value
order by a.key, a.value, b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
explain select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.key = b.key and a.value = b.value;
select a.key, a.value, b.value
from tab_n8 a join tab_part_n9 b on a.key = b.key and a.value = b.value
order by a.key, a.value, b.value;
-- With non-bucketed small table
CREATE TABLE tab2_n4(key int, value string) PARTITIONED BY(ds STRING) STORED AS TEXTFILE;
insert overwrite table tab2_n4 partition (ds='2008-04-08')
select key,value from srcbucket_mapjoin_n14;
analyze table tab2_n4 compute statistics for columns;
set hive.convert.join.bucket.mapjoin.tez = false;
explain select a.key, a.value, b.value
from tab2_n4 a join tab_part_n9 b on a.key = b.key and a.value = b.value;
select a.key, a.value, b.value
from tab2_n4 a join tab_part_n9 b on a.key = b.key and a.value = b.value
order by a.key, a.value, b.value;
set hive.convert.join.bucket.mapjoin.tez = true;
explain select a.key, a.value, b.value
from tab2_n4 a join tab_part_n9 b on a.key = b.key and a.value = b.value;
select a.key, a.value, b.value
from tab2_n4 a join tab_part_n9 b on a.key = b.key and a.value = b.value
order by a.key, a.value, b.value;