blob: 808798337cfa40c7201da3f41bff22b058162b6f [file] [log] [blame]
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
set hive.exec.reducers.max = 1;
CREATE TABLE tbl1_n8(key int, value string) PARTITIONED by (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS rcfile;
CREATE TABLE tbl2_n7(key int, value string) PARTITIONED by (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS rcfile;
alter table tbl1_n8 set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
alter table tbl2_n7 set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
insert overwrite table tbl1_n8 partition (ds='1') select * from src where key < 10;
insert overwrite table tbl2_n7 partition (ds='1') select * from src where key < 10;
alter table tbl1_n8 change key key int;
insert overwrite table tbl1_n8 partition (ds='2') select * from src where key < 10;
alter table tbl1_n8 change key key string;
-- The subquery itself is being map-joined. Multiple partitions of tbl1_n8 with different schemas are being read for tbl2_n7
select /*+mapjoin(subq1)*/ count(*) from
(select a.key as key, a.value as value from tbl1_n8 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n7 a where key < 6) subq2
on subq1.key = subq2.key;
set hive.optimize.bucketmapjoin = true;
set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
-- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should
-- be converted to a bucketized mapside join. Multiple partitions of tbl1_n8 with different schemas are being read for each
-- bucket of tbl2_n7
select /*+mapjoin(subq1)*/ count(*) from
(select a.key as key, a.value as value from tbl1_n8 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n7 a where key < 6) subq2
on subq1.key = subq2.key;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should
-- be converted to a sort-merge join. Multiple partitions of tbl1_n8 with different schemas are being read for a
-- given file of tbl2_n7
select /*+mapjoin(subq1)*/ count(*) from
(select a.key as key, a.value as value from tbl1_n8 a where key < 6) subq1
join
(select a.key as key, a.value as value from tbl2_n7 a where key < 6) subq2
on subq1.key = subq2.key;
-- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side
-- join should be performed. Multiple partitions of tbl1_n8 with different schemas are being read for tbl2_n7
select /*+mapjoin(subq1)*/ count(*) from
(select a.key+1 as key, concat(a.value, a.value) as value from tbl1_n8 a) subq1
join
(select a.key+1 as key, concat(a.value, a.value) as value from tbl2_n7 a) subq2
on subq1.key = subq2.key;