blob: bd32a69085d552c0aba5b20229d7676013a71c00 [file] [log] [blame]
-- Test for HIVE-27267
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
create table target_table(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets stored as orc tblproperties ('bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default');
insert into table target_table values
('2017-05-17', 'pipeline', '50000000000000000000441610525'),
('2018-12-20', 'pipeline', '50000000000000000001048981030'),
('2020-06-30', 'pipeline', '50000000000000000002332575516'),
('2021-08-16', 'pipeline', '50000000000000000003897973989'),
('2017-06-06', 'pipeline', '50000000000000000000449148729'),
('2017-09-08', 'pipeline', '50000000000000000000525378314'),
('2022-08-30', 'pipeline', '50000000000000000005905545593'),
('2022-08-16', 'pipeline', '50000000000000000005905545593'),
('2018-05-03', 'pipeline', '50000000000000000000750826355'),
('2020-01-10', 'pipeline', '50000000000000000001816579677'),
('2021-11-01', 'pipeline', '50000000000000000004269423714'),
('2017-11-07', 'pipeline', '50000000000000000000585901787'),
('2019-10-15', 'pipeline', '50000000000000000001598843430'),
('2020-04-01', 'pipeline', '50000000000000000002035795461'),
('2020-02-24', 'pipeline', '50000000000000000001932600185'),
('2020-04-27', 'pipeline', '50000000000000000002108160849'),
('2016-07-05', 'pipeline', '50000000000000000000054405114'),
('2020-06-02', 'pipeline', '50000000000000000002234387967'),
('2020-08-21', 'pipeline', '50000000000000000002529168758'),
('2021-02-17', 'pipeline', '50000000000000000003158511687');
create table source_table(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets stored as orc tblproperties ('bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default');
insert into table source_table values
('2022-08-30', 'pipeline', '50000000000000000005905545593'),
('2022-08-16', 'pipeline', '50000000000000000005905545593'),
('2022-09-01', 'pipeline', '50000000000000000006008686831'),
('2022-08-30', 'pipeline', '50000000000000000005992620837'),
('2022-09-01', 'pipeline', '50000000000000000005992620837'),
('2022-09-01', 'pipeline', '50000000000000000005992621067'),
('2022-08-30', 'pipeline', '50000000000000000005992621067');
-- Test 2 queries in 4 configs.
-- Each query has 1 join that can be converted to bucket join.
-- One of the query receives the small table from Map vertex while the other recives it from Reducer vertex.
-- 4 configs enfoce MapJoin to be converted to one of the following joins:
-- 1. BucketMapJoin, 2. MapJoin, 3. VectorBucketMapJoin, 4. VectorMapJoin
set hive.auto.convert.join=true;
set hive.optimize.dynamic.partition.hashjoin=false;
-- 1. BucketMapJoin
set hive.convert.join.bucket.mapjoin.tez=true;
set hive.vectorized.execution.enabled=false;
explain extended
select * from target_table inner join
(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
select * from target_table inner join
(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
explain extended
select * from target_table inner join
(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
select * from target_table inner join
(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
-- 2. MapJoin
set hive.convert.join.bucket.mapjoin.tez=false;
set hive.vectorized.execution.enabled=false;
explain extended
select * from target_table inner join
(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
select * from target_table inner join
(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
explain extended
select * from target_table inner join
(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
select * from target_table inner join
(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
-- 3. VectorBucketMapJoin
set hive.convert.join.bucket.mapjoin.tez=true;
set hive.vectorized.execution.enabled=true;
explain extended
select * from target_table inner join
(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
select * from target_table inner join
(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
explain extended
select * from target_table inner join
(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
select * from target_table inner join
(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
-- 4. VectorMapJoin
set hive.convert.join.bucket.mapjoin.tez=false;
set hive.vectorized.execution.enabled=true;
explain extended
select * from target_table inner join
(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
select * from target_table inner join
(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
explain extended
select * from target_table inner join
(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
select * from target_table inner join
(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;