blob: dd7274014c8b12a8f268769f8fb807e9eb6a0a7b [file] [log] [blame]
-- HIVE-27069
set hive.query.results.cache.enabled=false;
set hive.compute.query.using.stats=false;
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.auto.convert.sortmerge.join.to.mapjoin=true;
set hive.convert.join.bucket.mapjoin.tez=true;
set hive.fetch.task.conversion=none;
set hive.merge.nway.joins=false;
set hive.optimize.dynamic.partition.hashjoin=true;
set hive.optimize.index.filter=true;
set hive.optimize.remove.sq_count_check=true;
set hive.prewarm.enabled=false;
set hive.join.inner.residual=false;
set hive.limit.optimize.enable=true;
set hive.mapjoin.bucket.cache.size=10000;
set hive.strict.managed.tables=true;
set hive.tez.auto.reducer.parallelism=true;
set hive.tez.bucket.pruning=true;
CREATE TABLE DUP_TEST (id int , in_date timestamp , sample varchar(100)) stored as orc tblproperties('transactional'='true', 'transactional_properties'='default');
CREATE TABLE DUP_TEST_TARGET (id int , in_date timestamp , sample varchar(100)) CLUSTERED by (ID) INTO 5 BUCKETS STORED AS ORC tblproperties('transactional'='true', 'transactional_properties'='default');
INSERT INTO DUP_TEST
(id , in_date , sample)
values
(1 , '2023-04-14 10:11:12.111' , 'test1'),
(2 , '2023-04-14 10:11:12.111' , 'test2'),
(3 , '2023-04-14 10:11:12.111' , 'test3'),
(4 , '2023-04-14 10:11:12.111' , 'test4'),
(5 , '2023-04-14 10:11:12.111' , 'test5'),
(6 , '2023-04-14 10:11:12.111' , 'test6'),
(7 , '2023-04-14 10:11:12.111' , 'test7'),
(8 , '2023-04-14 10:11:12.111' , 'test8'),
(9 , '2023-04-14 10:11:12.111' , 'test9');
-- Run merge into the target table for the first time
MERGE INTO DUP_TEST_TARGET T USING (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id
WHEN MATCHED THEN UPDATE SET in_date = S.in_date , sample = S.sample
WHEN NOT MATCHED THEN INSERT VALUES (S.id, S.in_date , S.sample);
explain vectorization detail select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id;
select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id;
select * from DUP_TEST_TARGET T join DUP_TEST S ON T.id = S.id;