ql/src/test/queries/clientpositive/bucketmapjoin_with_subquery.q - hive - Git at Google

 -- HIVE-27069
 set hive.query.results.cache.enabled=false;
 set hive.compute.query.using.stats=false;
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
 set hive.auto.convert.join=true;
 set hive.auto.convert.sortmerge.join=true;
 set hive.auto.convert.sortmerge.join.to.mapjoin=true;
 set hive.convert.join.bucket.mapjoin.tez=true;
 set hive.fetch.task.conversion=none;
 set hive.merge.nway.joins=false;
 set hive.optimize.dynamic.partition.hashjoin=true;
 set hive.optimize.index.filter=true;
 set hive.optimize.remove.sq_count_check=true;
 set hive.prewarm.enabled=false;
 set hive.join.inner.residual=false;
 set hive.limit.optimize.enable=true;
 set hive.mapjoin.bucket.cache.size=10000;
 set hive.strict.managed.tables=true;
 set hive.tez.auto.reducer.parallelism=true;
 set hive.tez.bucket.pruning=true;

 CREATE TABLE DUP_TEST (id int , in_date timestamp , sample varchar(100)) stored as orc tblproperties('transactional'='true', 'transactional_properties'='default');

 CREATE TABLE DUP_TEST_TARGET (id int , in_date timestamp , sample varchar(100)) CLUSTERED by (ID) INTO 5 BUCKETS STORED AS ORC tblproperties('transactional'='true', 'transactional_properties'='default');

 INSERT INTO DUP_TEST
 (id , in_date , sample)
 values
 (1  , '2023-04-14 10:11:12.111' , 'test1'),
 (2  , '2023-04-14 10:11:12.111' , 'test2'),
 (3  , '2023-04-14 10:11:12.111' , 'test3'),
 (4  , '2023-04-14 10:11:12.111' , 'test4'),
 (5  , '2023-04-14 10:11:12.111' , 'test5'),
 (6  , '2023-04-14 10:11:12.111' , 'test6'),
 (7  , '2023-04-14 10:11:12.111' , 'test7'),
 (8  , '2023-04-14 10:11:12.111' , 'test8'),
 (9  , '2023-04-14 10:11:12.111' , 'test9');

 -- Run merge into the target table for the first time
 MERGE INTO DUP_TEST_TARGET T USING (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
 OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB  FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id
     WHEN MATCHED THEN UPDATE SET  in_date = S.in_date , sample = S.sample
     WHEN NOT MATCHED THEN INSERT VALUES (S.id, S.in_date , S.sample);

 explain vectorization detail select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
 OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB  FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id;

 select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
 OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB  FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id;

 select * from DUP_TEST_TARGET T join DUP_TEST S ON T.id = S.id;
	-- HIVE-27069
	set hive.query.results.cache.enabled=false;
	set hive.compute.query.using.stats=false;
	set hive.support.concurrency=true;
	set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
	set hive.auto.convert.join=true;
	set hive.auto.convert.sortmerge.join=true;
	set hive.auto.convert.sortmerge.join.to.mapjoin=true;
	set hive.convert.join.bucket.mapjoin.tez=true;
	set hive.fetch.task.conversion=none;
	set hive.merge.nway.joins=false;
	set hive.optimize.dynamic.partition.hashjoin=true;
	set hive.optimize.index.filter=true;
	set hive.optimize.remove.sq_count_check=true;
	set hive.prewarm.enabled=false;
	set hive.join.inner.residual=false;
	set hive.limit.optimize.enable=true;
	set hive.mapjoin.bucket.cache.size=10000;
	set hive.strict.managed.tables=true;
	set hive.tez.auto.reducer.parallelism=true;
	set hive.tez.bucket.pruning=true;

	CREATE TABLE DUP_TEST (id int , in_date timestamp , sample varchar(100)) stored as orc tblproperties('transactional'='true', 'transactional_properties'='default');

	CREATE TABLE DUP_TEST_TARGET (id int , in_date timestamp , sample varchar(100)) CLUSTERED by (ID) INTO 5 BUCKETS STORED AS ORC tblproperties('transactional'='true', 'transactional_properties'='default');

	INSERT INTO DUP_TEST
	(id , in_date , sample)
	values
	(1 , '2023-04-14 10:11:12.111' , 'test1'),
	(2 , '2023-04-14 10:11:12.111' , 'test2'),
	(3 , '2023-04-14 10:11:12.111' , 'test3'),
	(4 , '2023-04-14 10:11:12.111' , 'test4'),
	(5 , '2023-04-14 10:11:12.111' , 'test5'),
	(6 , '2023-04-14 10:11:12.111' , 'test6'),
	(7 , '2023-04-14 10:11:12.111' , 'test7'),
	(8 , '2023-04-14 10:11:12.111' , 'test8'),
	(9 , '2023-04-14 10:11:12.111' , 'test9');

	-- Run merge into the target table for the first time
	MERGE INTO DUP_TEST_TARGET T USING (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
	OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id
	WHEN MATCHED THEN UPDATE SET in_date = S.in_date , sample = S.sample
	WHEN NOT MATCHED THEN INSERT VALUES (S.id, S.in_date , S.sample);

	explain vectorization detail select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
	OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id;

	select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
	OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id;

	select * from DUP_TEST_TARGET T join DUP_TEST S ON T.id = S.id;