ql/src/test/queries/clientpositive/partition_wise_fileformat14.q - hive - Git at Google

 --! qt:dataset:src
 set hive.mapred.mode=nonstrict;
 set hive.exec.reducers.max = 1;

 CREATE TABLE tbl1_n8(key int, value string) PARTITIONED by (ds string)
 CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS rcfile;
 CREATE TABLE tbl2_n7(key int, value string) PARTITIONED by (ds string)
 CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS rcfile;

 alter table tbl1_n8 set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
 alter table tbl2_n7 set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';

 insert overwrite table tbl1_n8 partition (ds='1') select * from src where key < 10;
 insert overwrite table tbl2_n7 partition (ds='1') select * from src where key < 10;

 alter table tbl1_n8 change key key int;
 insert overwrite table tbl1_n8 partition (ds='2') select * from src where key < 10;

 alter table tbl1_n8 change key key string;

 -- The subquery itself is being map-joined. Multiple partitions of tbl1_n8 with different schemas are being read for tbl2_n7
 select /*+mapjoin(subq1)*/ count(*) from
   (select a.key as key, a.value as value from tbl1_n8 a where key < 6) subq1
     join
   (select a.key as key, a.value as value from tbl2_n7 a where key < 6) subq2
   on subq1.key = subq2.key;

 set hive.optimize.bucketmapjoin = true;
 set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;

 -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should
 -- be converted to a bucketized mapside join. Multiple partitions of tbl1_n8 with different schemas are being read for each
 -- bucket of tbl2_n7
 select /*+mapjoin(subq1)*/ count(*) from
   (select a.key as key, a.value as value from tbl1_n8 a where key < 6) subq1
     join
   (select a.key as key, a.value as value from tbl2_n7 a where key < 6) subq2
   on subq1.key = subq2.key;

 set hive.optimize.bucketmapjoin.sortedmerge = true;

 -- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should
 -- be converted to a sort-merge join. Multiple partitions of tbl1_n8 with different schemas are being read for a
 -- given file of tbl2_n7
 select /*+mapjoin(subq1)*/ count(*) from
   (select a.key as key, a.value as value from tbl1_n8 a where key < 6) subq1
     join
   (select a.key as key, a.value as value from tbl2_n7 a where key < 6) subq2
   on subq1.key = subq2.key;

 -- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side
 -- join should be performed.  Multiple partitions of tbl1_n8 with different schemas are being read for tbl2_n7
 select /*+mapjoin(subq1)*/ count(*) from
   (select a.key+1 as key, concat(a.value, a.value) as value from tbl1_n8 a) subq1
     join
   (select a.key+1 as key, concat(a.value, a.value) as value from tbl2_n7 a) subq2
   on subq1.key = subq2.key;
	--! qt:dataset:src
	set hive.mapred.mode=nonstrict;
	set hive.exec.reducers.max = 1;

	CREATE TABLE tbl1_n8(key int, value string) PARTITIONED by (ds string)
	CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS rcfile;
	CREATE TABLE tbl2_n7(key int, value string) PARTITIONED by (ds string)
	CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS rcfile;

	alter table tbl1_n8 set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
	alter table tbl2_n7 set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';

	insert overwrite table tbl1_n8 partition (ds='1') select * from src where key < 10;
	insert overwrite table tbl2_n7 partition (ds='1') select * from src where key < 10;

	alter table tbl1_n8 change key key int;
	insert overwrite table tbl1_n8 partition (ds='2') select * from src where key < 10;

	alter table tbl1_n8 change key key string;

	-- The subquery itself is being map-joined. Multiple partitions of tbl1_n8 with different schemas are being read for tbl2_n7
	select /+mapjoin(subq1)/ count(*) from
	(select a.key as key, a.value as value from tbl1_n8 a where key < 6) subq1
	join
	(select a.key as key, a.value as value from tbl2_n7 a where key < 6) subq2
	on subq1.key = subq2.key;

	set hive.optimize.bucketmapjoin = true;
	set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;

	-- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should
	-- be converted to a bucketized mapside join. Multiple partitions of tbl1_n8 with different schemas are being read for each
	-- bucket of tbl2_n7
	select /+mapjoin(subq1)/ count(*) from
	(select a.key as key, a.value as value from tbl1_n8 a where key < 6) subq1
	join
	(select a.key as key, a.value as value from tbl2_n7 a where key < 6) subq2
	on subq1.key = subq2.key;

	set hive.optimize.bucketmapjoin.sortedmerge = true;

	-- The subquery itself is being map-joined. Since the sub-query only contains selects and filters, it should
	-- be converted to a sort-merge join. Multiple partitions of tbl1_n8 with different schemas are being read for a
	-- given file of tbl2_n7
	select /+mapjoin(subq1)/ count(*) from
	(select a.key as key, a.value as value from tbl1_n8 a where key < 6) subq1
	join
	(select a.key as key, a.value as value from tbl2_n7 a where key < 6) subq2
	on subq1.key = subq2.key;

	-- Since the join key is modified by the sub-query, neither sort-merge join not bucketized map-side
	-- join should be performed. Multiple partitions of tbl1_n8 with different schemas are being read for tbl2_n7
	select /+mapjoin(subq1)/ count(*) from
	(select a.key+1 as key, concat(a.value, a.value) as value from tbl1_n8 a) subq1
	join
	(select a.key+1 as key, concat(a.value, a.value) as value from tbl2_n7 a) subq2
	on subq1.key = subq2.key;