| -- Relax strict bucketing checks so pre-bucketed data files can be LOADed |
| -- directly into the bucketed table below (bypassing enforced bucketing). |
| set hive.strict.checks.bucketing=false; |
| |
| |
| -- Partitioned table bucketed INTO 2 BUCKETS on userid, with rows sorted |
| -- within each bucket by (pageid, postid, type, userid), stored as RCFile. |
| -- NOTE(review): the bucketing column (userid) is last in the SORTED BY list |
| -- while the join below uses all four columns as keys — presumably this file |
| -- exercises SMB-join planning for that layout; confirm against golden output. |
| create table tmp_smb_bucket_10(userid int, pageid int, postid int, type string) partitioned by (ds string) CLUSTERED BY (userid) SORTED BY (pageid, postid, type, userid) INTO 2 BUCKETS STORED AS RCFILE; |
| |
| -- Two partitions; the query at the bottom self-joins ds='1' against ds='2'. |
| alter table tmp_smb_bucket_10 add partition (ds = '1'); |
| alter table tmp_smb_bucket_10 add partition (ds = '2'); |
| |
| -- add dummy files to make sure that the number of files in each partition is same as number of buckets |
| |
| -- One pre-bucketed RC file per bucket (000000_0 and 000001_0) goes into each |
| -- partition, so each partition holds exactly 2 files = 2 buckets. |
| load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1'); |
| load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='1'); |
| |
| -- Same two files again for the second partition, keeping both partitions |
| -- identical in file count and content. |
| load data local inpath '../../data/files/smb_rc1/000000_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); |
| load data local inpath '../../data/files/smb_rc1/000001_0' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); |
| -- Disable CBO and enable sort-merge bucket map join so the planner is forced |
| -- down the SMB-join path this test is meant to cover. |
| set hive.cbo.enable=false; |
| set hive.optimize.bucketmapjoin = true; |
| set hive.optimize.bucketmapjoin.sortedmerge = true; |
| -- BucketizedHiveInputFormat presents each bucket file as its own input split, |
| -- which bucket map join requires. |
| set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; |
| |
| -- Plan-only check (explain, no execution): self-join of partition ds='1' |
| -- with ds='2', with a map-join hint on alias 'a'. The join keys are all |
| -- four data columns (userid, pageid, postid, type), listed in a different |
| -- order than the table's SORTED BY clause. |
| -- NOTE(review): whether this plan converts to an SMB join or falls back is |
| -- asserted by the golden explain output, not by anything in this file. |
| explain |
| select /*+mapjoin(a)*/ * from tmp_smb_bucket_10 a join tmp_smb_bucket_10 b |
| on (a.ds = '1' and b.ds = '2' and |
| a.userid = b.userid and |
| a.pageid = b.pageid and |
| a.postid = b.postid and |
| a.type = b.type); |
| |