ql/src/test/queries/clientpositive/bucket5.q - hive - Git at Google

 --! qt:dataset:src
 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 ;

 set hive.exec.reducers.max = 1;
 set hive.merge.mapfiles = true;
 set hive.merge.mapredfiles = true;
 set hive.merge.tezfiles = true;
 set mapred.reduce.tasks = 2;

 -- Tests that when a multi insert inserts into a bucketed table and a table which is not bucketed
 -- the bucketed table is not merged and the table which is not bucketed is

 CREATE TABLE bucketed_table(key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
 CREATE TABLE unbucketed_table(key INT, value STRING);

 EXPLAIN EXTENDED
 FROM src
 INSERT OVERWRITE TABLE bucketed_table SELECT key, value
 INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key;

 FROM src
 INSERT OVERWRITE TABLE bucketed_table SELECT key, value
 INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key;

 DESC FORMATTED bucketed_table;

 SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 1 OUT OF 2) s LIMIT 10;
 SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 2 OUT OF 2) s LIMIT 10;

 -- Should be 2 (not merged)
 SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM bucketed_table;

 -- Should be 1 (merged)
 SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM unbucketed_table;
	--! qt:dataset:src
	set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
	;

	set hive.exec.reducers.max = 1;
	set hive.merge.mapfiles = true;
	set hive.merge.mapredfiles = true;
	set hive.merge.tezfiles = true;
	set mapred.reduce.tasks = 2;

	-- Tests that when a multi insert inserts into a bucketed table and a table which is not bucketed
	-- the bucketed table is not merged and the table which is not bucketed is

	CREATE TABLE bucketed_table(key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
	CREATE TABLE unbucketed_table(key INT, value STRING);

	EXPLAIN EXTENDED
	FROM src
	INSERT OVERWRITE TABLE bucketed_table SELECT key, value
	INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key;

	FROM src
	INSERT OVERWRITE TABLE bucketed_table SELECT key, value
	INSERT OVERWRITE TABLE unbucketed_table SELECT key, value cluster by key;

	DESC FORMATTED bucketed_table;

	SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 1 OUT OF 2) s LIMIT 10;
	SELECT * FROM bucketed_table TABLESAMPLE (BUCKET 2 OUT OF 2) s LIMIT 10;

	-- Should be 2 (not merged)
	SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM bucketed_table;

	-- Should be 1 (merged)
	SELECT COUNT(DISTINCT INPUT__FILE__NAME) FROM unbucketed_table;