ql/src/test/queries/clientpositive/infer_bucket_sort_bucketed_table.q - hive - Git at Google

 --! qt:dataset:src
 --! qt:dataset:part


 set hive.exec.infer.bucket.sort=true;

 -- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the
 -- a number of files equal to the number of buckets
 CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING)
 CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS;

 -- Despite the fact that normally inferring would say this table is bucketed and sorted on key,
 -- this should be bucketed and sorted by value into 3 buckets
 INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1')
 SELECT key, count(1) FROM src GROUP BY KEY;

 DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1');

 -- If the count(*) from sampling the buckets matches the count(*) from each file, the table is
 -- bucketed
 SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1';

 SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1';

 SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 3 OUT OF 3) WHERE part = '1';

 SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1'
 GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a;
	--! qt:dataset:src
	--! qt:dataset:part


	set hive.exec.infer.bucket.sort=true;

	-- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the
	-- a number of files equal to the number of buckets
	CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING)
	CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS;

	-- Despite the fact that normally inferring would say this table is bucketed and sorted on key,
	-- this should be bucketed and sorted by value into 3 buckets
	INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1')
	SELECT key, count(1) FROM src GROUP BY KEY;

	DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1');

	-- If the count() from sampling the buckets matches the count() from each file, the table is
	-- bucketed
	SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1';

	SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1';

	SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 3 OUT OF 3) WHERE part = '1';

	SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1'
	GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a;