-- ql/src/test/queries/clientpositive/orc_empty_files.q - hive - Git at Google

 --! qt:dataset:src

 -- Test: count the rows of a bucketed ORC table in which most bucket files
 -- are empty, reading it through CombineHiveInputFormat.

 -- Run this test with vectorized execution turned off.
 set hive.vectorized.execution.enabled=false;

 -- ORC-backed table, declared with the explicit SerDe and input/output format
 -- classes, clustered on key into 3 buckets.
 CREATE TABLE test_orc_n2 (key STRING, cnt INT)
 CLUSTERED BY (key) INTO 3 BUCKETS
 ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
 STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
 OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat';


 -- Allow at most one reducer, and read input through CombineHiveInputFormat.
 set hive.exec.reducers.max = 1;
 set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;

 -- Creates a table bucketed into 3 buckets, of which only one contains data
 -- (specifically bucket 1). Buckets 0 and 2 are empty, so this tests reading
 -- from an empty file followed by a file containing data, and a file
 -- containing data followed by an empty file.
 -- This can produce unexpected results with CombineHiveInputFormat.

 -- Every src row maps to the constant 1, so the GROUP BY yields a single
 -- (key, cnt) row and only one bucket receives data.
 INSERT OVERWRITE TABLE test_orc_n2 SELECT one, COUNT(*) FROM (SELECT 1 AS one FROM src) a GROUP BY one;

 -- Read the table back across the empty and non-empty bucket files.
 SELECT count(*) FROM test_orc_n2;