| --! qt:dataset:src |
| |
| -- Run the statements in this script with vectorized execution switched off. |
| set hive.vectorized.execution.enabled=false; |
| |
| -- Target table: two columns, bucketed on key into 3 buckets, with the ORC |
| -- serde and the ORC input/output format classes spelled out explicitly. |
| CREATE TABLE test_orc_n2 ( |
|   key STRING, |
|   cnt INT |
| ) |
| CLUSTERED BY (key) |
| INTO 3 BUCKETS |
| ROW FORMAT SERDE |
|   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' |
| STORED AS |
|   INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' |
|   OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; |
| |
| |
| -- Allow at most one reducer for the statements that follow. |
| set hive.exec.reducers.max = 1; |
| -- Read input through CombineHiveInputFormat, the input format this test exercises. |
| set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; |
| |
| -- Creates a table bucketed into 3 buckets, of which only one (bucket 1) contains data. |
| -- Buckets 0 and 2 are empty, so this tests reading from an empty file followed by a file |
| -- containing data, and a file containing data followed by an empty file. |
| -- This can produce unexpected results with CombineHiveInputFormat. |
| |
| -- Map every src row to the constant 1, group on that constant, and store |
| -- each group together with its row count, replacing the table contents. |
| INSERT OVERWRITE TABLE test_orc_n2 |
| SELECT |
|   ones.one, |
|   COUNT(*) |
| FROM ( |
|   SELECT 1 AS one |
|   FROM src |
| ) ones |
| GROUP BY ones.one; |
| |
| -- Read the table back and report how many rows it holds. |
| SELECT COUNT(*) |
| FROM test_orc_n2; |