| --! qt:dataset:src |
| --! qt:dataset:part |
| set hive.exec.infer.bucket.sort=true; |
| set hive.exec.infer.bucket.sort.num.buckets.power.two=true; |
| set hive.stats.dbclass=fs; |
| |
| -- This tests inferring how data is bucketed/sorted from the operators in the reducer |
| -- and populating that information in partitions' metadata. In particular, those cases |
| -- where multi insert is used. |
| |
| CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING); |
| |
| -- Simple case, neither partition should be bucketed or sorted |
| |
| FROM src |
| INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value |
| INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT value, key; |
| |
| DESCRIBE FORMATTED test_table PARTITION (part = '1'); |
| DESCRIBE FORMATTED test_table PARTITION (part = '2'); |
| |
| -- The partitions should be bucketed and sorted by different keys |
| |
| FROM src |
| INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key |
| INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT COUNT(*), value GROUP BY value; |
| |
| DESCRIBE FORMATTED test_table PARTITION (part = '1'); |
| DESCRIBE FORMATTED test_table PARTITION (part = '2'); |
| |
| -- The first partition should be bucketed and sorted, the second should not |
| |
| FROM src |
| INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key |
| INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, value; |
| |
| DESCRIBE FORMATTED test_table PARTITION (part = '1'); |
| DESCRIBE FORMATTED test_table PARTITION (part = '2'); |
| |
| set hive.multigroupby.singlereducer=true; |
| |
| -- Test the multi group by single reducer optimization |
| -- Both partitions should be bucketed by key |
| FROM src |
| INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, COUNT(*) GROUP BY key |
| INSERT OVERWRITE TABLE test_table PARTITION (part = '2') SELECT key, SUM(SUBSTR(value, 5)) GROUP BY key; |
| |
| DESCRIBE FORMATTED test_table PARTITION (part = '1'); |
| DESCRIBE FORMATTED test_table PARTITION (part = '2'); |