| --! qt:dataset:src |
| |
| -- Turn vectorized execution off for this test. |
| SET hive.vectorized.execution.enabled=false; |
| -- Begin with the ORC dictionary key-size threshold at -1. |
| SET hive.exec.orc.dictionary.key.size.threshold=-1; |
| |
| -- The threshold is set to -1 to guarantee that dictionary encoding is turned off. |
| -- This tests that the data can be read back correctly when a string column is |
| -- stored without dictionary encoding. |
| |
| -- Single STRING column stored as ORC, with the serde and the input/output |
| -- formats spelled out by class name. |
| CREATE TABLE test_orc_n5 ( |
|     key STRING |
| ) |
| ROW FORMAT SERDE |
|     'org.apache.hadoop.hive.ql.io.orc.OrcSerde' |
| STORED AS |
|     INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' |
|     OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; |
| |
| -- Load ten sampled rows from src; this should be a single split. |
| INSERT OVERWRITE TABLE test_orc_n5 |
| SELECT key |
| FROM src TABLESAMPLE (10 ROWS); |
| |
| -- Read the column back (key is the table's only column). |
| SELECT key |
| FROM test_orc_n5; |
| |
| -- Set orc.stripe.size to 1 on the table ahead of the multi-stripe load. |
| ALTER TABLE test_orc_n5 |
| SET SERDEPROPERTIES ('orc.stripe.size' = '1'); |
| |
| -- Text-format staging table, filled from a local data file. |
| CREATE TABLE src_thousand ( |
|     key STRING |
| ) |
| STORED AS TEXTFILE; |
| |
| LOAD DATA |
|     LOCAL INPATH '../../data/files/kv1kv2.cogroup.txt' |
|     INTO TABLE src_thousand; |
| |
| -- Move the dictionary key-size threshold from -1 to 0.5 for the next load. |
| SET hive.exec.orc.dictionary.key.size.threshold=0.5; |
| |
| -- Add data to the table in such a way that alternate stripes encode the column |
| -- differently. Setting orc.stripe.size = 1 guarantees the stripes each have |
| -- 5000 rows. The first stripe will have 5 * 630 distinct values and thus be |
| -- above the cutoff of 50% and will be direct encoded. The second stripe |
| -- will have 5 * 1 distinct values and thus be under the cutoff and will be |
| -- dictionary encoded. The final stripe will have 630 distinct values out of |
| -- 1000 rows and be direct encoded. |
| |
| -- Rewrite test_orc_n5 with every src_thousand key under the prefixes a-e and k, |
| -- plus the constant 1 under the prefixes f-j, ordered by the resulting key. |
| INSERT OVERWRITE TABLE test_orc_n5 |
| SELECT key |
| FROM ( |
|     SELECT CONCAT("a", key) AS key FROM src_thousand |
|     UNION ALL |
|     SELECT CONCAT("b", key) AS key FROM src_thousand |
|     UNION ALL |
|     SELECT CONCAT("c", key) AS key FROM src_thousand |
|     UNION ALL |
|     SELECT CONCAT("d", key) AS key FROM src_thousand |
|     UNION ALL |
|     SELECT CONCAT("e", key) AS key FROM src_thousand |
|     UNION ALL |
|     SELECT CONCAT("f", 1) AS key FROM src_thousand |
|     UNION ALL |
|     SELECT CONCAT("g", 1) AS key FROM src_thousand |
|     UNION ALL |
|     SELECT CONCAT("h", 1) AS key FROM src_thousand |
|     UNION ALL |
|     SELECT CONCAT("i", 1) AS key FROM src_thousand |
|     UNION ALL |
|     SELECT CONCAT("j", 1) AS key FROM src_thousand |
|     UNION ALL |
|     SELECT CONCAT("k", key) AS key FROM src_thousand |
| ) prefixed_keys |
| ORDER BY key |
| LIMIT 11000; |
| |
| -- Checksum the reloaded column with a single aggregate over all rows. |
| SELECT SUM(HASH(key)) |
| FROM test_orc_n5; |