-- blob: 5ef66e7f01803b0b1e96df2e2fee196670134258
--! qt:dataset:src
--! qt:dataset:part
-- Turn off vectorized execution and stats-based query answering so the
-- verification queries below actually read the ORC data files.
set hive.vectorized.execution.enabled=false;
set hive.compute.query.using.stats=false;
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
set hive.merge.orcfile.stripe.level=false;
-- Dynamic partitioning is needed for the PARTITION (ds='1', part) inserts.
set hive.exec.dynamic.partition=true;
-- Tiny split sizes / split-count=2 force multiple mappers per insert, so
-- each partition ends up with several small ORC files for the merge steps
-- to combine.
set mapred.min.split.size=1000;
set mapred.max.split.size=2000;
set tez.am.grouping.split-count=2;
set tez.grouping.split-count=2;
-- Start with all file-merge options disabled; they are re-enabled further
-- down to exercise the different merge paths.
set hive.merge.tezfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
-- SORT_QUERY_RESULTS
-- Clean up any leftovers from a previous run. IF EXISTS makes the cleanup
-- idempotent instead of relying on hive.exec.drop.ignorenonexistent.
DROP TABLE IF EXISTS orcfile_merge1;
DROP TABLE IF EXISTS orcfile_merge1b;
DROP TABLE IF EXISTS orcfile_merge1c;
-- Three identically-shaped partitioned ORC tables, loaded under different
-- merge settings (see the sections below):
--   orcfile_merge1  - merging disabled at load; CONCATENATE is run on it later
--   orcfile_merge1b - auto-merge via an extra merge task (stripe-level off)
--   orcfile_merge1c - auto-merge at ORC stripe level
-- The small orc.compress.size keeps compression buffers tiny for test data.
CREATE TABLE orcfile_merge1 (key INT, value STRING)
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096");
CREATE TABLE orcfile_merge1b (key INT, value STRING)
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096");
CREATE TABLE orcfile_merge1c (key INT, value STRING)
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties("orc.compress"="SNAPPY","orc.compress.size"="4096");
-- merge disabled
-- With all merge options off, the dynamic-partition insert should leave
-- multiple small ORC files in each partition (one per mapper).
EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
-- List the partition directory to show the unmerged small files.
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1/ds=1/part=0/;
-- Enable file merging; hive.merge.orcfile.stripe.level is still false, so
-- merging goes through the generic merge task ("slow way").
set hive.merge.tezfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
-- auto-merge slow way
EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
INSERT OVERWRITE TABLE orcfile_merge1b PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
-- The partition directory should now contain fewer, merged files.
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1b/ds=1/part=0/;
-- Stripe-level ORC merge: concatenates existing stripes without
-- decompressing/re-encoding the data ("fast way").
set hive.merge.orcfile.stripe.level=true;
-- auto-merge fast way
EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
INSERT OVERWRITE TABLE orcfile_merge1c PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
-- List the stripe-level-merged partition directory.
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1c/ds=1/part=0/;
-- Use plain HiveInputFormat (no split combining) for the verification reads.
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
-- Verify
-- Checksum each row: TRANSFORM pipes rows through `tr`, replacing tabs with
-- underscores so a whole row hashes as one string; SUM(HASH(..)) must match
-- between the merged and unmerged tables.
SELECT SUM(HASH(c)) FROM (
SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
FROM orcfile_merge1 WHERE ds='1'
) t;
SELECT SUM(HASH(c)) FROM (
SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
FROM orcfile_merge1b WHERE ds='1'
) t;
-- Row counts must also agree regardless of how the files were merged.
select count(*) from orcfile_merge1;
select count(*) from orcfile_merge1b;
-- Force a single grouped split for the concatenate job.
set tez.am.grouping.split-count=1;
set tez.grouping.split-count=1;
-- concatenate
-- ALTER TABLE ... CONCATENATE merges the small ORC files that the
-- merge-disabled insert left in this partition.
explain ALTER TABLE orcfile_merge1 PARTITION (ds='1', part='0') CONCATENATE;
ALTER TABLE orcfile_merge1 PARTITION (ds='1', part='0') CONCATENATE;
-- The partition should now contain merged file(s) instead of many small ones.
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1/ds=1/part=0/;
-- Verify
-- Same row-checksum comparison as above, now covering the stripe-level
-- merged table and the concatenated table.
SELECT SUM(HASH(c)) FROM (
SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
FROM orcfile_merge1c WHERE ds='1'
) t;
SELECT SUM(HASH(c)) FROM (
SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
FROM orcfile_merge1 WHERE ds='1'
) t;
select count(*) from orcfile_merge1;
select count(*) from orcfile_merge1c;
-- PostExecOrcFileDump dumps ORC file metadata after each query, exposing
-- the file/stripe structure produced by CONCATENATE vs stripe-level merge.
SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecOrcFileDump;
select * from orcfile_merge1 where ds='1' and part='0' limit 1;
select * from orcfile_merge1c where ds='1' and part='0' limit 1;
-- Teardown. IF EXISTS keeps the script re-runnable even if an earlier
-- statement failed and some tables were never created.
DROP TABLE IF EXISTS orcfile_merge1;
DROP TABLE IF EXISTS orcfile_merge1b;
DROP TABLE IF EXISTS orcfile_merge1c;