blob: ba862e2637bec2694305661788e40d66db34b1fe [file] [log] [blame]
--! qt:dataset:src
--! qt:dataset:part
set hive.vectorized.execution.enabled=false;
set hive.compute.query.using.stats=false;
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
set hive.merge.orcfile.stripe.level=false;
set hive.exec.dynamic.partition=true;
set hive.optimize.sort.dynamic.partition.threshold=-1;
set mapred.min.split.size=1000;
set mapred.max.split.size=2000;
set tez.grouping.min-size=1000;
set tez.grouping.max-size=2000;
set hive.merge.tezfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
-- SORT_QUERY_RESULTS
DROP TABLE orcfile_merge1_n1;
DROP TABLE orcfile_merge1b_n1;
DROP TABLE orcfile_merge1c_n1;
CREATE TABLE orcfile_merge1_n1 (key INT, value STRING)
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
CREATE TABLE orcfile_merge1b_n1 (key INT, value STRING)
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
CREATE TABLE orcfile_merge1c_n1 (key INT, value STRING)
PARTITIONED BY (ds STRING, part STRING) STORED AS ORC;
-- merge disabled
EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1_n1 PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
INSERT OVERWRITE TABLE orcfile_merge1_n1 PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1_n1/ds=1/part=0/;
set hive.merge.tezfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
-- auto-merge slow way
EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1b_n1 PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
INSERT OVERWRITE TABLE orcfile_merge1b_n1 PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1b_n1/ds=1/part=0/;
set hive.merge.orcfile.stripe.level=true;
-- auto-merge fast way
EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1c_n1 PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
INSERT OVERWRITE TABLE orcfile_merge1c_n1 PARTITION (ds='1', part)
SELECT key, value, PMOD(HASH(key), 2) as part
FROM src;
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1c_n1/ds=1/part=0/;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
-- Verify
SELECT SUM(HASH(c)) FROM (
SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
FROM orcfile_merge1_n1 WHERE ds='1'
) t;
SELECT SUM(HASH(c)) FROM (
SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
FROM orcfile_merge1b_n1 WHERE ds='1'
) t;
SELECT SUM(HASH(c)) FROM (
SELECT TRANSFORM(*) USING 'tr \t _' AS (c)
FROM orcfile_merge1c_n1 WHERE ds='1'
) t;
select count(*) from orcfile_merge1_n1;
select count(*) from orcfile_merge1b_n1;
select count(*) from orcfile_merge1c_n1;
DROP TABLE orcfile_merge1_n1;
DROP TABLE orcfile_merge1b_n1;
DROP TABLE orcfile_merge1c_n1;