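-- Stripe-level ORC merge test: create multiple small ORC files, merge them
-- with ALTER TABLE ... CONCATENATE, and verify file listings and row counts.
-- Vectorized execution is disabled so the queries run in row mode.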
set hive.vectorized.execution.enabled=false;
DROP TABLE IF EXISTS orcfile_merge1_n2;
DROP TABLE IF EXISTS orc_split_elim_n0;
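-- source table; the same data file is loaded twice, so every row appears twice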
create table orc_split_elim_n0 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc;
load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim_n0;
load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim_n0;
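-- target table with a small compression buffer; the two separate inserts below
-- produce at least two ORC files for CONCATENATE to merge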
create table orcfile_merge1_n2 (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc tblproperties("orc.compress.size"="4096");
insert overwrite table orcfile_merge1_n2 select * from orc_split_elim_n0 order by userid;
insert into table orcfile_merge1_n2 select * from orc_split_elim_n0 order by userid;
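-- list the files in the table directory before the merge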
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1_n2/;
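-- enable stripe-level merging of ORC files; the remaining settings pin
-- split grouping to a single group and shrink the ORC buffer size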
set hive.merge.tezfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.orcfile.stripe.level=true;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set tez.am.grouping.split-count=1;
set tez.grouping.split-count=1;
set hive.exec.orc.default.buffer.size=120;
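-- dump ORC file metadata (via the post-execution hook) before the merge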
SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecOrcFileDump;
select * from orcfile_merge1_n2 limit 1;
SET hive.exec.post.hooks=;
-- concatenate: merge the small ORC files at stripe level
ALTER TABLE orcfile_merge1_n2 CONCATENATE;
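-- fewer files should remain after concatenation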
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orcfile_merge1_n2/;
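-- sanity-check row counts after the merge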
select count(*) from orc_split_elim_n0;
-- orcfile_merge1_n2 has double the row count of orc_split_elim_n0
-- (one insert overwrite plus one insert into)
select count(*) from orcfile_merge1_n2;
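-- dump ORC file metadata again to confirm the stripes were merged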
SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecOrcFileDump;
select * from orcfile_merge1_n2 limit 1;
SET hive.exec.post.hooks=;
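-- write the source table out as ORC to a directory, requesting two reducers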
SET mapreduce.job.reduces=2;
INSERT OVERWRITE DIRECTORY 'output' stored as orcfile select * from orc_split_elim_n0;
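-- clean up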
DROP TABLE orc_split_elim_n0;
DROP TABLE orcfile_merge1_n2;