set hive.mapred.mode=nonstrict;
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.vectorized.execution.enabled=true;
-- enable predicate pushdown (PPD)
set hive.optimize.index.filter=true;
set hive.explain.user=false;
-- needed for TestCliDriver but not TestMiniTezCliDriver
-- set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
-- attempts to get compaction to run - see below
-- set yarn.scheduler.maximum-allocation-mb=2024;
-- set hive.tez.container.size=500;
-- set mapreduce.map.memory.mb=500;
-- set mapreduce.reduce.memory.mb=500;
-- set mapred.job.map.memory.mb=500;
-- set mapred.job.reduce.memory.mb=500;
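-- illustrative (not part of the original test): a "set" with just the key echoes
-- its current value - a quick way to verify the settings above took effect
set hive.txn.manager;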
CREATE TEMPORARY FUNCTION runWorker AS 'org.apache.hadoop.hive.ql.udf.UDFRunWorker';
create table mydual_n0(a int);
insert into mydual_n0 values(1);
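-- illustrative sanity check (not asserted by the original test): mydual_n0 must
-- hold exactly 1 row so that a select of runWorker() invokes it exactly once
select count(*) from mydual_n0;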
CREATE TABLE over10k_n9(t tinyint,
si smallint,
i int,
b bigint,
f float,
d double,
bo boolean,
s string,
ts timestamp,
`dec` decimal(4,2),
bin binary)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
TBLPROPERTIES ("hive.serialization.decode.binary.as.base64"="false");
-- oddly, this file has 9,999 rows, not >10K as the table name suggests
LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over10k_n9;
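-- illustrative check of the row count noted above; assuming data/files/over1k is
-- unchanged, this should return 9999
select count(*) from over10k_n9;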
CREATE TABLE over10k_orc_bucketed_n0(t tinyint,
si smallint,
i int,
b bigint,
f float,
d double,
bo boolean,
s string,
ts timestamp,
`dec` decimal(4,2),
bin binary) CLUSTERED BY(si) INTO 4 BUCKETS STORED AS ORC;
-- this produces about 250 distinct values across all 4 equivalence classes
select distinct si, si%4 from over10k_n9 order by si;
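-- illustrative breakdown per equivalence class (the per-class counts are an
-- assumption about the data file, not asserted by the original test)
select si%4 as eq_class, count(distinct si) as distinct_si from over10k_n9 group by si%4 order by eq_class;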
-- explain insert into over10k_orc_bucketed_n0 select * from over10k_n9;
insert into over10k_orc_bucketed_n0 select * from over10k_n9;
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/over10k_orc_bucketed_n0;
-- create copy_N files
insert into over10k_orc_bucketed_n0 select * from over10k_n9;
-- the output of this is masked in the .out - it is visible in .orig
dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/over10k_orc_bucketed_n0;
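-- illustrative: two inserts of the same data should leave 2 x 9999 = 19998 rows
-- (assuming over1k has 9,999 rows as noted above)
select count(*) from over10k_orc_bucketed_n0;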
-- this actually shows the data files in the .out on Tez but not on LLAP
select distinct 7 as seven, INPUT__FILE__NAME from over10k_orc_bucketed_n0;
-- convert table to acid
alter table over10k_orc_bucketed_n0 convert to acid;
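-- illustrative: confirm that the conversion set the transactional table property
show tblproperties over10k_orc_bucketed_n0('transactional');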
-- this should vectorize (and push predicate to storage: filterExpr in TableScan)
-- Execution mode: vectorized (both Map and Reducer)
explain select t, si, i from over10k_orc_bucketed_n0 where b = 4294967363 and t < 100 order by t, si, i;
select t, si, i from over10k_orc_bucketed_n0 where b = 4294967363 and t < 100 order by t, si, i;
-- this should vectorize (and push predicate to storage: filterExpr in TableScan)
-- Execution mode: vectorized
explain select ROW__ID, t, si, i from over10k_orc_bucketed_n0 where b = 4294967363 and t < 100 order by ROW__ID;
select ROW__ID, t, si, i from over10k_orc_bucketed_n0 where b = 4294967363 and t < 100 order by ROW__ID;
-- this should vectorize (and push predicate to storage: filterExpr in TableScan)
-- same as above
explain update over10k_orc_bucketed_n0 set i = 0 where b = 4294967363 and t < 100;
update over10k_orc_bucketed_n0 set i = 0 where b = 4294967363 and t < 100;
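-- illustrative verification (not part of the original test): every row matching
-- the update predicate should now have i = 0, so this should return 0
select count(*) from over10k_orc_bucketed_n0 where b = 4294967363 and t < 100 and i != 0;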
-- this should produce the same result (data) as the previous time this exact query ran
-- the ROW__IDs will be different (though with the same bucketProperty)
select ROW__ID, t, si, i from over10k_orc_bucketed_n0 where b = 4294967363 and t < 100 order by ROW__ID;
-- The idea below was to run checksum queries to ensure that ROW__IDs are unique,
-- to run Compaction, and to check that ROW__IDs are the same before and after
-- compaction (for rows w/o applicable delete events)
-- this doesn't vectorize - use EXPLAIN VECTORIZATION DETAIL to see why:
-- notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported
explain select ROW__ID, count(*) from over10k_orc_bucketed_n0 group by ROW__ID having count(*) > 1;
-- this tests that there are no duplicate ROW__IDs, so it should produce no output
select ROW__ID, count(*) from over10k_orc_bucketed_n0 group by ROW__ID having count(*) > 1;
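-- a vectorizable variant of the same check (illustrative): grouping by the
-- individual ROW__ID fields avoids the STRUCT group-by key; assumes the
-- writeid/bucketid/rowid field names used by recent Hive versions
select ROW__ID.writeid, ROW__ID.bucketid, ROW__ID.rowid, count(*)
from over10k_orc_bucketed_n0
group by ROW__ID.writeid, ROW__ID.bucketid, ROW__ID.rowid
having count(*) > 1;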
-- schedule compactor
explain alter table over10k_orc_bucketed_n0 compact 'major' WITH OVERWRITE TBLPROPERTIES ('compactor.mapreduce.map.memory.mb'='500', 'compactor.mapreduce.reduce.memory.mb'='500', 'compactor.hive.tez.container.size'='500');
alter table over10k_orc_bucketed_n0 compact 'major' WITH OVERWRITE TBLPROPERTIES ('compactor.mapreduce.map.memory.mb'='500', 'compactor.mapreduce.reduce.memory.mb'='500', 'compactor.hive.tez.container.size'='500');
-- run compactor - this currently fails with
-- Invalid resource request, requested memory < 0, or requested memory > max configured, requestedMemory=1536, maxMemory=512
-- select runWorker() from mydual_n0;
-- show compactions;
-- this should produce the same result (data + ROW__ID) as before compaction
select ROW__ID, t, si, i from over10k_orc_bucketed_n0 where b = 4294967363 and t < 100 order by ROW__ID;
-- this tests that there are no duplicate ROW__IDs, so it should produce no output
select ROW__ID, count(*) from over10k_orc_bucketed_n0 group by ROW__ID having count(*) > 1;