blob: 03e0e73730ec0deebf8f9bbb76866d5355774b71 [file] [log] [blame]
set hive.vectorized.execution.enabled=false;
-- SORT_QUERY_RESULTS
create table orc_split_elim (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) stored as orc;
load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim;
SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
SET mapred.min.split.size=1000;
SET mapred.max.split.size=5000;
SET hive.optimize.index.filter=false;
-- The above table will have 5 splits with the followings stats
-- Stripe 1:
-- Column 0: count: 5000
-- Column 1: count: 5000 min: 2 max: 100 sum: 499902
-- Column 2: count: 5000 min: foo max: zebra sum: 24998
-- Column 3: count: 5000 min: 0.8 max: 8.0 sum: 39992.8
-- Column 4: count: 5000 min: 0 max: 1.2 sum: 1.2
-- Column 5: count: 5000
-- Stripe 2:
-- Column 0: count: 5000
-- Column 1: count: 5000 min: 13 max: 100 sum: 499913
-- Column 2: count: 5000 min: bar max: zebra sum: 24998
-- Column 3: count: 5000 min: 8.0 max: 80.0 sum: 40072.0
-- Column 4: count: 5000 min: 0 max: 2.2 sum: 2.2
-- Column 5: count: 5000
-- Stripe 3:
-- Column 0: count: 5000
-- Column 1: count: 5000 min: 29 max: 100 sum: 499929
-- Column 2: count: 5000 min: cat max: zebra sum: 24998
-- Column 3: count: 5000 min: 8.0 max: 8.0 sum: 40000.0
-- Column 4: count: 5000 min: 0 max: 3.3 sum: 3.3
-- Column 5: count: 5000
-- Stripe 4:
-- Column 0: count: 5000
-- Column 1: count: 5000 min: 70 max: 100 sum: 499970
-- Column 2: count: 5000 min: dog max: zebra sum: 24998
-- Column 3: count: 5000 min: 1.8 max: 8.0 sum: 39993.8
-- Column 4: count: 5000 min: 0 max: 4.4 sum: 4.4
-- Column 5: count: 5000
-- Stripe 5:
-- Column 0: count: 5000
-- Column 1: count: 5000 min: 5 max: 100 sum: 499905
-- Column 2: count: 5000 min: eat max: zebra sum: 24998
-- Column 3: count: 5000 min: 0.8 max: 8.0 sum: 39992.8
-- Column 4: count: 5000 min: 0 max: 5.5 sum: 5.5
-- Column 5: count: 5000
-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=0;
SET hive.optimize.index.filter=true;
-- 0 mapper
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=0;
SET hive.optimize.index.filter=false;
-- 5 mappers. count should be 0
select count(*) from orc_split_elim where userid<=0;
SET hive.optimize.index.filter=true;
-- 0 mapper
select count(*) from orc_split_elim where userid<=0;
SET hive.optimize.index.filter=false;
-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=2;
SET hive.optimize.index.filter=true;
-- 1 mapper
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=2;
SET hive.optimize.index.filter=false;
-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=5;
SET hive.optimize.index.filter=true;
-- 2 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=5;
SET hive.optimize.index.filter=false;
-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=13;
SET hive.optimize.index.filter=true;
-- 3 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=13;
SET hive.optimize.index.filter=false;
-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=29;
SET hive.optimize.index.filter=true;
-- 4 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=29;
SET hive.optimize.index.filter=false;
-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70;
SET hive.optimize.index.filter=true;
-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70;
SET hive.optimize.index.filter=false;
-- partitioned table
create table orc_split_elim_part (userid bigint, string1 string, subtype double, decimal1 decimal(38,0), ts timestamp) partitioned by (country string, year int) stored as orc;
alter table orc_split_elim_part add partition(country='us', year=2000);
alter table orc_split_elim_part add partition(country='us', year=2001);
load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim_part partition(country='us', year=2000);
load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim_part partition(country='us', year=2001);
-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us';
SET hive.optimize.index.filter=true;
-- 2 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us';
SET hive.optimize.index.filter=false;
-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us' and (year=2000 or year=2001);
SET hive.optimize.index.filter=true;
-- 2 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us' and (year=2000 or year=2001);
SET hive.optimize.index.filter=false;
-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us' and year=2000;
SET hive.optimize.index.filter=true;
-- 1 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us' and year=2000;
SET hive.optimize.index.filter=false;
-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us';
SET hive.optimize.index.filter=true;
-- 4 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us';
SET hive.optimize.index.filter=false;
-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us' and (year=2000 or year=2001);
SET hive.optimize.index.filter=true;
-- 4 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us' and (year=2000 or year=2001);
SET hive.optimize.index.filter=false;
-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us' and year=2000;
SET hive.optimize.index.filter=true;
-- 2 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us' and year=2000;
SET hive.optimize.index.filter=false;
-- 0 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=70 and country='in';
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=70 and country='us' and year=2002;
SET hive.optimize.index.filter=true;
-- 0 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=70 and country='in';
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=70 and country='us' and year=2002;
SET hive.optimize.index.filter=false;