blob: d5f6a26b10ad141b4ce6a09f120d1cee09f5fe6f [file] [log] [blame]
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
set hive.optimize.listbucketing=true;
set mapred.input.dir.recursive=true;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
-- List bucketing query logic test case. We simulate the directory structure by DML here.
-- Test condition:
-- 1. where clause has multiple skewed columns and non-skewed columns
-- 3. where clause has a few operators
-- Test focus:
-- 1. query works for on partition level.
-- A table can mix up non-skewed partition and skewed partition
-- Even for skewed partition, it can have different skewed information.
-- Test result:
-- 1. pruner only pick up right directory
-- 2. query result is right
-- create a skewed table
create table fact_daily_n3 (key String, value String)
partitioned by (ds String, hr String) ;
-- partition no skew
insert overwrite table fact_daily_n3 partition (ds = '1', hr = '1')
select key, value from src;
describe formatted fact_daily_n3 PARTITION (ds = '1', hr='1');
-- partition. skewed value is 484/238
alter table fact_daily_n3 skewed by (key, value) on (('484','val_484'),('238','val_238')) stored as DIRECTORIES;
insert overwrite table fact_daily_n3 partition (ds = '1', hr = '2')
select key, value from src;
describe formatted fact_daily_n3 PARTITION (ds = '1', hr='2');
-- another partition. skewed value is 327
alter table fact_daily_n3 skewed by (key, value) on (('327','val_327')) stored as DIRECTORIES;
insert overwrite table fact_daily_n3 partition (ds = '1', hr = '3')
select key, value from src;
describe formatted fact_daily_n3 PARTITION (ds = '1', hr='3');
-- query non-skewed partition
explain extended
select * from fact_daily_n3 where ds = '1' and hr='1' and key='145';
select * from fact_daily_n3 where ds = '1' and hr='1' and key='145';
explain extended
select count(*) from fact_daily_n3 where ds = '1' and hr='1';
select count(*) from fact_daily_n3 where ds = '1' and hr='1';
-- query skewed partition
explain extended
SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='2' and (key='484' and value='val_484');
SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='2' and (key='484' and value='val_484');
-- query another skewed partition
explain extended
SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='3' and (key='327' and value='val_327');
SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='3' and (key='327' and value='val_327');