blob: f80585e56c6f391feedcd4a1c76b9b0d4ae97675 [file] [log] [blame]
--! qt:dataset:srcpart
set hive.mapred.mode=nonstrict;
set hive.exec.dynamic.partition=true;
set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.merge.smallfiles.avgsize=200;
set mapred.input.dir.recursive=true;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.merge.rcfile.block.level=true;
-- SORT_QUERY_RESULTS
-- list bucketing DML : dynamic partition (one level) , merge , one skewed column
-- DML without merge files mixed with small and big files:
-- ds=2008-04-08/hr=a1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/
-- 155 000000_0
-- ds=2008-04-08/hr=b1/key=484
-- 87 000000_0
-- 87 000001_0
-- ds=2008-04-08/hr=b1/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/
-- 5201 000000_0
-- 5201 000001_0
-- DML with merge will merge small files
-- skewed table
CREATE TABLE list_bucketing_dynamic_part (key String, value STRING)
PARTITIONED BY (ds string, hr string)
skewed by (key) on ('484')
stored as DIRECTORIES
STORED AS RCFILE;
-- list bucketing DML without merge. use bucketize to generate a few small files.
explain extended
insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
-- check DML result
show partitions list_bucketing_dynamic_part;
desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1');
desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1');
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
-- list bucketing DML with merge. use bucketize to generate a few small files.
explain extended
insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08';
-- check DML result
show partitions list_bucketing_dynamic_part;
desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='a1');
desc formatted list_bucketing_dynamic_part partition (ds='2008-04-08', hr='b1');
select count(1) from srcpart where ds = '2008-04-08';
select count(*) from list_bucketing_dynamic_part;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
explain extended
select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
-- clean up
drop table list_bucketing_dynamic_part;