--! qt:dataset:src

-- Session setup: enable compressed output and force CombineHiveInputFormat
-- with tiny (256-byte) split bounds so the combiner is exercised across
-- many small files in the tests below.
set hive.exec.compress.output = true;
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
set mapred.min.split.size=256;
set mapred.min.split.size.per.node=256;
set mapred.min.split.size.per.rack=256;
set mapred.max.split.size=256;

-- Test 1: a partitioned table whose partitions use different file formats
-- (SequenceFile, then RCFile after ALTER); verify both partitions read back
-- correctly when splits are combined.
-- "if exists" keeps the script idempotent on a fresh metastore.
drop table if exists combine_3_srcpart_seq_rc;

create table combine_3_srcpart_seq_rc (key int, value string)
    partitioned by (ds string, hr string)
    stored as sequencefile;

-- First partition is written with the table's original format (SequenceFile).
insert overwrite table combine_3_srcpart_seq_rc partition (ds="2010-08-03", hr="00") select * from src;

-- Change the table's default format: the next partition is written as RCFile
-- while the existing hr="00" partition stays SequenceFile.
alter table combine_3_srcpart_seq_rc set fileformat rcfile;
insert overwrite table combine_3_srcpart_seq_rc partition (ds="2010-08-03", hr="001") select * from src;

-- Show per-partition metadata so the differing file formats are visible.
desc extended combine_3_srcpart_seq_rc partition(ds="2010-08-03", hr="00");
desc extended combine_3_srcpart_seq_rc partition(ds="2010-08-03", hr="001");

-- Deterministic result for the golden file: order by key, hr before limit.
select key, value, ds, hr from combine_3_srcpart_seq_rc where ds="2010-08-03" order by key, hr limit 30;
-- Single reducer so each partition produces a predictable set of bucket files.
set hive.exec.reducers.max = 1;

-- Test 2: a bucketed table whose partitions use different file formats;
-- verify TABLESAMPLE still selects the correct bucket after split combining.
-- Bug fix: the original dropped an unrelated table ("bucket3_1") here instead
-- of the table created below, so a leftover table from a previous run would
-- make the CREATE fail. Drop the right table, idempotently.
drop table if exists combine_3_srcpart_seq_rc_bucket;

create table combine_3_srcpart_seq_rc_bucket (key int, value string)
    partitioned by (ds string)
    clustered by (key) into 2 buckets
    stored as sequencefile;

-- Partition ds='1' is written as SequenceFile.
insert overwrite table combine_3_srcpart_seq_rc_bucket partition (ds='1')
select * from src;

-- Subsequent partitions use RCFile; ds='1' keeps its SequenceFile data.
alter table combine_3_srcpart_seq_rc_bucket set fileformat rcfile;

insert overwrite table combine_3_srcpart_seq_rc_bucket partition (ds='11')
select * from src;

-- Sample bucket 1 across both mixed-format partitions; ordered for a
-- deterministic golden result.
select key, ds from combine_3_srcpart_seq_rc_bucket tablesample (bucket 1 out of 2) s
where ds = '1' or ds = '11' order by key, ds limit 30;

-- Cleanup of both test tables ("if exists" keeps teardown error-free).
drop table if exists combine_3_srcpart_seq_rc_bucket;
drop table if exists combine_3_srcpart_seq_rc;