| --! qt:dataset:src |
| set hive.mapred.mode=nonstrict; |
| set hive.optimize.listbucketing=true; |
| set mapred.input.dir.recursive=true; |
| set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; |
| |
| |
| -- List bucketing query logic test case. We simulate the directory structure by DML here. |
| -- Test condition: |
| -- 1. where clause has multiple skewed columns and non-skewed columns |
| -- 3. where clause has a few operators |
| -- Test focus: |
| -- 1. query works for on partition level. |
| -- A table can mix up non-skewed partition and skewed partition |
| -- Even for skewed partition, it can have different skewed information. |
| -- Test result: |
| -- 1. pruner only pick up right directory |
| -- 2. query result is right |
| |
| -- create a skewed table |
| create table fact_daily_n3 (key String, value String) |
| partitioned by (ds String, hr String) ; |
| |
| -- partition no skew |
| insert overwrite table fact_daily_n3 partition (ds = '1', hr = '1') |
| select key, value from src; |
| describe formatted fact_daily_n3 PARTITION (ds = '1', hr='1'); |
| |
| -- partition. skewed value is 484/238 |
| alter table fact_daily_n3 skewed by (key, value) on (('484','val_484'),('238','val_238')) stored as DIRECTORIES; |
| insert overwrite table fact_daily_n3 partition (ds = '1', hr = '2') |
| select key, value from src; |
| describe formatted fact_daily_n3 PARTITION (ds = '1', hr='2'); |
| |
| -- another partition. skewed value is 327 |
| alter table fact_daily_n3 skewed by (key, value) on (('327','val_327')) stored as DIRECTORIES; |
| insert overwrite table fact_daily_n3 partition (ds = '1', hr = '3') |
| select key, value from src; |
| describe formatted fact_daily_n3 PARTITION (ds = '1', hr='3'); |
| |
| -- query non-skewed partition |
| explain extended |
| select * from fact_daily_n3 where ds = '1' and hr='1' and key='145'; |
| select * from fact_daily_n3 where ds = '1' and hr='1' and key='145'; |
| explain extended |
| select count(*) from fact_daily_n3 where ds = '1' and hr='1'; |
| select count(*) from fact_daily_n3 where ds = '1' and hr='1'; |
| |
| -- query skewed partition |
| explain extended |
| SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='2' and (key='484' and value='val_484'); |
| SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='2' and (key='484' and value='val_484'); |
| |
| -- query another skewed partition |
| explain extended |
| SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='3' and (key='327' and value='val_327'); |
| SELECT * FROM fact_daily_n3 WHERE ds='1' and hr='3' and (key='327' and value='val_327'); |