| PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) |
| |
| -- List bucketing query logic test case. We simulate the directory structure by DML here. |
| -- Test condition: |
| -- 1. where clause has multiple skewed columns |
| -- 2. where clause doesn't have non-skewed column |
| -- 3. where clause has one and operator |
| -- Test focus: |
| -- 1. basic list bucketing query work |
| -- Test result: |
| -- 1. pruner only pick up right directory |
| -- 2. query result is right |
| |
| -- create a skewed table |
| create table fact_daily (key String, value String) |
| partitioned by (ds String, hr String) |
| skewed by (key, value) on (('484','val_484'),('238','val_238')) |
| stored as DIRECTORIES |
| PREHOOK: type: CREATETABLE |
| PREHOOK: Output: database:default |
| PREHOOK: Output: default@fact_daily |
| POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) |
| |
| -- List bucketing query logic test case. We simulate the directory structure by DML here. |
| -- Test condition: |
| -- 1. where clause has multiple skewed columns |
| -- 2. where clause doesn't have non-skewed column |
| -- 3. where clause has one and operator |
| -- Test focus: |
| -- 1. basic list bucketing query work |
| -- Test result: |
| -- 1. pruner only pick up right directory |
| -- 2. query result is right |
| |
| -- create a skewed table |
| create table fact_daily (key String, value String) |
| partitioned by (ds String, hr String) |
| skewed by (key, value) on (('484','val_484'),('238','val_238')) |
| stored as DIRECTORIES |
| POSTHOOK: type: CREATETABLE |
| POSTHOOK: Output: database:default |
| POSTHOOK: Output: default@fact_daily |
| PREHOOK: query: insert overwrite table fact_daily partition (ds = '1', hr = '4') |
| select key, value from src |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@src |
| PREHOOK: Output: default@fact_daily@ds=1/hr=4 |
| POSTHOOK: query: insert overwrite table fact_daily partition (ds = '1', hr = '4') |
| select key, value from src |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@src |
| POSTHOOK: Output: default@fact_daily@ds=1/hr=4 |
| POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] |
| POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=4).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] |
| PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='4') |
| PREHOOK: type: DESCTABLE |
| PREHOOK: Input: default@fact_daily |
| POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='4') |
| POSTHOOK: type: DESCTABLE |
| POSTHOOK: Input: default@fact_daily |
| # col_name data_type comment |
| |
| key string |
| value string |
| |
| # Partition Information |
| # col_name data_type comment |
| |
| ds string |
| hr string |
| |
| # Detailed Partition Information |
| Partition Value: [1, 4] |
| Database: default |
| Table: fact_daily |
| #### A masked pattern was here #### |
| Partition Parameters: |
| COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} |
| numFiles 3 |
| numRows 500 |
| rawDataSize 5312 |
| totalSize 5812 |
| #### A masked pattern was here #### |
| |
| # Storage Information |
| SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| InputFormat: org.apache.hadoop.mapred.TextInputFormat |
| OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| Compressed: No |
| Num Buckets: -1 |
| Bucket Columns: [] |
| Sort Columns: [] |
| Stored As SubDirectories: Yes |
| Skewed Columns: [key, value] |
| Skewed Values: [[238, val_238], [484, val_484]] |
| #### A masked pattern was here #### |
| Skewed Value to Truncated Path: {[238, val_238]=/fact_daily/ds=1/hr=4/key=238/value=val_238, [484, val_484]=/fact_daily/ds=1/hr=4/key=484/value=val_484} |
| Storage Desc Params: |
| serialization.format 1 |
| PREHOOK: query: SELECT count(1) FROM fact_daily WHERE ds='1' and hr='4' |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@fact_daily |
| PREHOOK: Input: default@fact_daily@ds=1/hr=4 |
| #### A masked pattern was here #### |
| POSTHOOK: query: SELECT count(1) FROM fact_daily WHERE ds='1' and hr='4' |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@fact_daily |
| POSTHOOK: Input: default@fact_daily@ds=1/hr=4 |
| #### A masked pattern was here #### |
| 500 |
| PREHOOK: query: -- pruner only pick up skewed-value directory |
| -- explain plan shows which directory selected: Truncated Path -> Alias |
| explain extended SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484') |
| PREHOOK: type: QUERY |
| POSTHOOK: query: -- pruner only pick up skewed-value directory |
| -- explain plan shows which directory selected: Truncated Path -> Alias |
| explain extended SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484') |
| POSTHOOK: type: QUERY |
| STAGE DEPENDENCIES: |
| Stage-0 is a root stage |
| |
| STAGE PLANS: |
| Stage: Stage-0 |
| Fetch Operator |
| limit: -1 |
| Partition Description: |
| Partition |
| input format: org.apache.hadoop.mapred.TextInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| partition values: |
| ds 1 |
| hr 4 |
| properties: |
| COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} |
| bucket_count -1 |
| columns key,value |
| columns.comments |
| columns.types string:string |
| #### A masked pattern was here #### |
| name default.fact_daily |
| numFiles 3 |
| numRows 500 |
| partition_columns ds/hr |
| partition_columns.types string:string |
| rawDataSize 5312 |
| serialization.ddl struct fact_daily { string key, string value} |
| serialization.format 1 |
| serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| totalSize 5812 |
| #### A masked pattern was here #### |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| |
| input format: org.apache.hadoop.mapred.TextInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| properties: |
| bucket_count -1 |
| columns key,value |
| columns.comments |
| columns.types string:string |
| #### A masked pattern was here #### |
| name default.fact_daily |
| partition_columns ds/hr |
| partition_columns.types string:string |
| serialization.ddl struct fact_daily { string key, string value} |
| serialization.format 1 |
| serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| #### A masked pattern was here #### |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| name: default.fact_daily |
| name: default.fact_daily |
| Processor Tree: |
| TableScan |
| alias: fact_daily |
| Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |
| GatherStats: false |
| Filter Operator |
| isSamplingPred: false |
| predicate: ((key = '484') and (value = 'val_484')) (type: boolean) |
| Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE |
| Select Operator |
| expressions: '484' (type: string) |
| outputColumnNames: _col0 |
| Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE |
| ListSink |
| |
| PREHOOK: query: -- List Bucketing Query |
| SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484') |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@fact_daily |
| PREHOOK: Input: default@fact_daily@ds=1/hr=4 |
| #### A masked pattern was here #### |
| POSTHOOK: query: -- List Bucketing Query |
| SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (key='484' and value= 'val_484') |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@fact_daily |
| POSTHOOK: Input: default@fact_daily@ds=1/hr=4 |
| #### A masked pattern was here #### |
| 484 |
| PREHOOK: query: -- pruner only pick up skewed-value directory |
| -- explain plan shows which directory selected: Truncated Path -> Alias |
| explain extended SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238') |
| PREHOOK: type: QUERY |
| POSTHOOK: query: -- pruner only pick up skewed-value directory |
| -- explain plan shows which directory selected: Truncated Path -> Alias |
| explain extended SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238') |
| POSTHOOK: type: QUERY |
| STAGE DEPENDENCIES: |
| Stage-0 is a root stage |
| |
| STAGE PLANS: |
| Stage: Stage-0 |
| Fetch Operator |
| limit: -1 |
| Partition Description: |
| Partition |
| input format: org.apache.hadoop.mapred.TextInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| partition values: |
| ds 1 |
| hr 4 |
| properties: |
| COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} |
| bucket_count -1 |
| columns key,value |
| columns.comments |
| columns.types string:string |
| #### A masked pattern was here #### |
| name default.fact_daily |
| numFiles 3 |
| numRows 500 |
| partition_columns ds/hr |
| partition_columns.types string:string |
| rawDataSize 5312 |
| serialization.ddl struct fact_daily { string key, string value} |
| serialization.format 1 |
| serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| totalSize 5812 |
| #### A masked pattern was here #### |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| |
| input format: org.apache.hadoop.mapred.TextInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| properties: |
| bucket_count -1 |
| columns key,value |
| columns.comments |
| columns.types string:string |
| #### A masked pattern was here #### |
| name default.fact_daily |
| partition_columns ds/hr |
| partition_columns.types string:string |
| serialization.ddl struct fact_daily { string key, string value} |
| serialization.format 1 |
| serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| #### A masked pattern was here #### |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| name: default.fact_daily |
| name: default.fact_daily |
| Processor Tree: |
| TableScan |
| alias: fact_daily |
| Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |
| GatherStats: false |
| Filter Operator |
| isSamplingPred: false |
| predicate: ((key = '238') and (value = 'val_238')) (type: boolean) |
| Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE |
| Select Operator |
| expressions: '238' (type: string), 'val_238' (type: string) |
| outputColumnNames: _col0, _col1 |
| Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE |
| ListSink |
| |
| PREHOOK: query: -- List Bucketing Query |
| SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238') |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@fact_daily |
| PREHOOK: Input: default@fact_daily@ds=1/hr=4 |
| #### A masked pattern was here #### |
| POSTHOOK: query: -- List Bucketing Query |
| SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and (key='238' and value= 'val_238') |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@fact_daily |
| POSTHOOK: Input: default@fact_daily@ds=1/hr=4 |
| #### A masked pattern was here #### |
| 238 val_238 |
| 238 val_238 |
| PREHOOK: query: -- pruner only pick up default directory |
| -- explain plan shows which directory selected: Truncated Path -> Alias |
| explain extended SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (value = "3") |
| PREHOOK: type: QUERY |
| POSTHOOK: query: -- pruner only pick up default directory |
| -- explain plan shows which directory selected: Truncated Path -> Alias |
| explain extended SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (value = "3") |
| POSTHOOK: type: QUERY |
| STAGE DEPENDENCIES: |
| Stage-0 is a root stage |
| |
| STAGE PLANS: |
| Stage: Stage-0 |
| Fetch Operator |
| limit: -1 |
| Partition Description: |
| Partition |
| input format: org.apache.hadoop.mapred.TextInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| partition values: |
| ds 1 |
| hr 4 |
| properties: |
| COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} |
| bucket_count -1 |
| columns key,value |
| columns.comments |
| columns.types string:string |
| #### A masked pattern was here #### |
| name default.fact_daily |
| numFiles 3 |
| numRows 500 |
| partition_columns ds/hr |
| partition_columns.types string:string |
| rawDataSize 5312 |
| serialization.ddl struct fact_daily { string key, string value} |
| serialization.format 1 |
| serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| totalSize 5812 |
| #### A masked pattern was here #### |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| |
| input format: org.apache.hadoop.mapred.TextInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| properties: |
| bucket_count -1 |
| columns key,value |
| columns.comments |
| columns.types string:string |
| #### A masked pattern was here #### |
| name default.fact_daily |
| partition_columns ds/hr |
| partition_columns.types string:string |
| serialization.ddl struct fact_daily { string key, string value} |
| serialization.format 1 |
| serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| #### A masked pattern was here #### |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| name: default.fact_daily |
| name: default.fact_daily |
| Processor Tree: |
| TableScan |
| alias: fact_daily |
| Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |
| GatherStats: false |
| Filter Operator |
| isSamplingPred: false |
| predicate: (value = '3') (type: boolean) |
| Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE |
| Select Operator |
| expressions: key (type: string) |
| outputColumnNames: _col0 |
| Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE |
| ListSink |
| |
| PREHOOK: query: -- List Bucketing Query |
| SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (value = "3") |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@fact_daily |
| PREHOOK: Input: default@fact_daily@ds=1/hr=4 |
| #### A masked pattern was here #### |
| POSTHOOK: query: -- List Bucketing Query |
| SELECT key FROM fact_daily WHERE ( ds='1' and hr='4') and (value = "3") |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@fact_daily |
| POSTHOOK: Input: default@fact_daily@ds=1/hr=4 |
| #### A masked pattern was here #### |
| PREHOOK: query: -- pruner only pick up default directory |
| -- explain plan shows which directory selected: Truncated Path -> Alias |
| explain extended SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and key = '495' |
| PREHOOK: type: QUERY |
| POSTHOOK: query: -- pruner only pick up default directory |
| -- explain plan shows which directory selected: Truncated Path -> Alias |
| explain extended SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and key = '495' |
| POSTHOOK: type: QUERY |
| STAGE DEPENDENCIES: |
| Stage-0 is a root stage |
| |
| STAGE PLANS: |
| Stage: Stage-0 |
| Fetch Operator |
| limit: -1 |
| Partition Description: |
| Partition |
| input format: org.apache.hadoop.mapred.TextInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| partition values: |
| ds 1 |
| hr 4 |
| properties: |
| COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} |
| bucket_count -1 |
| columns key,value |
| columns.comments |
| columns.types string:string |
| #### A masked pattern was here #### |
| name default.fact_daily |
| numFiles 3 |
| numRows 500 |
| partition_columns ds/hr |
| partition_columns.types string:string |
| rawDataSize 5312 |
| serialization.ddl struct fact_daily { string key, string value} |
| serialization.format 1 |
| serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| totalSize 5812 |
| #### A masked pattern was here #### |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| |
| input format: org.apache.hadoop.mapred.TextInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| properties: |
| bucket_count -1 |
| columns key,value |
| columns.comments |
| columns.types string:string |
| #### A masked pattern was here #### |
| name default.fact_daily |
| partition_columns ds/hr |
| partition_columns.types string:string |
| serialization.ddl struct fact_daily { string key, string value} |
| serialization.format 1 |
| serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| #### A masked pattern was here #### |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| name: default.fact_daily |
| name: default.fact_daily |
| Processor Tree: |
| TableScan |
| alias: fact_daily |
| Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |
| GatherStats: false |
| Filter Operator |
| isSamplingPred: false |
| predicate: (key = '495') (type: boolean) |
| Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE |
| Select Operator |
| expressions: '495' (type: string), value (type: string) |
| outputColumnNames: _col0, _col1 |
| Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE |
| ListSink |
| |
| PREHOOK: query: -- List Bucketing Query |
| SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and key = '369' |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@fact_daily |
| PREHOOK: Input: default@fact_daily@ds=1/hr=4 |
| #### A masked pattern was here #### |
| POSTHOOK: query: -- List Bucketing Query |
| SELECT key,value FROM fact_daily WHERE ( ds='1' and hr='4') and key = '369' |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@fact_daily |
| POSTHOOK: Input: default@fact_daily@ds=1/hr=4 |
| #### A masked pattern was here #### |
| 369 val_369 |
| 369 val_369 |
| 369 val_369 |