| PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) |
| -- SORT_QUERY_RESULTS |
| |
| -- List bucketing query logic test case. |
| -- Test condition: |
| -- 1. where clause has single skewed columns and non-skewed columns |
| -- 3. where clause has a few operators |
| -- Test focus: |
| -- 1. basic list bucketing query works for not (equal) case |
| -- Test result: |
| -- 1. pruner only pick up right directory |
| -- 2. query result is right |
| |
| -- create 2 tables: fact_daily and fact_tz |
| -- fact_daily will be used for list bucketing query |
| -- fact_tz is a table used to prepare data and test directories |
| CREATE TABLE fact_daily(x int, y STRING, z STRING) PARTITIONED BY (ds STRING) |
| PREHOOK: type: CREATETABLE |
| PREHOOK: Output: database:default |
| PREHOOK: Output: default@fact_daily |
| POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) |
| -- SORT_QUERY_RESULTS |
| |
| -- List bucketing query logic test case. |
| -- Test condition: |
| -- 1. where clause has single skewed columns and non-skewed columns |
| -- 3. where clause has a few operators |
| -- Test focus: |
| -- 1. basic list bucketing query works for not (equal) case |
| -- Test result: |
| -- 1. pruner only pick up right directory |
| -- 2. query result is right |
| |
| -- create 2 tables: fact_daily and fact_tz |
| -- fact_daily will be used for list bucketing query |
| -- fact_tz is a table used to prepare data and test directories |
| CREATE TABLE fact_daily(x int, y STRING, z STRING) PARTITIONED BY (ds STRING) |
| POSTHOOK: type: CREATETABLE |
| POSTHOOK: Output: database:default |
| POSTHOOK: Output: default@fact_daily |
| PREHOOK: query: CREATE TABLE fact_tz(x int, y STRING, z STRING) PARTITIONED BY (ds STRING, hr STRING) |
| #### A masked pattern was here #### |
| PREHOOK: type: CREATETABLE |
| #### A masked pattern was here #### |
| PREHOOK: Output: database:default |
| PREHOOK: Output: default@fact_tz |
| POSTHOOK: query: CREATE TABLE fact_tz(x int, y STRING, z STRING) PARTITIONED BY (ds STRING, hr STRING) |
| #### A masked pattern was here #### |
| POSTHOOK: type: CREATETABLE |
| #### A masked pattern was here #### |
| POSTHOOK: Output: database:default |
| POSTHOOK: Output: default@fact_tz |
| PREHOOK: query: -- create /fact_tz/ds=1/hr=1 directory |
| INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='1') |
| SELECT key, value, value FROM src WHERE key=484 |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@src |
| PREHOOK: Output: default@fact_tz@ds=1/hr=1 |
| POSTHOOK: query: -- create /fact_tz/ds=1/hr=1 directory |
| INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='1') |
| SELECT key, value, value FROM src WHERE key=484 |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@src |
| POSTHOOK: Output: default@fact_tz@ds=1/hr=1 |
| POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] |
| POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] |
| POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] |
| PREHOOK: query: -- create /fact_tz/ds=1/hr=2 directory |
| INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='2') |
| SELECT key, value, value FROM src WHERE key=278 or key=86 |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@src |
| PREHOOK: Output: default@fact_tz@ds=1/hr=2 |
| POSTHOOK: query: -- create /fact_tz/ds=1/hr=2 directory |
| INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='2') |
| SELECT key, value, value FROM src WHERE key=278 or key=86 |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@src |
| POSTHOOK: Output: default@fact_tz@ds=1/hr=2 |
| POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] |
| POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] |
| POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] |
| PREHOOK: query: -- create /fact_tz/ds=1/hr=3 directory |
| INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='3') |
| SELECT key, value, value FROM src WHERE key=238 |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@src |
| PREHOOK: Output: default@fact_tz@ds=1/hr=3 |
| POSTHOOK: query: -- create /fact_tz/ds=1/hr=3 directory |
| INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='3') |
| SELECT key, value, value FROM src WHERE key=238 |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@src |
| POSTHOOK: Output: default@fact_tz@ds=1/hr=3 |
| POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] |
| POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] |
| POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] |
| #### A masked pattern was here #### |
| PREHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_tz/ds=1 |
| alter table fact_daily skewed by (x) on (484,238) |
| PREHOOK: type: ALTERTABLE_SKEWED |
| PREHOOK: Input: default@fact_daily |
| PREHOOK: Output: default@fact_daily |
| POSTHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_tz/ds=1 |
| alter table fact_daily skewed by (x) on (484,238) |
| POSTHOOK: type: ALTERTABLE_SKEWED |
| POSTHOOK: Input: default@fact_daily |
| POSTHOOK: Output: default@fact_daily |
| PREHOOK: query: ALTER TABLE fact_daily SET TBLPROPERTIES('EXTERNAL'='TRUE') |
| PREHOOK: type: ALTERTABLE_PROPERTIES |
| PREHOOK: Input: default@fact_daily |
| PREHOOK: Output: default@fact_daily |
| POSTHOOK: query: ALTER TABLE fact_daily SET TBLPROPERTIES('EXTERNAL'='TRUE') |
| POSTHOOK: type: ALTERTABLE_PROPERTIES |
| POSTHOOK: Input: default@fact_daily |
| POSTHOOK: Output: default@fact_daily |
| PREHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1') |
| #### A masked pattern was here #### |
| PREHOOK: type: ALTERTABLE_ADDPARTS |
| #### A masked pattern was here #### |
| PREHOOK: Output: default@fact_daily |
| POSTHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1') |
| #### A masked pattern was here #### |
| POSTHOOK: type: ALTERTABLE_ADDPARTS |
| #### A masked pattern was here #### |
| POSTHOOK: Output: default@fact_daily |
| POSTHOOK: Output: default@fact_daily@ds=1 |
| PREHOOK: query: -- set List Bucketing location map |
| #### A masked pattern was here #### |
| PREHOOK: type: ALTERTBLPART_SKEWED_LOCATION |
| PREHOOK: Input: default@fact_daily |
| PREHOOK: Output: default@fact_daily@ds=1 |
| #### A masked pattern was here #### |
| POSTHOOK: query: -- set List Bucketing location map |
| #### A masked pattern was here #### |
| POSTHOOK: type: ALTERTBLPART_SKEWED_LOCATION |
| POSTHOOK: Input: default@fact_daily |
| POSTHOOK: Input: default@fact_daily@ds=1 |
| POSTHOOK: Output: default@fact_daily@ds=1 |
| #### A masked pattern was here #### |
| PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1') |
| PREHOOK: type: DESCTABLE |
| PREHOOK: Input: default@fact_daily |
| POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1') |
| POSTHOOK: type: DESCTABLE |
| POSTHOOK: Input: default@fact_daily |
| # col_name data_type comment |
| |
| x int |
| y string |
| z string |
| |
| # Partition Information |
| # col_name data_type comment |
| |
| ds string |
| |
| # Detailed Partition Information |
| Partition Value: [1] |
| Database: default |
| Table: fact_daily |
| #### A masked pattern was here #### |
| Partition Parameters: |
| #### A masked pattern was here #### |
| numFiles 3 |
| totalSize 117 |
| #### A masked pattern was here #### |
| |
| # Storage Information |
| SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| InputFormat: org.apache.hadoop.mapred.TextInputFormat |
| OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| Compressed: No |
| Num Buckets: -1 |
| Bucket Columns: [] |
| Sort Columns: [] |
| Skewed Columns: [x] |
| Skewed Values: [[238], [484]] |
| #### A masked pattern was here #### |
| Storage Desc Params: |
| serialization.format 1 |
| PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@fact_daily |
| PREHOOK: Input: default@fact_daily@ds=1 |
| #### A masked pattern was here #### |
| POSTHOOK: query: SELECT * FROM fact_daily WHERE ds='1' |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@fact_daily |
| POSTHOOK: Input: default@fact_daily@ds=1 |
| #### A masked pattern was here #### |
| 238 val_238 val_238 1 |
| 238 val_238 val_238 1 |
| 278 val_278 val_278 1 |
| 278 val_278 val_278 1 |
| 484 val_484 val_484 1 |
| 86 val_86 val_86 1 |
| PREHOOK: query: -- pruner pick up right directory |
| -- explain plan shows which directory selected: Truncated Path -> Alias |
| explain extended SELECT x FROM fact_daily WHERE ds='1' and not (x = 86) |
| PREHOOK: type: QUERY |
| POSTHOOK: query: -- pruner pick up right directory |
| -- explain plan shows which directory selected: Truncated Path -> Alias |
| explain extended SELECT x FROM fact_daily WHERE ds='1' and not (x = 86) |
| POSTHOOK: type: QUERY |
| STAGE DEPENDENCIES: |
| Stage-0 is a root stage |
| |
| STAGE PLANS: |
| Stage: Stage-0 |
| Fetch Operator |
| limit: -1 |
| Partition Description: |
| Partition |
| input format: org.apache.hadoop.mapred.TextInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| partition values: |
| ds 1 |
| properties: |
| bucket_count -1 |
| columns x,y,z |
| columns.comments |
| columns.types int:string:string |
| #### A masked pattern was here #### |
| name default.fact_daily |
| numFiles 3 |
| partition_columns ds |
| partition_columns.types string |
| serialization.ddl struct fact_daily { i32 x, string y, string z} |
| serialization.format 1 |
| serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| totalSize 117 |
| #### A masked pattern was here #### |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| |
| input format: org.apache.hadoop.mapred.TextInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat |
| properties: |
| EXTERNAL TRUE |
| bucket_count -1 |
| columns x,y,z |
| columns.comments |
| columns.types int:string:string |
| #### A masked pattern was here #### |
| name default.fact_daily |
| partition_columns ds |
| partition_columns.types string |
| serialization.ddl struct fact_daily { i32 x, string y, string z} |
| serialization.format 1 |
| serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| #### A masked pattern was here #### |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| name: default.fact_daily |
| name: default.fact_daily |
| Processor Tree: |
| TableScan |
| alias: fact_daily |
| Statistics: Num rows: 29 Data size: 117 Basic stats: COMPLETE Column stats: NONE |
| GatherStats: false |
| Filter Operator |
| isSamplingPred: false |
| predicate: (x <> 86) (type: boolean) |
| Statistics: Num rows: 29 Data size: 117 Basic stats: COMPLETE Column stats: NONE |
| Select Operator |
| expressions: x (type: int) |
| outputColumnNames: _col0 |
| Statistics: Num rows: 29 Data size: 117 Basic stats: COMPLETE Column stats: NONE |
| ListSink |
| |
| PREHOOK: query: -- List Bucketing Query |
| SELECT x FROM fact_daily WHERE ds='1' and not (x = 86) |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@fact_daily |
| PREHOOK: Input: default@fact_daily@ds=1 |
| #### A masked pattern was here #### |
| POSTHOOK: query: -- List Bucketing Query |
| SELECT x FROM fact_daily WHERE ds='1' and not (x = 86) |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@fact_daily |
| POSTHOOK: Input: default@fact_daily@ds=1 |
| #### A masked pattern was here #### |
| 238 |
| 238 |
| 278 |
| 278 |
| 484 |