blob: 702bc934ad4971e5cf43d37eb8b930de20aa3cf4 [file] [log] [blame]
PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
-- SORT_QUERY_RESULTS
-- List bucketing query logic test case.
-- Test condition:
-- 1. where clause has single skewed columns and non-skewed columns
-- 3. where clause has a few operators
-- Test focus:
-- 1. basic list bucketing query works for not (equal) case
-- Test result:
-- 1. pruner only pick up right directory
-- 2. query result is right
-- create 2 tables: fact_daily and fact_tz
-- fact_daily will be used for list bucketing query
-- fact_tz is a table used to prepare data and test directories
CREATE TABLE fact_daily(x int, y STRING, z STRING) PARTITIONED BY (ds STRING)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@fact_daily
POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
-- SORT_QUERY_RESULTS
-- List bucketing query logic test case.
-- Test condition:
-- 1. where clause has single skewed columns and non-skewed columns
-- 3. where clause has a few operators
-- Test focus:
-- 1. basic list bucketing query works for not (equal) case
-- Test result:
-- 1. pruner only pick up right directory
-- 2. query result is right
-- create 2 tables: fact_daily and fact_tz
-- fact_daily will be used for list bucketing query
-- fact_tz is a table used to prepare data and test directories
CREATE TABLE fact_daily(x int, y STRING, z STRING) PARTITIONED BY (ds STRING)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@fact_daily
PREHOOK: query: CREATE TABLE fact_tz(x int, y STRING, z STRING) PARTITIONED BY (ds STRING, hr STRING)
#### A masked pattern was here ####
PREHOOK: type: CREATETABLE
#### A masked pattern was here ####
PREHOOK: Output: database:default
PREHOOK: Output: default@fact_tz
POSTHOOK: query: CREATE TABLE fact_tz(x int, y STRING, z STRING) PARTITIONED BY (ds STRING, hr STRING)
#### A masked pattern was here ####
POSTHOOK: type: CREATETABLE
#### A masked pattern was here ####
POSTHOOK: Output: database:default
POSTHOOK: Output: default@fact_tz
PREHOOK: query: -- create /fact_tz/ds=1/hr=1 directory
INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='1')
SELECT key, value, value FROM src WHERE key=484
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@fact_tz@ds=1/hr=1
POSTHOOK: query: -- create /fact_tz/ds=1/hr=1 directory
INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='1')
SELECT key, value, value FROM src WHERE key=484
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@fact_tz@ds=1/hr=1
POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=1).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: -- create /fact_tz/ds=1/hr=2 directory
INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='2')
SELECT key, value, value FROM src WHERE key=278 or key=86
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@fact_tz@ds=1/hr=2
POSTHOOK: query: -- create /fact_tz/ds=1/hr=2 directory
INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='2')
SELECT key, value, value FROM src WHERE key=278 or key=86
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@fact_tz@ds=1/hr=2
POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=2).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: -- create /fact_tz/ds=1/hr=3 directory
INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='3')
SELECT key, value, value FROM src WHERE key=238
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@fact_tz@ds=1/hr=3
POSTHOOK: query: -- create /fact_tz/ds=1/hr=3 directory
INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='3')
SELECT key, value, value FROM src WHERE key=238
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@fact_tz@ds=1/hr=3
POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=3).z SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
#### A masked pattern was here ####
PREHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_tz/ds=1
alter table fact_daily skewed by (x) on (484,238)
PREHOOK: type: ALTERTABLE_SKEWED
PREHOOK: Input: default@fact_daily
PREHOOK: Output: default@fact_daily
POSTHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_tz/ds=1
alter table fact_daily skewed by (x) on (484,238)
POSTHOOK: type: ALTERTABLE_SKEWED
POSTHOOK: Input: default@fact_daily
POSTHOOK: Output: default@fact_daily
PREHOOK: query: ALTER TABLE fact_daily SET TBLPROPERTIES('EXTERNAL'='TRUE')
PREHOOK: type: ALTERTABLE_PROPERTIES
PREHOOK: Input: default@fact_daily
PREHOOK: Output: default@fact_daily
POSTHOOK: query: ALTER TABLE fact_daily SET TBLPROPERTIES('EXTERNAL'='TRUE')
POSTHOOK: type: ALTERTABLE_PROPERTIES
POSTHOOK: Input: default@fact_daily
POSTHOOK: Output: default@fact_daily
PREHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1')
#### A masked pattern was here ####
PREHOOK: type: ALTERTABLE_ADDPARTS
#### A masked pattern was here ####
PREHOOK: Output: default@fact_daily
POSTHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1')
#### A masked pattern was here ####
POSTHOOK: type: ALTERTABLE_ADDPARTS
#### A masked pattern was here ####
POSTHOOK: Output: default@fact_daily
POSTHOOK: Output: default@fact_daily@ds=1
PREHOOK: query: -- set List Bucketing location map
#### A masked pattern was here ####
PREHOOK: type: ALTERTBLPART_SKEWED_LOCATION
PREHOOK: Input: default@fact_daily
PREHOOK: Output: default@fact_daily@ds=1
#### A masked pattern was here ####
POSTHOOK: query: -- set List Bucketing location map
#### A masked pattern was here ####
POSTHOOK: type: ALTERTBLPART_SKEWED_LOCATION
POSTHOOK: Input: default@fact_daily
POSTHOOK: Input: default@fact_daily@ds=1
POSTHOOK: Output: default@fact_daily@ds=1
#### A masked pattern was here ####
PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@fact_daily
POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1')
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@fact_daily
# col_name data_type comment
x int
y string
z string
# Partition Information
# col_name data_type comment
ds string
# Detailed Partition Information
Partition Value: [1]
Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
#### A masked pattern was here ####
numFiles 3
totalSize 117
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Skewed Columns: [x]
Skewed Values: [[238], [484]]
#### A masked pattern was here ####
Storage Desc Params:
serialization.format 1
PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1'
PREHOOK: type: QUERY
PREHOOK: Input: default@fact_daily
PREHOOK: Input: default@fact_daily@ds=1
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM fact_daily WHERE ds='1'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@fact_daily
POSTHOOK: Input: default@fact_daily@ds=1
#### A masked pattern was here ####
238 val_238 val_238 1
238 val_238 val_238 1
278 val_278 val_278 1
278 val_278 val_278 1
484 val_484 val_484 1
86 val_86 val_86 1
PREHOOK: query: -- pruner pick up right directory
-- explain plan shows which directory selected: Truncated Path -> Alias
explain extended SELECT x FROM fact_daily WHERE ds='1' and not (x = 86)
PREHOOK: type: QUERY
POSTHOOK: query: -- pruner pick up right directory
-- explain plan shows which directory selected: Truncated Path -> Alias
explain extended SELECT x FROM fact_daily WHERE ds='1' and not (x = 86)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-0
Fetch Operator
limit: -1
Partition Description:
Partition
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 1
properties:
bucket_count -1
columns x,y,z
columns.comments
columns.types int:string:string
#### A masked pattern was here ####
name default.fact_daily
numFiles 3
partition_columns ds
partition_columns.types string
serialization.ddl struct fact_daily { i32 x, string y, string z}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 117
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
EXTERNAL TRUE
bucket_count -1
columns x,y,z
columns.comments
columns.types int:string:string
#### A masked pattern was here ####
name default.fact_daily
partition_columns ds
partition_columns.types string
serialization.ddl struct fact_daily { i32 x, string y, string z}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.fact_daily
name: default.fact_daily
Processor Tree:
TableScan
alias: fact_daily
Statistics: Num rows: 29 Data size: 117 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (x <> 86) (type: boolean)
Statistics: Num rows: 29 Data size: 117 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: x (type: int)
outputColumnNames: _col0
Statistics: Num rows: 29 Data size: 117 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- List Bucketing Query
SELECT x FROM fact_daily WHERE ds='1' and not (x = 86)
PREHOOK: type: QUERY
PREHOOK: Input: default@fact_daily
PREHOOK: Input: default@fact_daily@ds=1
#### A masked pattern was here ####
POSTHOOK: query: -- List Bucketing Query
SELECT x FROM fact_daily WHERE ds='1' and not (x = 86)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@fact_daily
POSTHOOK: Input: default@fact_daily@ds=1
#### A masked pattern was here ####
238
238
278
278
484