PREHOOK: query: create table list_bucketing_mul_col_n0 (col1 String, col2 String, col3 String, col4 String, col5 string)
partitioned by (ds String, hr String)
skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82'))
stored as DIRECTORIES
STORED AS RCFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@list_bucketing_mul_col_n0
POSTHOOK: query: create table list_bucketing_mul_col_n0 (col1 String, col2 String, col3 String, col4 String, col5 string)
partitioned by (ds String, hr String)
skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82'))
stored as DIRECTORIES
STORED AS RCFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@list_bucketing_mul_col_n0
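Note: list bucketing relies on session settings that do not appear in this golden output; the companion .q test file typically sets them before the DDL runs. A minimal sketch of the usual flags (the exact names are assumptions about the Hive version under test and may vary):

set hive.mapred.supports.subdirectories=true;
set mapred.input.dir.recursive=true;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;

The SKEWED BY (col2, col4) ON (...) STORED AS DIRECTORIES clause above is what makes this a list-bucketed table: rows whose (col2, col4) values match a listed pair are written to a dedicated subdirectory of each partition.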
PREHOOK: query: explain extended
insert overwrite table list_bucketing_mul_col_n0 partition (ds = '2008-04-08', hr = '11')
select 1, key, 1, value, 1 from src
PREHOOK: type: QUERY
POSTHOOK: query: explain extended
insert overwrite table list_bucketing_mul_col_n0 partition (ds = '2008-04-08', hr = '11')
select 1, key, 1, value, 1 from src
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
expressions: '1' (type: string), key (type: string), '1' (type: string), value (type: string), '1' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Static Partition Specification: ds=2008-04-08/hr=11/
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
properties:
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns col1,col2,col3,col4,col5
columns.comments
columns.types string:string:string:string:string
#### A masked pattern was here ####
name default.list_bucketing_mul_col_n0
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct list_bucketing_mul_col_n0 { string col1, string col2, string col3, string col4, string col5}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_mul_col_n0
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '11' (type: string)
outputColumnNames: col1, col2, col3, col4, col5, ds, hr
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll')
keys: ds (type: string), hr (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col6 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
name default.src
numFiles 1
numRows 500
rawDataSize 5312
serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
name default.src
numFiles 1
numRows 500
rawDataSize 5312
serialization.ddl struct src { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src
name: default.src
Truncated Path -> Alias:
/src [src]
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col6 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Stage: Stage-0
Move Operator
tables:
partition:
ds 2008-04-08
hr 11
replace: true
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
properties:
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns col1,col2,col3,col4,col5
columns.comments
columns.types string:string:string:string:string
#### A masked pattern was here ####
name default.list_bucketing_mul_col_n0
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct list_bucketing_mul_col_n0 { string col1, string col2, string col3, string col4, string col5}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_mul_col_n0
Stage: Stage-2
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
Column Stats Desc:
Columns: col1, col2, col3, col4, col5
Column Types: string, string, string, string, string
Table: default.list_bucketing_mul_col_n0
Is Table Level Stats: false
PREHOOK: query: insert overwrite table list_bucketing_mul_col_n0 partition (ds = '2008-04-08', hr = '11')
select 1, key, 1, value, 1 from src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@list_bucketing_mul_col_n0@ds=2008-04-08/hr=11
POSTHOOK: query: insert overwrite table list_bucketing_mul_col_n0 partition (ds = '2008-04-08', hr = '11')
select 1, key, 1, value, 1 from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@list_bucketing_mul_col_n0@ds=2008-04-08/hr=11
POSTHOOK: Lineage: list_bucketing_mul_col_n0 PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
POSTHOOK: Lineage: list_bucketing_mul_col_n0 PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: list_bucketing_mul_col_n0 PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
POSTHOOK: Lineage: list_bucketing_mul_col_n0 PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: list_bucketing_mul_col_n0 PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
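The lineage above shows that col2 and col4 come straight from src.key and src.value, while col1, col3, and col5 are the constant '1'. Rows whose (col2, col4) pair matches one of the declared skew values are routed to that pair's subdirectory; all other rows land in a default directory. A sketch of a follow-up query to count the rows that hit each declared skew pair (a hypothetical check, not part of the test):

select col2, col4, count(*)
from list_bucketing_mul_col_n0
where ds = '2008-04-08' and hr = '11'
  and col2 in ('466', '287', '82')
group by col2, col4;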
PREHOOK: query: show partitions list_bucketing_mul_col_n0
PREHOOK: type: SHOWPARTITIONS
PREHOOK: Input: default@list_bucketing_mul_col_n0
POSTHOOK: query: show partitions list_bucketing_mul_col_n0
POSTHOOK: type: SHOWPARTITIONS
POSTHOOK: Input: default@list_bucketing_mul_col_n0
ds=2008-04-08/hr=11
PREHOOK: query: desc formatted list_bucketing_mul_col_n0 partition (ds='2008-04-08', hr='11')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@list_bucketing_mul_col_n0
POSTHOOK: query: desc formatted list_bucketing_mul_col_n0 partition (ds='2008-04-08', hr='11')
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@list_bucketing_mul_col_n0
# col_name data_type comment
col1 string
col2 string
col3 string
col4 string
col5 string
# Partition Information
# col_name data_type comment
ds string
hr string
# Detailed Partition Information
Partition Value: [2008-04-08, 11]
Database: default
Table: list_bucketing_mul_col_n0
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}}
numFiles 4
numRows 500
rawDataSize 6312
totalSize 7094
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Stored As SubDirectories: Yes
Skewed Columns: [col2, col4]
Skewed Values: [[287, val_287], [466, val_466], [82, val_82]]
#### A masked pattern was here ####
Skewed Value to Truncated Path: {[287, val_287]=/list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/col2=287/col4=val_287, [466, val_466]=/list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/col2=466/col4=val_466, [82, val_82]=/list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/col2=82/col4=val_82}
Storage Desc Params:
serialization.format 1
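The Skewed Value to Truncated Path map above confirms one subdirectory per declared skew pair, and numFiles 4 is consistent with three skewed subdirectories plus one default directory holding all remaining rows. The resulting layout under the partition looks roughly like this (the default directory name is an assumption; Hive conventionally uses HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME for non-skewed values):

.../list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/col2=287/col4=val_287/
.../list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/col2=466/col4=val_466/
.../list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/col2=82/col4=val_82/
.../list_bucketing_mul_col_n0/ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/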
PREHOOK: query: explain extended
select * from list_bucketing_mul_col_n0
where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
PREHOOK: type: QUERY
POSTHOOK: query: explain extended
select * from list_bucketing_mul_col_n0
where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-0
Fetch Operator
limit: -1
Partition Description:
Partition
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
ds 2008-04-08
hr 11
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}}
bucket_count -1
column.name.delimiter ,
columns col1,col2,col3,col4,col5
columns.comments
columns.types string:string:string:string:string
#### A masked pattern was here ####
name default.list_bucketing_mul_col_n0
numFiles 4
numRows 500
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 6312
serialization.ddl struct list_bucketing_mul_col_n0 { string col1, string col2, string col3, string col4, string col5}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
totalSize 7094
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
properties:
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns col1,col2,col3,col4,col5
columns.comments
columns.types string:string:string:string:string
#### A masked pattern was here ####
name default.list_bucketing_mul_col_n0
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct list_bucketing_mul_col_n0 { string col1, string col2, string col3, string col4, string col5}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_mul_col_n0
name: default.list_bucketing_mul_col_n0
Processor Tree:
TableScan
alias: list_bucketing_mul_col_n0
Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: ((col2 = '466') and (col4 = 'val_466')) (type: boolean)
Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: col1 (type: string), '466' (type: string), col3 (type: string), 'val_466' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: select * from list_bucketing_mul_col_n0
where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
PREHOOK: type: QUERY
PREHOOK: Input: default@list_bucketing_mul_col_n0
PREHOOK: Input: default@list_bucketing_mul_col_n0@ds=2008-04-08/hr=11
#### A masked pattern was here ####
POSTHOOK: query: select * from list_bucketing_mul_col_n0
where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
POSTHOOK: type: QUERY
POSTHOOK: Input: default@list_bucketing_mul_col_n0
POSTHOOK: Input: default@list_bucketing_mul_col_n0@ds=2008-04-08/hr=11
#### A masked pattern was here ####
1 466 1 val_466 1 2008-04-08 11
1 466 1 val_466 1 2008-04-08 11
1 466 1 val_466 1 2008-04-08 11
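Because ('466', 'val_466') is a declared skew pair, a session with list bucketing pruning enabled can answer this filter by scanning only the col2=466/col4=val_466 subdirectory instead of all four directories under the partition. A sketch of running the same query with pruning switched on (the flag name is an assumption and may differ across Hive versions):

set hive.optimize.listbucketing=true;
select * from list_bucketing_mul_col_n0
where ds = '2008-04-08' and hr = '11' and col2 = '466' and col4 = 'val_466';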
PREHOOK: query: explain extended
select * from list_bucketing_mul_col_n0
where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
PREHOOK: type: QUERY
POSTHOOK: query: explain extended
select * from list_bucketing_mul_col_n0
where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-0 is a root stage
STAGE PLANS:
Stage: Stage-0
Fetch Operator
limit: -1
Partition Description:
Partition
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
ds 2008-04-08
hr 11
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}}
bucket_count -1
column.name.delimiter ,
columns col1,col2,col3,col4,col5
columns.comments
columns.types string:string:string:string:string
#### A masked pattern was here ####
name default.list_bucketing_mul_col_n0
numFiles 4
numRows 500
partition_columns ds/hr
partition_columns.types string:string
rawDataSize 6312
serialization.ddl struct list_bucketing_mul_col_n0 { string col1, string col2, string col3, string col4, string col5}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
totalSize 7094
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
properties:
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns col1,col2,col3,col4,col5
columns.comments
columns.types string:string:string:string:string
#### A masked pattern was here ####
name default.list_bucketing_mul_col_n0
partition_columns ds/hr
partition_columns.types string:string
serialization.ddl struct list_bucketing_mul_col_n0 { string col1, string col2, string col3, string col4, string col5}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: default.list_bucketing_mul_col_n0
name: default.list_bucketing_mul_col_n0
Processor Tree:
TableScan
alias: list_bucketing_mul_col_n0
Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: ((col2 = '382') and (col4 = 'val_382')) (type: boolean)
Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: col1 (type: string), '382' (type: string), col3 (type: string), 'val_382' (type: string), col5 (type: string), '2008-04-08' (type: string), '11' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 125 Data size: 1578 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: select * from list_bucketing_mul_col_n0
where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
PREHOOK: type: QUERY
PREHOOK: Input: default@list_bucketing_mul_col_n0
PREHOOK: Input: default@list_bucketing_mul_col_n0@ds=2008-04-08/hr=11
#### A masked pattern was here ####
POSTHOOK: query: select * from list_bucketing_mul_col_n0
where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
POSTHOOK: type: QUERY
POSTHOOK: Input: default@list_bucketing_mul_col_n0
POSTHOOK: Input: default@list_bucketing_mul_col_n0@ds=2008-04-08/hr=11
#### A masked pattern was here ####
1 382 1 val_382 1 2008-04-08 11
1 382 1 val_382 1 2008-04-08 11
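In contrast, ('382', 'val_382') is not in the skew list, so these two rows live in the default directory and the scan cannot be narrowed to a dedicated subdirectory; the result is still correct, just served from the catch-all files. A hypothetical sanity check (not part of the test):

select count(*)
from list_bucketing_mul_col_n0
where ds = '2008-04-08' and hr = '11' and col2 = '382' and col4 = 'val_382';
-- expected: 2, matching the two rows above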
PREHOOK: query: drop table list_bucketing_mul_col_n0
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@list_bucketing_mul_col_n0
PREHOOK: Output: default@list_bucketing_mul_col_n0
POSTHOOK: query: drop table list_bucketing_mul_col_n0
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@list_bucketing_mul_col_n0
POSTHOOK: Output: default@list_bucketing_mul_col_n0