PREHOOK: query: DROP TABLE Employee_Part
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE Employee_Part
POSTHOOK: type: DROPTABLE
PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double)
row format delimited fields terminated by '|' stored as textfile
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@Employee_Part
POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double)
row format delimited fields terminated by '|' stored as textfile
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@Employee_Part
PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=2000.0)
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@employee_part
POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=2000.0)
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@employee_part
POSTHOOK: Output: default@employee_part@employeesalary=2000.0
PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=4000.0)
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@employee_part
POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary=4000.0)
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@employee_part
POSTHOOK: Output: default@employee_part@employeesalary=4000.0
PREHOOK: query: explain
analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID
PREHOOK: type: ANALYZE_TABLE
POSTHOOK: query: explain
analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID
POSTHOOK: type: ANALYZE_TABLE
STAGE DEPENDENCIES:
Stage-0 is a root stage
Stage-1 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-0
Map Reduce
Map Operator Tree:
TableScan
alias: employee_part
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeeid (type: int)
outputColumnNames: employeeid
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 'hll')
keys: 2000.0D (type: double)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 2000.0D (type: double)
sort order: +
Map-reduce partition columns: 2000.0D (type: double)
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
keys: 2000.0D (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), 2000.0D (type: double)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-1
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: employeeID
Column Types: int
Table: default.employee_part
PREHOOK: query: explain extended
analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID
PREHOOK: type: ANALYZE_TABLE
POSTHOOK: query: explain extended
analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID
POSTHOOK: type: ANALYZE_TABLE
STAGE DEPENDENCIES:
Stage-0 is a root stage
Stage-1 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-0
Map Reduce
Map Operator Tree:
TableScan
alias: employee_part
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Statistics Aggregation Key Prefix: default.employee_part/
GatherStats: true
Select Operator
expressions: employeeid (type: int)
outputColumnNames: employeeid
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 'hll')
keys: 2000.0D (type: double)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 2000.0D (type: double)
null sort order: a
sort order: +
Map-reduce partition columns: 2000.0D (type: double)
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: employeesalary=2000.0
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
employeesalary 2000.0
properties:
bucket_count -1
column.name.delimiter ,
columns employeeid,employeename
columns.comments
columns.types int:string
field.delim |
#### A masked pattern was here ####
name default.employee_part
numFiles 1
numRows 0
partition_columns employeesalary
partition_columns.types double
rawDataSize 0
serialization.ddl struct employee_part { i32 employeeid, string employeename}
serialization.format |
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 105
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns employeeid,employeename
columns.comments
columns.types int:string
field.delim |
#### A masked pattern was here ####
name default.employee_part
partition_columns employeesalary
partition_columns.types double
serialization.ddl struct employee_part { i32 employeeid, string employeename}
serialization.format |
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.employee_part
name: default.employee_part
Truncated Path -> Alias:
/employee_part/employeesalary=2000.0 [employee_part]
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
keys: 2000.0D (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), 2000.0D (type: double)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0,_col1
columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:double
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Stage: Stage-1
Stats Work
Basic Stats Work:
Stats Aggregation Key Prefix: default.employee_part/
Column Stats Desc:
Columns: employeeID
Column Types: int
Table: default.employee_part
Is Table Level Stats: false
PREHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID
PREHOOK: type: ANALYZE_TABLE
PREHOOK: Input: default@employee_part
PREHOOK: Input: default@employee_part@employeesalary=2000.0
PREHOOK: Output: default@employee_part
PREHOOK: Output: default@employee_part@employeesalary=2000.0
#### A masked pattern was here ####
POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID
POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@employee_part
POSTHOOK: Input: default@employee_part@employeesalary=2000.0
POSTHOOK: Output: default@employee_part
POSTHOOK: Output: default@employee_part@employeesalary=2000.0
#### A masked pattern was here ####
PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0)
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0)
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type comment
employeeid int
employeename string
# Partition Information
# col_name data_type comment
employeesalary double
# Detailed Partition Information
Partition Value: [2000.0]
Database: default
Table: employee_part
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"employeeid\":\"true\"}}
numFiles 1
numRows 13
rawDataSize 92
totalSize 105
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
field.delim |
serialization.format |
PREHOOK: query: explain
analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID
PREHOOK: type: ANALYZE_TABLE
POSTHOOK: query: explain
analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID
POSTHOOK: type: ANALYZE_TABLE
STAGE DEPENDENCIES:
Stage-0 is a root stage
Stage-1 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-0
Map Reduce
Map Operator Tree:
TableScan
alias: employee_part
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeeid (type: int)
outputColumnNames: employeeid
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 'hll')
keys: 4000.0D (type: double)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 4000.0D (type: double)
sort order: +
Map-reduce partition columns: 4000.0D (type: double)
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
keys: 4000.0D (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), 4000.0D (type: double)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-1
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: employeeID
Column Types: int
Table: default.employee_part
PREHOOK: query: explain extended
analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID
PREHOOK: type: ANALYZE_TABLE
POSTHOOK: query: explain extended
analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID
POSTHOOK: type: ANALYZE_TABLE
STAGE DEPENDENCIES:
Stage-0 is a root stage
Stage-1 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-0
Map Reduce
Map Operator Tree:
TableScan
alias: employee_part
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Statistics Aggregation Key Prefix: default.employee_part/
GatherStats: true
Select Operator
expressions: employeeid (type: int)
outputColumnNames: employeeid
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 'hll')
keys: 4000.0D (type: double)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 4000.0D (type: double)
null sort order: a
sort order: +
Map-reduce partition columns: 4000.0D (type: double)
Statistics: Num rows: 3 Data size: 1050 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: employeesalary=4000.0
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
employeesalary 4000.0
properties:
bucket_count -1
column.name.delimiter ,
columns employeeid,employeename
columns.comments
columns.types int:string
field.delim |
#### A masked pattern was here ####
name default.employee_part
numFiles 1
numRows 0
partition_columns employeesalary
partition_columns.types double
rawDataSize 0
serialization.ddl struct employee_part { i32 employeeid, string employeename}
serialization.format |
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 105
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns employeeid,employeename
columns.comments
columns.types int:string
field.delim |
#### A masked pattern was here ####
name default.employee_part
partition_columns employeesalary
partition_columns.types double
serialization.ddl struct employee_part { i32 employeeid, string employeename}
serialization.format |
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.employee_part
name: default.employee_part
Truncated Path -> Alias:
/employee_part/employeesalary=4000.0 [employee_part]
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
keys: 4000.0D (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), 4000.0D (type: double)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0,_col1
columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:double
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Stage: Stage-1
Stats Work
Basic Stats Work:
Stats Aggregation Key Prefix: default.employee_part/
Column Stats Desc:
Columns: employeeID
Column Types: int
Table: default.employee_part
Is Table Level Stats: false
PREHOOK: query: analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID
PREHOOK: type: ANALYZE_TABLE
PREHOOK: Input: default@employee_part
PREHOOK: Input: default@employee_part@employeesalary=4000.0
PREHOOK: Output: default@employee_part
PREHOOK: Output: default@employee_part@employeesalary=4000.0
#### A masked pattern was here ####
POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID
POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@employee_part
POSTHOOK: Input: default@employee_part@employeesalary=4000.0
POSTHOOK: Output: default@employee_part
POSTHOOK: Output: default@employee_part@employeesalary=4000.0
#### A masked pattern was here ####
PREHOOK: query: explain
analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns
PREHOOK: type: ANALYZE_TABLE
POSTHOOK: query: explain
analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns
POSTHOOK: type: ANALYZE_TABLE
STAGE DEPENDENCIES:
Stage-0 is a root stage
Stage-1 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-0
Map Reduce
Map Operator Tree:
TableScan
alias: employee_part
Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeeid (type: int), employeename (type: string)
outputColumnNames: employeeid, employeename
Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll')
keys: 2000.0D (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 2000.0D (type: double)
sort order: +
Map-reduce partition columns: 2000.0D (type: double)
Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
keys: 2000.0D (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), 2000.0D (type: double)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-1
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: employeeid, employeename
Column Types: int, string
Table: default.employee_part
PREHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns
PREHOOK: type: ANALYZE_TABLE
PREHOOK: Input: default@employee_part
PREHOOK: Input: default@employee_part@employeesalary=2000.0
PREHOOK: Output: default@employee_part
PREHOOK: Output: default@employee_part@employeesalary=2000.0
#### A masked pattern was here ####
POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns
POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@employee_part
POSTHOOK: Input: default@employee_part@employeesalary=2000.0
POSTHOOK: Output: default@employee_part
POSTHOOK: Output: default@employee_part@employeesalary=2000.0
#### A masked pattern was here ####
PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeID
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
col_name employeeID
data_type int
min 16
max 34
num_nulls 1
distinct_count 12
avg_col_len
max_col_len
num_trues
num_falses
bitVector HL
comment from deserializer
PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
col_name employeeName
data_type string
min
max
num_nulls 1
distinct_count 12
avg_col_len 4.3076923076923075
max_col_len 6
num_trues
num_falses
bitVector HL
comment from deserializer
PREHOOK: query: explain
analyze table Employee_Part compute statistics for columns
PREHOOK: type: ANALYZE_TABLE
POSTHOOK: query: explain
analyze table Employee_Part compute statistics for columns
POSTHOOK: type: ANALYZE_TABLE
STAGE DEPENDENCIES:
Stage-0 is a root stage
Stage-1 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-0
Map Reduce
Map Operator Tree:
TableScan
alias: employee_part
Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeeid (type: int), employeename (type: string), employeesalary (type: double)
outputColumnNames: employeeid, employeename, employeesalary
Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll')
keys: employeesalary (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
Map-reduce partition columns: _col0 (type: double)
Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
keys: KEY._col0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: double)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-1
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: employeeid, employeename
Column Types: int, string
Table: default.employee_part
PREHOOK: query: analyze table Employee_Part compute statistics for columns
PREHOOK: type: ANALYZE_TABLE
PREHOOK: Input: default@employee_part
PREHOOK: Input: default@employee_part@employeesalary=2000.0
PREHOOK: Input: default@employee_part@employeesalary=4000.0
PREHOOK: Output: default@employee_part
PREHOOK: Output: default@employee_part@employeesalary=2000.0
PREHOOK: Output: default@employee_part@employeesalary=4000.0
#### A masked pattern was here ####
POSTHOOK: query: analyze table Employee_Part compute statistics for columns
POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@employee_part
POSTHOOK: Input: default@employee_part@employeesalary=2000.0
POSTHOOK: Input: default@employee_part@employeesalary=4000.0
POSTHOOK: Output: default@employee_part
POSTHOOK: Output: default@employee_part@employeesalary=2000.0
POSTHOOK: Output: default@employee_part@employeesalary=4000.0
#### A masked pattern was here ####
PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
col_name employeeID
data_type int
min 16
max 34
num_nulls 1
distinct_count 12
avg_col_len
max_col_len
num_trues
num_falses
bitVector HL
comment from deserializer
PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
col_name employeeID
data_type int
min 16
max 34
num_nulls 1
distinct_count 12
avg_col_len
max_col_len
num_trues
num_falses
bitVector HL
comment from deserializer
PREHOOK: query: explain
analyze table Employee_Part compute statistics for columns
PREHOOK: type: ANALYZE_TABLE
POSTHOOK: query: explain
analyze table Employee_Part compute statistics for columns
POSTHOOK: type: ANALYZE_TABLE
STAGE DEPENDENCIES:
Stage-0 is a root stage
Stage-1 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-0
Map Reduce
Map Operator Tree:
TableScan
alias: employee_part
Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeeid (type: int), employeename (type: string)
outputColumnNames: employeeid, employeename
Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll')
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-1
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: employeeid, employeename
Column Types: int, string
Table: default.employee_part
PREHOOK: query: analyze table Employee_Part compute statistics for columns
PREHOOK: type: ANALYZE_TABLE
PREHOOK: Input: default@employee_part
PREHOOK: Input: default@employee_part@employeesalary=2000.0
PREHOOK: Input: default@employee_part@employeesalary=4000.0
PREHOOK: Output: default@employee_part
PREHOOK: Output: default@employee_part@employeesalary=2000.0
PREHOOK: Output: default@employee_part@employeesalary=4000.0
#### A masked pattern was here ####
POSTHOOK: query: analyze table Employee_Part compute statistics for columns
POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@employee_part
POSTHOOK: Input: default@employee_part@employeesalary=2000.0
POSTHOOK: Input: default@employee_part@employeesalary=4000.0
POSTHOOK: Output: default@employee_part
POSTHOOK: Output: default@employee_part@employeesalary=2000.0
POSTHOOK: Output: default@employee_part@employeesalary=4000.0
#### A masked pattern was here ####
PREHOOK: query: describe formatted Employee_Part employeeID
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted Employee_Part employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
col_name employeeID
data_type int
min 16
max 34
num_nulls 2
distinct_count 12
avg_col_len
max_col_len
num_trues
num_falses
bitVector HL
comment from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}}
PREHOOK: query: create database if not exists dummydb
PREHOOK: type: CREATEDATABASE
PREHOOK: Output: database:dummydb
POSTHOOK: query: create database if not exists dummydb
POSTHOOK: type: CREATEDATABASE
POSTHOOK: Output: database:dummydb
PREHOOK: query: use dummydb
PREHOOK: type: SWITCHDATABASE
PREHOOK: Input: database:dummydb
POSTHOOK: query: use dummydb
POSTHOOK: type: SWITCHDATABASE
POSTHOOK: Input: database:dummydb
PREHOOK: query: analyze table default.Employee_Part partition (employeeSalary=2000.0) compute statistics for columns
PREHOOK: type: ANALYZE_TABLE
PREHOOK: Input: default@employee_part
PREHOOK: Input: default@employee_part@employeesalary=2000.0
PREHOOK: Output: default@employee_part
PREHOOK: Output: default@employee_part@employeesalary=2000.0
#### A masked pattern was here ####
POSTHOOK: query: analyze table default.Employee_Part partition (employeeSalary=2000.0) compute statistics for columns
POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@employee_part
POSTHOOK: Input: default@employee_part@employeesalary=2000.0
POSTHOOK: Output: default@employee_part
POSTHOOK: Output: default@employee_part@employeesalary=2000.0
#### A masked pattern was here ####
PREHOOK: query: describe formatted default.Employee_Part partition (employeeSalary=2000.0) employeeID
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
POSTHOOK: query: describe formatted default.Employee_Part partition (employeeSalary=2000.0) employeeID
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
col_name employeeID
data_type int
min 16
max 34
num_nulls 1
distinct_count 12
avg_col_len
max_col_len
num_trues
num_falses
bitVector HL
comment from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}}
PREHOOK: query: analyze table default.Employee_Part compute statistics for columns
PREHOOK: type: ANALYZE_TABLE
PREHOOK: Input: default@employee_part
PREHOOK: Input: default@employee_part@employeesalary=2000.0
PREHOOK: Input: default@employee_part@employeesalary=4000.0
PREHOOK: Output: default@employee_part
PREHOOK: Output: default@employee_part@employeesalary=2000.0
PREHOOK: Output: default@employee_part@employeesalary=4000.0
#### A masked pattern was here ####
POSTHOOK: query: analyze table default.Employee_Part compute statistics for columns
POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@employee_part
POSTHOOK: Input: default@employee_part@employeesalary=2000.0
POSTHOOK: Input: default@employee_part@employeesalary=4000.0
POSTHOOK: Output: default@employee_part
POSTHOOK: Output: default@employee_part@employeesalary=2000.0
POSTHOOK: Output: default@employee_part@employeesalary=4000.0
#### A masked pattern was here ####
PREHOOK: query: use default
PREHOOK: type: SWITCHDATABASE
PREHOOK: Input: database:default
POSTHOOK: query: use default
POSTHOOK: type: SWITCHDATABASE
POSTHOOK: Input: database:default
PREHOOK: query: drop database dummydb
PREHOOK: type: DROPDATABASE
PREHOOK: Input: database:dummydb
PREHOOK: Output: database:dummydb
POSTHOOK: query: drop database dummydb
POSTHOOK: type: DROPDATABASE
POSTHOOK: Input: database:dummydb
POSTHOOK: Output: database:dummydb