blob: aae7754bc88f49047debc9f244766b943a9c0599 [file] [log] [blame]
PREHOOK: query: CREATE TABLE test_table_n0 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@test_table_n0
POSTHOOK: query: CREATE TABLE test_table_n0 (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_table_n0
PREHOOK: query: EXPLAIN EXTENDED
INSERT OVERWRITE TABLE test_table_n0 PARTITION (ds = '2008-04-08', hr)
SELECT key2, value, cast(hr as int) FROM
(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 2) as hr
FROM srcpart
WHERE ds = '2008-04-08') a
DISTRIBUTE BY key2
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
PREHOOK: Output: default@test_table_n0@ds=2008-04-08
POSTHOOK: query: EXPLAIN EXTENDED
INSERT OVERWRITE TABLE test_table_n0 PARTITION (ds = '2008-04-08', hr)
SELECT key2, value, cast(hr as int) FROM
(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 2) as hr
FROM srcpart
WHERE ds = '2008-04-08') a
DISTRIBUTE BY key2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
filterExpr: (ds = '2008-04-08') (type: boolean)
Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Select Operator
expressions: if(((key % 3) < 2), 0, 1) (type: int), value (type: string), UDFToInteger((key % 2)) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
bucketingVersion: 2
null sort order:
numBuckets: -1
sort order:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int)
auto parallelism: false
Execution mode: vectorized, llap
LLAP IO: all inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
hr 11
properties:
column.name.delimiter ,
columns key,value
columns.types string:string
#### A masked pattern was here ####
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
#### A masked pattern was here ####
Partition
base file name: hr=12
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
hr 12
properties:
column.name.delimiter ,
columns key,value
columns.types string:string
#### A masked pattern was here ####
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
Reducer 2
Execution mode: llap
Needs Tagging: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), CAST( VALUE._col2 AS STRING) (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1000 Data size: 279000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
bucketingVersion: 2
key expressions: _col2 (type: string)
null sort order: a
numBuckets: -1
sort order: +
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 1000 Data size: 279000 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col0 (type: int), _col1 (type: string)
auto parallelism: false
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
outputColumnNames: key, value, hr
Statistics: Num rows: 1000 Data size: 373000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(key), max(key), count(1), count(key), compute_bit_vector(key, 'hll'), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector(value, 'hll')
keys: '2008-04-08' (type: string), hr (type: string)
minReductionHashAggr: 0.684
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 316 Data size: 214248 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
bucketingVersion: 2
key expressions: '2008-04-08' (type: string), _col1 (type: string)
null sort order: zz
numBuckets: -1
sort order: ++
Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string)
Statistics: Num rows: 316 Data size: 214248 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct<count:bigint,sum:double,input:int>), _col9 (type: bigint), _col10 (type: binary)
auto parallelism: false
Reducer 3
Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string)
outputColumnNames: _col0, _col1, _col2
File Output Operator
bucketingVersion: 2
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
Dp Sort State: PARTITION_SORTED
NumFilesPerFileSink: 1
Static Partition Specification: ds=2008-04-08/
Statistics: Num rows: 1000 Data size: 279000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
name default.test_table_n0
partition_columns ds/hr
partition_columns.types string:string
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table_n0
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
Reducer 4
Execution mode: llap
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector(VALUE._col8)
keys: '2008-04-08' (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 316 Data size: 192760 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 'LONG' (type: string), UDFToLong(_col2) (type: bigint), UDFToLong(_col3) (type: bigint), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), '2008-04-08' (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
Statistics: Num rows: 316 Data size: 255328 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
bucketingVersion: 2
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Statistics: Num rows: 316 Data size: 255328 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
bucketing_version -1
columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13
columns.types string:bigint:bigint:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:string
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
partition:
ds 2008-04-08
hr
replace: true
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
name default.test_table_n0
partition_columns ds/hr
partition_columns.types string:string
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table_n0
Stage: Stage-3
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
Column Stats Desc:
Columns: key, value
Column Types: int, string
Table: default.test_table_n0
Is Table Level Stats: false
PREHOOK: query: INSERT OVERWRITE TABLE test_table_n0 PARTITION (ds = '2008-04-08', hr)
SELECT key2, value, cast(hr as int) FROM
(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr
FROM srcpart
WHERE ds = '2008-04-08') a
DISTRIBUTE BY key2
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
PREHOOK: Output: default@test_table_n0@ds=2008-04-08
POSTHOOK: query: INSERT OVERWRITE TABLE test_table_n0 PARTITION (ds = '2008-04-08', hr)
SELECT key2, value, cast(hr as int) FROM
(SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr
FROM srcpart
WHERE ds = '2008-04-08') a
DISTRIBUTE BY key2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
POSTHOOK: Output: default@test_table_n0@ds=2008-04-08/hr=0
POSTHOOK: Output: default@test_table_n0@ds=2008-04-08/hr=1
POSTHOOK: Lineage: test_table_n0 PARTITION(ds=2008-04-08,hr=0).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table_n0 PARTITION(ds=2008-04-08,hr=0).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table_n0 PARTITION(ds=2008-04-08,hr=1).key EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table_n0 PARTITION(ds=2008-04-08,hr=1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: DESCRIBE FORMATTED test_table_n0 PARTITION (ds='2008-04-08', hr='0')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@test_table_n0
POSTHOOK: query: DESCRIBE FORMATTED test_table_n0 PARTITION (ds='2008-04-08', hr='0')
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@test_table_n0
# col_name data_type comment
key int
value string
# Partition Information
# col_name data_type comment
ds string
hr string
# Detailed Partition Information
Partition Value: [2008-04-08, 0]
Database: default
Table: test_table_n0
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 670
rawDataSize 5888
totalSize 6558
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: DESCRIBE FORMATTED test_table_n0 PARTITION (ds='2008-04-08', hr='1')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@test_table_n0
POSTHOOK: query: DESCRIBE FORMATTED test_table_n0 PARTITION (ds='2008-04-08', hr='1')
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@test_table_n0
# col_name data_type comment
key int
value string
# Partition Information
# col_name data_type comment
ds string
hr string
# Detailed Partition Information
Partition Value: [2008-04-08, 1]
Database: default
Table: test_table_n0
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 330
rawDataSize 2924
totalSize 3254
#### A masked pattern was here ####
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1