PREHOOK: query: CREATE TABLE dest_j1_n7(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@dest_j1_n7
POSTHOOK: query: CREATE TABLE dest_j1_n7(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest_j1_n7
PREHOOK: query: EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1_n7
SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Output: default@dest_j1_n7
POSTHOOK: query: EXPLAIN EXTENDED
INSERT OVERWRITE TABLE dest_j1_n7
SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Output: default@dest_j1_n7
OPTIMIZED SQL: SELECT `t4`.`key`, `t0`.`value`, `t2`.`value` AS `value1`
FROM (SELECT `value`
FROM `default`.`srcpart`
WHERE `ds` = '2008-04-08' AND (`hr` = 11 AND `value` IS NOT NULL)) AS `t0`
INNER JOIN ((SELECT `key`, `value`
FROM `default`.`src`
WHERE `key` IS NOT NULL) AS `t2` INNER JOIN (SELECT `key`, `value`
FROM `default`.`src1`
WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t4`.`value`
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 1 <- Map 2 (BROADCAST_EDGE)
Map 3 <- Map 1 (BROADCAST_EDGE)
Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: y
filterExpr: key is not null (type: boolean)
probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_52_container, bigKeyColName:key, smallTablePos:1, keyRatio:0.078
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
Estimated key counts: Map 2 => 25
keys:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col1, _col2, _col3
input vertices:
1 Map 2
Position of Big Table: 0
Statistics: Num rows: 39 Data size: 10374 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
bucketingVersion: 2
key expressions: _col3 (type: string)
null sort order: z
numBuckets: -1
sort order: +
Map-reduce partition columns: _col3 (type: string)
Statistics: Num rows: 39 Data size: 10374 Basic stats: COMPLETE Column stats: COMPLETE
tag: 0
value expressions: _col1 (type: string), _col2 (type: string)
auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: src
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.types string:string
#### A masked pattern was here ####
name default.src
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
name default.src
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src
name: default.src
Truncated Path -> Alias:
/src [y]
Map 2
Map Operator Tree:
TableScan
alias: x
filterExpr: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
bucketingVersion: 2
key expressions: _col0 (type: string)
null sort order: z
numBuckets: -1
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
tag: 1
value expressions: _col1 (type: string)
auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: src1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.types string:string
#### A masked pattern was here ####
name default.src1
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
name default.src1
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src1
name: default.src1
Truncated Path -> Alias:
/src1 [x]
Map 3
Map Operator Tree:
TableScan
alias: z
filterExpr: value is not null (type: boolean)
probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_53_container, bigKeyColName:value, smallTablePos:0, keyRatio:0.126
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: value (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
Estimated key counts: Map 1 => 39
keys:
0 _col3 (type: string)
1 _col0 (type: string)
outputColumnNames: _col1, _col2, _col4
input vertices:
0 Map 1
Position of Big Table: 1
Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
bucketingVersion: 2
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucketing_version 2
column.name.delimiter ,
columns key,value,val2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.dest_j1_n7
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1_n7
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
outputColumnNames: key, value, val2
Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector_hll(key), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value), max(length(val2)), avg(COALESCE(length(val2),0)), count(val2), compute_bit_vector_hll(val2)
minReductionHashAggr: 0.984127
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
bucketingVersion: 2
null sort order:
numBuckets: -1
sort order:
Statistics: Num rows: 1 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary)
auto parallelism: false
Execution mode: vectorized, llap
LLAP IO: all inputs
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: hr=11
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
hr 11
properties:
column.name.delimiter ,
columns key,value
columns.types string:string
#### A masked pattern was here ####
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments 'default','default'
columns.types string:string
#### A masked pattern was here ####
name default.srcpart
partition_columns ds/hr
partition_columns.types string:string
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcpart
name: default.srcpart
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [z]
Reducer 4
Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col9,0)) (type: bigint), COALESCE(_col10,0) (type: double), (_col2 - _col11) (type: bigint), COALESCE(ndv_compute_bit_vector(_col12),0) (type: bigint), _col12 (type: binary)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
bucketingVersion: 2
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Statistics: Num rows: 1 Data size: 798 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
bucketing_version -1
columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17
columns.types string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary:string:bigint:double:bigint:bigint:binary
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
replace: true
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucketing_version 2
column.name.delimiter ,
columns key,value,val2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.dest_j1_n7
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1_n7
Stage: Stage-3
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
Column Stats Desc:
Columns: key, value, val2
Column Types: string, string, string
Table: default.dest_j1_n7
Is Table Level Stats: true
PREHOOK: query: INSERT OVERWRITE TABLE dest_j1_n7
SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Output: default@dest_j1_n7
POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1_n7
SELECT x.key, z.value, y.value
FROM src1 x JOIN src y ON (x.key = y.key)
JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Output: default@dest_j1_n7
POSTHOOK: Lineage: dest_j1_n7.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest_j1_n7.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest_j1_n7.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from dest_j1_n7
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1_n7
#### A masked pattern was here ####
POSTHOOK: query: select * from dest_j1_n7
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest_j1_n7
#### A masked pattern was here ####
146 val_146 val_146
146 val_146 val_146
146 val_146 val_146
146 val_146 val_146
150 val_150 val_150
213 val_213 val_213
213 val_213 val_213
213 val_213 val_213
213 val_213 val_213
238 val_238 val_238
238 val_238 val_238
238 val_238 val_238
238 val_238 val_238
255 val_255 val_255
255 val_255 val_255
255 val_255 val_255
255 val_255 val_255
273 val_273 val_273
273 val_273 val_273
273 val_273 val_273
273 val_273 val_273
273 val_273 val_273
273 val_273 val_273
273 val_273 val_273
273 val_273 val_273
273 val_273 val_273
278 val_278 val_278
278 val_278 val_278
278 val_278 val_278
278 val_278 val_278
311 val_311 val_311
311 val_311 val_311
311 val_311 val_311
311 val_311 val_311
311 val_311 val_311
311 val_311 val_311
311 val_311 val_311
311 val_311 val_311
311 val_311 val_311
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
401 val_401 val_401
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
406 val_406 val_406
66 val_66 val_66
98 val_98 val_98
98 val_98 val_98
98 val_98 val_98
98 val_98 val_98