PREHOOK: query: CREATE TABLE test_table1_n14 (key STRING, value STRING)
CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@test_table1_n14
POSTHOOK: query: CREATE TABLE test_table1_n14 (key STRING, value STRING)
CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_table1_n14
PREHOOK: query: CREATE TABLE test_table2_n13 (key STRING, value STRING)
CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@test_table2_n13
POSTHOOK: query: CREATE TABLE test_table2_n13 (key STRING, value STRING)
CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_table2_n13
PREHOOK: query: INSERT OVERWRITE TABLE test_table1_n14 SELECT key, value FROM src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@test_table1_n14
POSTHOOK: query: INSERT OVERWRITE TABLE test_table1_n14 SELECT key, value FROM src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@test_table1_n14
POSTHOOK: Lineage: test_table1_n14.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1_n14.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: INSERT OVERWRITE TABLE test_table2_n13 SELECT key, value FROM src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@test_table2_n13
POSTHOOK: query: INSERT OVERWRITE TABLE test_table2_n13 SELECT key, value FROM src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@test_table2_n13
POSTHOOK: Lineage: test_table2_n13.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2_n13.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: CREATE TABLE test_table_out_n0 (key STRING, value STRING) PARTITIONED BY (part STRING)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@test_table_out_n0
POSTHOOK: query: CREATE TABLE test_table_out_n0 (key STRING, value STRING) PARTITIONED BY (part STRING)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_table_out_n0
PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT key, count(*) FROM test_table1_n14 GROUP BY key
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT key, count(*) FROM test_table1_n14 GROUP BY key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
  Stage-4
  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
  Stage-2 depends on stages: Stage-0
  Stage-3
  Stage-5
  Stage-6 depends on stages: Stage-5

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: test_table1_n14
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: key (type: string)
              outputColumnNames: key
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count()
                keys: key (type: string)
                mode: final
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), UDFToString(_col1) (type: string)
                  outputColumnNames: _col0, _col1
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.test_table_out_n0
                  Select Operator
                    expressions: _col0 (type: string), _col1 (type: string), '1' (type: string)
                    outputColumnNames: key, value, part
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
                      aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
                      keys: part (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
      Reduce Operator Tree:
        Group By Operator
          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
            outputColumnNames: _col0, _col1, _col2
            Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-7
    Conditional Operator

  Stage: Stage-4
    Move Operator
      files:
          hdfs directory: true
          destination: hdfs://### HDFS PATH ###

  Stage: Stage-0
    Move Operator
      tables:
          partition:
            part 1
          replace: true
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.test_table_out_n0

  Stage: Stage-2
    Stats Work
      Basic Stats Work:
      Column Stats Desc:
          Columns: key, value
          Column Types: string, string
          Table: default.test_table_out_n0

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            File Output Operator
              compressed: false
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.test_table_out_n0

  Stage: Stage-5
    Map Reduce
      Map Operator Tree:
          TableScan
            File Output Operator
              compressed: false
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.test_table_out_n0

  Stage: Stage-6
    Move Operator
      files:
          hdfs directory: true
          destination: hdfs://### HDFS PATH ###
PREHOOK: query: INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT key, count(*) FROM test_table1_n14 GROUP BY key
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1_n14
PREHOOK: Output: default@test_table_out_n0@part=1
POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT key, count(*) FROM test_table1_n14 GROUP BY key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_table1_n14
POSTHOOK: Output: default@test_table_out_n0@part=1
POSTHOOK: Lineage: test_table_out_n0 PARTITION(part=1).key SIMPLE [(test_table1_n14)test_table1_n14.FieldSchema(name:key, type:string, comment:null), ]
POSTHOOK: Lineage: test_table_out_n0 PARTITION(part=1).value EXPRESSION [(test_table1_n14)test_table1_n14.null, ]
PREHOOK: query: DESCRIBE FORMATTED test_table_out_n0 PARTITION (part = '1')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@test_table_out_n0
POSTHOOK: query: DESCRIBE FORMATTED test_table_out_n0 PARTITION (part = '1')
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@test_table_out_n0
# col_name	data_type	comment
key	string
value	string

# Partition Information
# col_name	data_type	comment
part	string

# Detailed Partition Information
Partition Value:	[1]
Database:	default
Table:	test_table_out_n0
#### A masked pattern was here ####
Partition Parameters:
	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
	numFiles	1
	numRows	309
	rawDataSize	1482
	totalSize	1791
#### A masked pattern was here ####

# Storage Information
SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat:	org.apache.hadoop.mapred.TextInputFormat
OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed:	No
Num Buckets:	-1
Bucket Columns:	[]
Sort Columns:	[]
Storage Desc Params:
	serialization.format	1
WARNING: Comparing a bigint and a string may result in a loss of precision.
PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT a.key, a.value FROM (
SELECT key, count(*) AS value FROM test_table1_n14 GROUP BY key
) a JOIN (
SELECT key, value FROM src
) b
ON (a.value = b.value)
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT a.key, a.value FROM (
SELECT key, count(*) AS value FROM test_table1_n14 GROUP BY key
) a JOIN (
SELECT key, value FROM src
) b
ON (a.value = b.value)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0, Stage-3
  Stage-3 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: test_table1_n14
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: key (type: string)
              outputColumnNames: key
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count()
                keys: key (type: string)
                mode: final
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
                  predicate: _col1 is not null (type: boolean)
                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: UDFToDouble(_col1) (type: double)
                    sort order: +
                    Map-reduce partition columns: UDFToDouble(_col1) (type: double)
                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col0 (type: string), _col1 (type: bigint)
          TableScan
            alias: src
            Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: value is not null (type: boolean)
              Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: value (type: string)
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: UDFToDouble(_col0) (type: double)
                  sort order: +
                  Map-reduce partition columns: UDFToDouble(_col0) (type: double)
                  Statistics: Num rows: 1 Data size: 58120 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          keys:
            0 UDFToDouble(_col1) (type: double)
            1 UDFToDouble(_col0) (type: double)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), UDFToString(_col1) (type: string)
            outputColumnNames: _col0, _col1
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.test_table_out_n0
            Select Operator
              expressions: _col0 (type: string), _col1 (type: string), '1' (type: string)
              outputColumnNames: key, value, part
              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
                keys: part (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
      tables:
          partition:
            part 1
          replace: true
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.test_table_out_n0

  Stage: Stage-2
    Stats Work
      Basic Stats Work:
      Column Stats Desc:
          Columns: key, value
          Column Types: string, string
          Table: default.test_table_out_n0

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
      Execution mode: vectorized
      Reduce Operator Tree:
        Group By Operator
          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
            outputColumnNames: _col0, _col1, _col2
            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT a.key, a.value FROM (
SELECT key, cast(count(*) AS STRING) AS value FROM test_table1_n14 GROUP BY key
) a JOIN (
SELECT key, value FROM src
) b
ON (a.value = b.value)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@test_table1_n14
PREHOOK: Output: default@test_table_out_n0@part=1
POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT a.key, a.value FROM (
SELECT key, cast(count(*) AS STRING) AS value FROM test_table1_n14 GROUP BY key
) a JOIN (
SELECT key, value FROM src
) b
ON (a.value = b.value)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@test_table1_n14
POSTHOOK: Output: default@test_table_out_n0@part=1
POSTHOOK: Lineage: test_table_out_n0 PARTITION(part=1).key SIMPLE [(test_table1_n14)test_table1_n14.FieldSchema(name:key, type:string, comment:null), ]
POSTHOOK: Lineage: test_table_out_n0 PARTITION(part=1).value EXPRESSION [(test_table1_n14)test_table1_n14.null, ]
PREHOOK: query: DESCRIBE FORMATTED test_table_out_n0 PARTITION (part = '1')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@test_table_out_n0
POSTHOOK: query: DESCRIBE FORMATTED test_table_out_n0 PARTITION (part = '1')
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@test_table_out_n0
# col_name	data_type	comment
key	string
value	string

# Partition Information
# col_name	data_type	comment
part	string

# Detailed Partition Information
Partition Value:	[1]
Database:	default
Table:	test_table_out_n0
#### A masked pattern was here ####
Partition Parameters:
	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
	numFiles	1
	numRows	0
	rawDataSize	0
	totalSize	0
#### A masked pattern was here ####

# Storage Information
SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat:	org.apache.hadoop.mapred.TextInputFormat
OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed:	No
Num Buckets:	1
Bucket Columns:	[value]
Sort Columns:	[Order(col:value, order:1)]
Storage Desc Params:
	serialization.format	1
PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1_n14 a JOIN test_table2_n13 b ON a.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1_n14 a JOIN test_table2_n13 b ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
  Stage-5
  Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
  Stage-3 depends on stages: Stage-0
  Stage-4
  Stage-6
  Stage-7 depends on stages: Stage-6

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: b
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Sorted Merge Bucket Map Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
                  0 key (type: string)
                  1 key (type: string)
                outputColumnNames: _col0, _col6
                Select Operator
                  expressions: _col0 (type: string), _col6 (type: string)
                  outputColumnNames: _col0, _col1
                  File Output Operator
                    compressed: false
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.test_table_out_n0
                  Select Operator
                    expressions: _col0 (type: string), _col1 (type: string)
                    outputColumnNames: key, value
                    Group By Operator
                      aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
                      keys: '1' (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
                      Reduce Output Operator
                        key expressions: '1' (type: string)
                        sort order: +
                        Map-reduce partition columns: '1' (type: string)
                        value expressions: _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
      Reduce Operator Tree:
        Group By Operator
          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
          keys: '1' (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
          Select Operator
            expressions: _col1 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), '1' (type: string)
            outputColumnNames: _col0, _col1, _col2
            File Output Operator
              compressed: false
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-8
    Conditional Operator

  Stage: Stage-5
    Move Operator
      files:
          hdfs directory: true
          destination: hdfs://### HDFS PATH ###

  Stage: Stage-0
    Move Operator
      tables:
          partition:
            part 1
          replace: true
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.test_table_out_n0

  Stage: Stage-3
    Stats Work
      Basic Stats Work:
      Column Stats Desc:
          Columns: key, value
          Column Types: string, string
          Table: default.test_table_out_n0

  Stage: Stage-4
    Map Reduce
      Map Operator Tree:
          TableScan
            File Output Operator
              compressed: false
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.test_table_out_n0

  Stage: Stage-6
    Map Reduce
      Map Operator Tree:
          TableScan
            File Output Operator
              compressed: false
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.test_table_out_n0

  Stage: Stage-7
    Move Operator
      files:
          hdfs directory: true
          destination: hdfs://### HDFS PATH ###
PREHOOK: query: INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1_n14 a JOIN test_table2_n13 b ON a.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1_n14
PREHOOK: Input: default@test_table2_n13
PREHOOK: Output: default@test_table_out_n0@part=1
POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1_n14 a JOIN test_table2_n13 b ON a.key = b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_table1_n14
POSTHOOK: Input: default@test_table2_n13
POSTHOOK: Output: default@test_table_out_n0@part=1
POSTHOOK: Lineage: test_table_out_n0 PARTITION(part=1).key SIMPLE [(test_table1_n14)a.FieldSchema(name:key, type:string, comment:null), ]
POSTHOOK: Lineage: test_table_out_n0 PARTITION(part=1).value SIMPLE [(test_table2_n13)b.FieldSchema(name:value, type:string, comment:null), ]
PREHOOK: query: DESCRIBE FORMATTED test_table_out_n0 PARTITION (part = '1')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@test_table_out_n0
POSTHOOK: query: DESCRIBE FORMATTED test_table_out_n0 PARTITION (part = '1')
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@test_table_out_n0
# col_name	data_type	comment
key	string
value	string

# Partition Information
# col_name	data_type	comment
part	string

# Detailed Partition Information
Partition Value:	[1]
Database:	default
Table:	test_table_out_n0
#### A masked pattern was here ####
Partition Parameters:
	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
	numFiles	1
	numRows	1028
	rawDataSize	10968
	totalSize	11996
#### A masked pattern was here ####

# Storage Information
SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat:	org.apache.hadoop.mapred.TextInputFormat
OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed:	No
Num Buckets:	-1
Bucket Columns:	[]
Sort Columns:	[]
Storage Desc Params:
	serialization.format	1
PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1_n14 a JOIN test_table2_n13 b ON a.key = b.key
GROUP BY b.value
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1_n14 a JOIN test_table2_n13 b ON a.key = b.key
GROUP BY b.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-3 depends on stages: Stage-0, Stage-4
  Stage-4 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: b
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Sorted Merge Bucket Map Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
                  0 key (type: string)
                  1 key (type: string)
                outputColumnNames: _col6
                Group By Operator
                  aggregations: count()
                  keys: _col6 (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
                    value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Select Operator
            expressions: _col0 (type: string), UDFToString(_col1) (type: string)
            outputColumnNames: _col0, _col1
            File Output Operator
              compressed: false
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.test_table_out_n0
            Select Operator
              expressions: _col0 (type: string), _col1 (type: string)
              outputColumnNames: key, value
              Group By Operator
                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
                keys: '1' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                File Output Operator
                  compressed: false
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-0
    Move Operator
      tables:
          partition:
            part 1
          replace: true
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.test_table_out_n0

  Stage: Stage-3
    Stats Work
      Basic Stats Work:
      Column Stats Desc:
          Columns: key, value
          Column Types: string, string
          Table: default.test_table_out_n0

  Stage: Stage-4
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: '1' (type: string)
              sort order: +
              Map-reduce partition columns: '1' (type: string)
              value expressions: _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
      Execution mode: vectorized
      Reduce Operator Tree:
        Group By Operator
          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
          keys: '1' (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
          Select Operator
            expressions: _col1 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), '1' (type: string)
            outputColumnNames: _col0, _col1, _col2
            File Output Operator
              compressed: false
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1_n14 a JOIN test_table2_n13 b ON a.key = b.key
GROUP BY b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1_n14
PREHOOK: Input: default@test_table2_n13
PREHOOK: Output: default@test_table_out_n0@part=1
POSTHOOK: query: INSERT OVERWRITE TABLE test_table_out_n0 PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1_n14 a JOIN test_table2_n13 b ON a.key = b.key
GROUP BY b.value
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_table1_n14
POSTHOOK: Input: default@test_table2_n13
POSTHOOK: Output: default@test_table_out_n0@part=1
POSTHOOK: Lineage: test_table_out_n0 PARTITION(part=1).key SIMPLE [(test_table2_n13)b.FieldSchema(name:value, type:string, comment:null), ]
POSTHOOK: Lineage: test_table_out_n0 PARTITION(part=1).value EXPRESSION [(test_table1_n14)a.null, (test_table2_n13)b.null, ]
PREHOOK: query: DESCRIBE FORMATTED test_table_out_n0 PARTITION (part = '1')
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@test_table_out_n0
POSTHOOK: query: DESCRIBE FORMATTED test_table_out_n0 PARTITION (part = '1')
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@test_table_out_n0
# col_name	data_type	comment
key	string
value	string

# Partition Information
# col_name	data_type	comment
part	string

# Detailed Partition Information
Partition Value:	[1]
Database:	default
Table:	test_table_out_n0
#### A masked pattern was here ####
Partition Parameters:
	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
	numFiles	1
	numRows	309
	rawDataSize	2728
	totalSize	3037
#### A masked pattern was here ####

# Storage Information
SerDe Library:	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat:	org.apache.hadoop.mapred.TextInputFormat
OutputFormat:	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed:	No
Num Buckets:	1
Bucket Columns:	[key]
Sort Columns:	[Order(col:key, order:1)]
Storage Desc Params:
	serialization.format	1