blob: 33fac4528a61856f0b8356161b42817f2ac428cd [file] [log] [blame]
PREHOOK: query: CREATE TABLE srcbucket_mapjoin_n0(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@srcbucket_mapjoin_n0
POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_n0(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcbucket_mapjoin_n0
PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n0
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_n0
POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_n0
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_n0
PREHOOK: query: load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_n0
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_n0
POSTHOOK: query: load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_n0
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_n0
PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_n0 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@srcbucket_mapjoin_part_n0
POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_n0 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0
PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_n0
POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
POSTHOOK: query: load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
POSTHOOK: query: load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
POSTHOOK: query: load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_n0
POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
PREHOOK: query: load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
POSTHOOK: query: load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
PREHOOK: query: load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
POSTHOOK: query: load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
PREHOOK: query: load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
POSTHOOK: query: load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n0 partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@srcbucket_mapjoin_part_2
POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcbucket_mapjoin_part_2
PREHOOK: query: load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_2
POSTHOOK: query: load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_2
POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
POSTHOOK: query: load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_2
POSTHOOK: query: load data local inpath '../../data/files/bmj2/000000_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_2
POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09
PREHOOK: query: load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09
POSTHOOK: query: load data local inpath '../../data/files/bmj2/000001_0' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-09
PREHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucketmapjoin_hash_result_1
POSTHOOK: query: create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucketmapjoin_hash_result_1
PREHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucketmapjoin_hash_result_2
POSTHOOK: query: create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucketmapjoin_hash_result_2
PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucketmapjoin_tmp_result
POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucketmapjoin_tmp_result
PREHOOK: query: explain extended
insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_n0 b
on a.key=b.key
PREHOOK: type: QUERY
POSTHOOK: query: explain extended
insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_n0 b
on a.key=b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-9 is a root stage
Stage-1 depends on stages: Stage-9
Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
Stage-5
Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
Stage-3 depends on stages: Stage-0
Stage-4
Stage-6
Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-9
Map Reduce Local Work
Alias -> Map Local Tables:
a
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
a
TableScan
alias: a
Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 key (type: int)
1 key (type: int)
Position of Big Table: 1
Bucket Mapjoin Context:
Alias Bucket Base File Name Mapping:
a {ds=2008-04-08/000000_0=[000000_0], ds=2008-04-08/000001_0=[000001_0], ds=2008-04-08/000002_0=[000000_0], ds=2008-04-08/000003_0=[000001_0], ds=2008-04-09/000000_0=[000000_0], ds=2008-04-09/000001_0=[000001_0], ds=2008-04-09/000002_0=[000000_0], ds=2008-04-09/000003_0=[000001_0]}
Alias Bucket File Name Mapping:
#### A masked pattern was here ####
Alias Bucket Output File Name Mapping:
#### A masked pattern was here ####
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: b
Statistics: Num rows: 298 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 298 Data size: 116240 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 key (type: int)
1 key (type: int)
outputColumnNames: _col0, _col1, _col6
Position of Big Table: 1
Statistics: Num rows: 327 Data size: 127864 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
Select Operator
expressions: UDFToString(_col0) (type: string), _col1 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 327 Data size: 127864 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Statistics: Num rows: 327 Data size: 127864 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 0
numRows 0
rawDataSize 0
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 0
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
outputColumnNames: key, value1, value2
Statistics: Num rows: 327 Data size: 127864 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
name default.srcbucket_mapjoin_part_n0
numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
serialization.ddl struct srcbucket_mapjoin_part_n0 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count 4
bucket_field_name key
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
name default.srcbucket_mapjoin_part_n0
partition_columns ds
partition_columns.types string
serialization.ddl struct srcbucket_mapjoin_part_n0 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_n0
name: default.srcbucket_mapjoin_part_n0
#### A masked pattern was here ####
Partition
base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
properties:
bucket_count 4
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
name default.srcbucket_mapjoin_part_n0
numFiles 4
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
serialization.ddl struct srcbucket_mapjoin_part_n0 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count 4
bucket_field_name key
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
name default.srcbucket_mapjoin_part_n0
partition_columns ds
partition_columns.types string
serialization.ddl struct srcbucket_mapjoin_part_n0 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_n0
name: default.srcbucket_mapjoin_part_n0
Truncated Path -> Alias:
/srcbucket_mapjoin_part_n0/ds=2008-04-08 [b]
/srcbucket_mapjoin_part_n0/ds=2008-04-09 [b]
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0,_col1,_col2
columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Stage: Stage-8
Conditional Operator
Stage: Stage-5
Move Operator
files:
hdfs directory: true
#### A masked pattern was here ####
Stage: Stage-0
Move Operator
tables:
replace: true
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 0
numRows 0
rawDataSize 0
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 0
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
Stage: Stage-3
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
Column Stats Desc:
Columns: key, value1, value2
Column Types: string, string, string
Table: default.bucketmapjoin_tmp_result
Is Table Level Stats: true
Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
GatherStats: false
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 0
numRows 0
rawDataSize 0
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 0
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: -ext-10002
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 0
numRows 0
rawDataSize 0
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 0
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 0
numRows 0
rawDataSize 0
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 0
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
name: default.bucketmapjoin_tmp_result
Truncated Path -> Alias:
#### A masked pattern was here ####
Stage: Stage-6
Map Reduce
Map Operator Tree:
TableScan
GatherStats: false
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 0
numRows 0
rawDataSize 0
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 0
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: -ext-10002
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 0
numRows 0
rawDataSize 0
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 0
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 0
numRows 0
rawDataSize 0
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 0
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
name: default.bucketmapjoin_tmp_result
Truncated Path -> Alias:
#### A masked pattern was here ####
Stage: Stage-7
Move Operator
files:
hdfs directory: true
#### A masked pattern was here ####
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_n0 b
on a.key=b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@srcbucket_mapjoin_n0
PREHOOK: Input: default@srcbucket_mapjoin_part_n0
PREHOOK: Input: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
PREHOOK: Input: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
PREHOOK: Output: default@bucketmapjoin_tmp_result
POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_n0 b
on a.key=b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcbucket_mapjoin_n0
POSTHOOK: Input: default@srcbucket_mapjoin_part_n0
POSTHOOK: Input: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
POSTHOOK: Input: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
POSTHOOK: Output: default@bucketmapjoin_tmp_result
POSTHOOK: Lineage: bucketmapjoin_tmp_result.key EXPRESSION [(srcbucket_mapjoin_n0)a.FieldSchema(name:key, type:int, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_n0)a.FieldSchema(name:value, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_n0)b.FieldSchema(name:value, type:string, comment:null), ]
PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketmapjoin_tmp_result
#### A masked pattern was here ####
POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketmapjoin_tmp_result
#### A masked pattern was here ####
928
PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketmapjoin_tmp_result
PREHOOK: Output: default@bucketmapjoin_hash_result_1
POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketmapjoin_tmp_result
POSTHOOK: Output: default@bucketmapjoin_hash_result_1
POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ]
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_n0 b
on a.key=b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@srcbucket_mapjoin_n0
PREHOOK: Input: default@srcbucket_mapjoin_part_n0
PREHOOK: Input: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
PREHOOK: Input: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
PREHOOK: Output: default@bucketmapjoin_tmp_result
POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_n0 b
on a.key=b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcbucket_mapjoin_n0
POSTHOOK: Input: default@srcbucket_mapjoin_part_n0
POSTHOOK: Input: default@srcbucket_mapjoin_part_n0@ds=2008-04-08
POSTHOOK: Input: default@srcbucket_mapjoin_part_n0@ds=2008-04-09
POSTHOOK: Output: default@bucketmapjoin_tmp_result
POSTHOOK: Lineage: bucketmapjoin_tmp_result.key EXPRESSION [(srcbucket_mapjoin_n0)a.FieldSchema(name:key, type:int, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_n0)a.FieldSchema(name:value, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_n0)b.FieldSchema(name:value, type:string, comment:null), ]
PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketmapjoin_tmp_result
#### A masked pattern was here ####
POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketmapjoin_tmp_result
#### A masked pattern was here ####
928
PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketmapjoin_tmp_result
PREHOOK: Output: default@bucketmapjoin_hash_result_2
POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketmapjoin_tmp_result
POSTHOOK: Output: default@bucketmapjoin_hash_result_2
POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ]
PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2
from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b
on a.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketmapjoin_hash_result_1
PREHOOK: Input: default@bucketmapjoin_hash_result_2
#### A masked pattern was here ####
POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2
from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b
on a.key = b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketmapjoin_hash_result_1
POSTHOOK: Input: default@bucketmapjoin_hash_result_2
#### A masked pattern was here ####
0 0 0
PREHOOK: query: explain extended
insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_2 b
on a.key=b.key
PREHOOK: type: QUERY
POSTHOOK: query: explain extended
insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_2 b
on a.key=b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-9 is a root stage
Stage-1 depends on stages: Stage-9
Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
Stage-5
Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
Stage-3 depends on stages: Stage-0
Stage-4
Stage-6
Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-9
Map Reduce Local Work
Alias -> Map Local Tables:
a
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
a
TableScan
alias: a
Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 1 Data size: 27500 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 key (type: int)
1 key (type: int)
Position of Big Table: 1
Bucket Mapjoin Context:
Alias Bucket Base File Name Mapping:
a {ds=2008-04-08/000000_0=[000000_0], ds=2008-04-08/000001_0=[000001_0], ds=2008-04-09/000000_0=[000000_0], ds=2008-04-09/000001_0=[000001_0]}
Alias Bucket File Name Mapping:
#### A masked pattern was here ####
Alias Bucket Output File Name Mapping:
#### A masked pattern was here ####
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: b
Statistics: Num rows: 156 Data size: 61240 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 156 Data size: 61240 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 key (type: int)
1 key (type: int)
outputColumnNames: _col0, _col1, _col6
Position of Big Table: 1
Statistics: Num rows: 171 Data size: 67364 Basic stats: COMPLETE Column stats: NONE
BucketMapJoin: true
Select Operator
expressions: UDFToString(_col0) (type: string), _col1 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 171 Data size: 67364 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 1
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Statistics: Num rows: 171 Data size: 67364 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 1
numRows 928
rawDataSize 17038
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 17966
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
outputColumnNames: key, value1, value2
Statistics: Num rows: 171 Data size: 67364 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
null sort order:
sort order:
Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-08
properties:
bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 3062
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count 2
bucket_field_name key
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
name default.srcbucket_mapjoin_part_2
partition_columns ds
partition_columns.types string
serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_2
name: default.srcbucket_mapjoin_part_2
#### A masked pattern was here ####
Partition
base file name: ds=2008-04-09
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
ds 2008-04-09
properties:
bucket_count 2
bucket_field_name key
column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
name default.srcbucket_mapjoin_part_2
numFiles 2
numRows 0
partition_columns ds
partition_columns.types string
rawDataSize 0
serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 3062
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
bucket_count 2
bucket_field_name key
bucketing_version 2
column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
name default.srcbucket_mapjoin_part_2
partition_columns ds
partition_columns.types string
serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.srcbucket_mapjoin_part_2
name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
/srcbucket_mapjoin_part_2/ds=2008-04-09 [b]
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0,_col1,_col2
columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
escape.delim \
hive.serialization.extend.additional.nesting.levels true
serialization.escape.crlf true
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Stage: Stage-8
Conditional Operator
Stage: Stage-5
Move Operator
files:
hdfs directory: true
#### A masked pattern was here ####
Stage: Stage-0
Move Operator
tables:
replace: true
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 1
numRows 928
rawDataSize 17038
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 17966
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
Stage: Stage-3
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
Column Stats Desc:
Columns: key, value1, value2
Column Types: string, string, string
Table: default.bucketmapjoin_tmp_result
Is Table Level Stats: true
Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
GatherStats: false
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 1
numRows 928
rawDataSize 17038
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 17966
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: -ext-10002
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 1
numRows 928
rawDataSize 17038
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 17966
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 1
numRows 928
rawDataSize 17038
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 17966
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
name: default.bucketmapjoin_tmp_result
Truncated Path -> Alias:
#### A masked pattern was here ####
Stage: Stage-6
Map Reduce
Map Operator Tree:
TableScan
GatherStats: false
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 1
numRows 928
rawDataSize 17038
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 17966
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
base file name: -ext-10002
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 1
numRows 928
rawDataSize 17038
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 17966
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
columns key,value1,value2
columns.comments
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
numFiles 1
numRows 928
rawDataSize 17038
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 17966
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.bucketmapjoin_tmp_result
name: default.bucketmapjoin_tmp_result
Truncated Path -> Alias:
#### A masked pattern was here ####
Stage: Stage-7
Move Operator
files:
hdfs directory: true
#### A masked pattern was here ####
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_2 b
on a.key=b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@srcbucket_mapjoin_n0
PREHOOK: Input: default@srcbucket_mapjoin_part_2
PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09
PREHOOK: Output: default@bucketmapjoin_tmp_result
POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_2 b
on a.key=b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcbucket_mapjoin_n0
POSTHOOK: Input: default@srcbucket_mapjoin_part_2
POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09
POSTHOOK: Output: default@bucketmapjoin_tmp_result
POSTHOOK: Lineage: bucketmapjoin_tmp_result.key EXPRESSION [(srcbucket_mapjoin_n0)a.FieldSchema(name:key, type:int, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_n0)a.FieldSchema(name:value, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ]
PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketmapjoin_tmp_result
#### A masked pattern was here ####
POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketmapjoin_tmp_result
#### A masked pattern was here ####
0
PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_1
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketmapjoin_tmp_result
PREHOOK: Output: default@bucketmapjoin_hash_result_1
POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_1
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketmapjoin_tmp_result
POSTHOOK: Output: default@bucketmapjoin_hash_result_1
POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ]
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_2 b
on a.key=b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@srcbucket_mapjoin_n0
PREHOOK: Input: default@srcbucket_mapjoin_part_2
PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09
PREHOOK: Output: default@bucketmapjoin_tmp_result
POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_n0 a join srcbucket_mapjoin_part_2 b
on a.key=b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcbucket_mapjoin_n0
POSTHOOK: Input: default@srcbucket_mapjoin_part_2
POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-09
POSTHOOK: Output: default@bucketmapjoin_tmp_result
POSTHOOK: Lineage: bucketmapjoin_tmp_result.key EXPRESSION [(srcbucket_mapjoin_n0)a.FieldSchema(name:key, type:int, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_n0)a.FieldSchema(name:value, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ]
PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketmapjoin_tmp_result
#### A masked pattern was here ####
POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketmapjoin_tmp_result
#### A masked pattern was here ####
0
PREHOOK: query: insert overwrite table bucketmapjoin_hash_result_2
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketmapjoin_tmp_result
PREHOOK: Output: default@bucketmapjoin_hash_result_2
POSTHOOK: query: insert overwrite table bucketmapjoin_hash_result_2
select sum(hash(key)), sum(hash(value1)), sum(hash(value2)) from bucketmapjoin_tmp_result
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketmapjoin_tmp_result
POSTHOOK: Output: default@bucketmapjoin_hash_result_2
POSTHOOK: Lineage: bucketmapjoin_hash_result_2.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ]
POSTHOOK: Lineage: bucketmapjoin_hash_result_2.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ]
PREHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2
from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b
on a.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketmapjoin_hash_result_1
PREHOOK: Input: default@bucketmapjoin_hash_result_2
#### A masked pattern was here ####
POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2
from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b
on a.key = b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketmapjoin_hash_result_1
POSTHOOK: Input: default@bucketmapjoin_hash_result_2
#### A masked pattern was here ####
NULL NULL NULL