blob: 836d872f68ffed42ba23098335396ea4a1428910 [file] [log] [blame]
PREHOOK: query: drop table if exists TJOIN1
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table if exists TJOIN1
POSTHOOK: type: DROPTABLE
PREHOOK: query: drop table if exists TJOIN2
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table if exists TJOIN2
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@TJOIN1
POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@TJOIN1
PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@TJOIN2
POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@TJOIN2
PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@TJOIN1STAGE
POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@TJOIN1STAGE
PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@TJOIN2STAGE
POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@TJOIN2STAGE
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@tjoin1stage
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@tjoin1stage
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@tjoin2stage
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@tjoin2stage
PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1stage
PREHOOK: Output: default@tjoin1
POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1stage
POSTHOOK: Output: default@tjoin1
POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ]
POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ]
POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ]
_col0 _col1 _col2
PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin2stage
PREHOOK: Output: default@tjoin2
POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin2stage
POSTHOOK: Output: default@tjoin2
POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ]
POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ]
POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ]
tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2
PREHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: false
enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 1 <- Map 2 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: tjoin1
Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: rnum (type: int), c1 (type: int), c2 (type: int), (c2 > 15) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join 0 to 1
filter predicates:
0 {_col3}
1
keys:
0 _col1 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col5
input vertices:
1 Map 2
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: char(2))
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
LLAP IO: all inputs
Map 2
Map Operator Tree:
TableScan
alias: tjoin2
filterExpr: c1 is not null (type: boolean)
Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: c1 is not null (type: boolean)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: c1 (type: int), c2 (type: char(2))
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: char(2))
Execution mode: llap
LLAP IO: all inputs
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2
0 10 15 NULL
1 20 25 NULL
2 NULL 50 NULL
PREHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: false
enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 1 <- Map 2 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: tjoin1
Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: rnum (type: int), c1 (type: int), c2 (type: int), (c2 > 15) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join 0 to 1
filter predicates:
0 {_col3}
1
keys:
0 _col1 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col5
input vertices:
1 Map 2
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: char(2))
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
LLAP IO: all inputs
Map 2
Map Operator Tree:
TableScan
alias: tjoin2
filterExpr: c1 is not null (type: boolean)
Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: c1 is not null (type: boolean)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: c1 (type: int), c2 (type: char(2))
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: char(2))
Execution mode: llap
LLAP IO: all inputs
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2
0 10 15 NULL
1 20 25 NULL
2 NULL 50 NULL
PREHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 1 <- Map 2 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: tjoin1
Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: rnum (type: int), c1 (type: int), c2 (type: int), (c2 > 15) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4]
selectExpressions: LongColGreaterLongScalar(col 2:int, val 15) -> 4:boolean
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join 0 to 1
filter predicates:
0 {_col3}
1
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Map Join Vectorization:
bigTableKeyExpressions: col 1:int
bigTableValueExpressions: col 0:int, col 1:int, col 2:int
className: VectorMapJoinOuterFilteredOperator
native: false
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
outputColumnNames: _col0, _col1, _col2, _col5
input vertices:
1 Map 2
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: char(2))
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
includeColumns: [0, 1, 2]
dataColumns: rnum:int, c1:int, c2:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, string]
Map 2
Map Operator Tree:
TableScan
alias: tjoin2
filterExpr: c1 is not null (type: boolean)
Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 1:int)
predicate: c1 is not null (type: boolean)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: c1 (type: int), c2 (type: char(2))
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 2]
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 1:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 2:char(2)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: char(2))
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
includeColumns: [1, 2]
dataColumns: rnum:int, c1:int, c2:char(2)
partitionColumnCount: 0
scratchColumnTypeNames: []
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2
0 10 15 NULL
1 20 25 NULL
2 NULL 50 NULL
PREHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 1 <- Map 2 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: tjoin1
Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: rnum (type: int), c1 (type: int), c2 (type: int), (c2 > 15) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4]
selectExpressions: LongColGreaterLongScalar(col 2:int, val 15) -> 4:boolean
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join 0 to 1
filter predicates:
0 {_col3}
1
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Map Join Vectorization:
bigTableKeyExpressions: col 1:int
bigTableValueExpressions: col 0:int, col 1:int, col 2:int
className: VectorMapJoinOuterFilteredOperator
native: false
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
outputColumnNames: _col0, _col1, _col2, _col5
input vertices:
1 Map 2
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: char(2))
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
includeColumns: [0, 1, 2]
dataColumns: rnum:int, c1:int, c2:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, string]
Map 2
Map Operator Tree:
TableScan
alias: tjoin2
filterExpr: c1 is not null (type: boolean)
Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 1:int)
predicate: c1 is not null (type: boolean)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: c1 (type: int), c2 (type: char(2))
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 2]
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 1:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 2:char(2)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: char(2))
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
includeColumns: [1, 2]
dataColumns: rnum:int, c1:int, c2:char(2)
partitionColumnCount: 0
scratchColumnTypeNames: []
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2
0 10 15 NULL
1 20 25 NULL
2 NULL 50 NULL
PREHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 1 <- Map 2 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: tjoin1
Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: rnum (type: int), c1 (type: int), c2 (type: int), (c2 > 15) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4]
selectExpressions: LongColGreaterLongScalar(col 2:int, val 15) -> 4:boolean
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join 0 to 1
filter predicates:
0 {_col3}
1
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Map Join Vectorization:
bigTableFilterExpressions: SelectColumnIsTrue(col 4:boolean)
bigTableKeyColumns: 1:int
bigTableRetainColumnNums: [0, 1, 2]
bigTableValueColumns: 0:int, 1:int, 2:int
className: VectorMapJoinOuterLongOperator
native: true
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
projectedOutput: 0:int, 1:int, 2:int, 5:char(2)
smallTableValueMapping: 5:char(2)
hashTableImplementationType: OPTIMIZED
outputColumnNames: _col0, _col1, _col2, _col5
input vertices:
1 Map 2
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: char(2))
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 5]
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
includeColumns: [0, 1, 2]
dataColumns: rnum:int, c1:int, c2:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, string]
Map 2
Map Operator Tree:
TableScan
alias: tjoin2
filterExpr: c1 is not null (type: boolean)
Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 1:int)
predicate: c1 is not null (type: boolean)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: c1 (type: int), c2 (type: char(2))
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 2]
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 1:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 2:char(2)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: char(2))
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
includeColumns: [1, 2]
dataColumns: rnum:int, c1:int, c2:char(2)
partitionColumnCount: 0
scratchColumnTypeNames: []
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2
0 10 15 NULL
1 20 25 NULL
2 NULL 50 NULL
PREHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
Explain
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 1 <- Map 2 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: tjoin1
Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: rnum (type: int), c1 (type: int), c2 (type: int), (c2 > 15) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 4]
selectExpressions: LongColGreaterLongScalar(col 2:int, val 15) -> 4:boolean
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join 0 to 1
filter predicates:
0 {_col3}
1
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Map Join Vectorization:
bigTableFilterExpressions: SelectColumnIsTrue(col 4:boolean)
bigTableKeyColumns: 1:int
bigTableRetainColumnNums: [0, 1, 2]
bigTableValueColumns: 0:int, 1:int, 2:int
className: VectorMapJoinOuterLongOperator
native: true
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
projectedOutput: 0:int, 1:int, 2:int, 5:char(2)
smallTableValueMapping: 5:char(2)
hashTableImplementationType: OPTIMIZED
outputColumnNames: _col0, _col1, _col2, _col5
input vertices:
1 Map 2
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: char(2))
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 5]
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 4 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
includeColumns: [0, 1, 2]
dataColumns: rnum:int, c1:int, c2:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, string]
Map 2
Map Operator Tree:
TableScan
alias: tjoin2
filterExpr: c1 is not null (type: boolean)
Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 1:int)
predicate: c1 is not null (type: boolean)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: c1 (type: int), c2 (type: char(2))
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 2]
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 1:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 2:char(2)
Statistics: Num rows: 3 Data size: 270 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: char(2))
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
includeColumns: [1, 2]
dataColumns: rnum:int, c1:int, c2:char(2)
partitionColumnCount: 0
scratchColumnTypeNames: []
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
PREHOOK: Input: default@tjoin1
PREHOOK: Input: default@tjoin2
#### A masked pattern was here ####
POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2
0 10 15 NULL
1 20 25 NULL
2 NULL 50 NULL