blob: 1511b4a404fb15d3b0f9bc2501d0372f5ad71f75 [file] [log] [blame]
PREHOOK: query: CREATE TABLE orc_table_1(v1 STRING, a INT) STORED AS ORC
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@orc_table_1
POSTHOOK: query: CREATE TABLE orc_table_1(v1 STRING, a INT) STORED AS ORC
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@orc_table_1
PREHOOK: query: CREATE TABLE orc_table_2(c INT, v2 STRING) STORED AS ORC
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@orc_table_2
POSTHOOK: query: CREATE TABLE orc_table_2(c INT, v2 STRING) STORED AS ORC
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@orc_table_2
PREHOOK: query: insert into table orc_table_1 values ("<null1>", null),("one", 1),("one", 1),("two", 2),("three", 3),("<null2>", null)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@orc_table_1
POSTHOOK: query: insert into table orc_table_1 values ("<null1>", null),("one", 1),("one", 1),("two", 2),("three", 3),("<null2>", null)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@orc_table_1
POSTHOOK: Lineage: orc_table_1.a SCRIPT []
POSTHOOK: Lineage: orc_table_1.v1 SCRIPT []
PREHOOK: query: insert into table orc_table_2 values (0, "ZERO"),(2, "TWO"), (3, "THREE"),(null, "<NULL1>"),(4, "FOUR"),(null, "<NULL2>")
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@orc_table_2
POSTHOOK: query: insert into table orc_table_2 values (0, "ZERO"),(2, "TWO"), (3, "THREE"),(null, "<NULL1>"),(4, "FOUR"),(null, "<NULL2>")
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@orc_table_2
POSTHOOK: Lineage: orc_table_2.c SCRIPT []
POSTHOOK: Lineage: orc_table_2.v2 SCRIPT []
PREHOOK: query: select * from orc_table_1
PREHOOK: type: QUERY
PREHOOK: Input: default@orc_table_1
#### A masked pattern was here ####
POSTHOOK: query: select * from orc_table_1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@orc_table_1
#### A masked pattern was here ####
<null1> NULL
<null2> NULL
one 1
one 1
three 3
two 2
PREHOOK: query: select * from orc_table_2
PREHOOK: type: QUERY
PREHOOK: Input: default@orc_table_2
#### A masked pattern was here ####
POSTHOOK: query: select * from orc_table_2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@orc_table_2
#### A masked pattern was here ####
0 ZERO
2 TWO
3 THREE
4 FOUR
NULL <NULL1>
NULL <NULL2>
PREHOOK: query: explain vectorization detail
select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c
PREHOOK: type: QUERY
PREHOOK: Input: default@orc_table_1
PREHOOK: Input: default@orc_table_2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@orc_table_1
POSTHOOK: Input: default@orc_table_2
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 1 <- Map 2 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join 0 to 1
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Map Join Vectorization:
bigTableKeyColumns: 1:int
bigTableRetainColumnNums: [0, 1]
bigTableValueColumns: 0:string, 1:int
className: VectorMapJoinOuterLongOperator
native: true
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
outerSmallTableKeyMapping: 1 -> 3
projectedOutput: 0:string, 1:int, 3:int, 4:string
smallTableValueMapping: 4:string
hashTableImplementationType: OPTIMIZED
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
1 Map 2
Statistics: Num rows: 9 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 9 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 2
includeColumns: [0, 1]
dataColumns: v1:string, a:int
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, string]
Map 2
Map Operator Tree:
TableScan
alias: t2
filterExpr: c is not null (type: boolean)
Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: c is not null (type: boolean)
Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: c (type: int), v2 (type: string)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 0:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 1:string
Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 2
includeColumns: [0, 1]
dataColumns: c:int, v2:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c
PREHOOK: type: QUERY
PREHOOK: Input: default@orc_table_1
PREHOOK: Input: default@orc_table_2
#### A masked pattern was here ####
POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@orc_table_1
POSTHOOK: Input: default@orc_table_2
#### A masked pattern was here ####
<null1> NULL NULL NULL
<null2> NULL NULL NULL
one 1 NULL NULL
one 1 NULL NULL
three 3 3 THREE
two 2 2 TWO
PREHOOK: query: explain vectorization detail
select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c
PREHOOK: type: QUERY
PREHOOK: Input: default@orc_table_1
PREHOOK: Input: default@orc_table_2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@orc_table_1
POSTHOOK: Input: default@orc_table_2
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 2 <- Map 1 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: t1
filterExpr: a is not null (type: boolean)
Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: SelectColumnIsNotNull(col 1:int)
predicate: a is not null (type: boolean)
Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col1 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
keyColumns: 1:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 0:string
Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 2
includeColumns: [0, 1]
dataColumns: v1:string, a:int
partitionColumnCount: 0
scratchColumnTypeNames: []
Map 2
Map Operator Tree:
TableScan
alias: t2
Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: c (type: int), v2 (type: string)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join 0 to 1
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Map Join Vectorization:
bigTableKeyColumns: 0:int
bigTableRetainColumnNums: [0, 1]
bigTableValueColumns: 0:int, 1:string
className: VectorMapJoinOuterLongOperator
native: true
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
outerSmallTableKeyMapping: 0 -> 4
projectedOutput: 3:string, 4:int, 0:int, 1:string
smallTableValueMapping: 3:string
hashTableImplementationType: OPTIMIZED
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
0 Map 1
Statistics: Num rows: 9 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 9 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 2
includeColumns: [0, 1]
dataColumns: c:int, v2:string
partitionColumnCount: 0
scratchColumnTypeNames: [string, bigint]
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c
PREHOOK: type: QUERY
PREHOOK: Input: default@orc_table_1
PREHOOK: Input: default@orc_table_2
#### A masked pattern was here ####
POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@orc_table_1
POSTHOOK: Input: default@orc_table_2
#### A masked pattern was here ####
NULL NULL 0 ZERO
NULL NULL 4 FOUR
NULL NULL NULL <NULL1>
NULL NULL NULL <NULL2>
three 3 3 THREE
two 2 2 TWO