| PREHOOK: query: CREATE TABLE orc_table_1(v1 STRING, a INT) STORED AS ORC |
| PREHOOK: type: CREATETABLE |
| PREHOOK: Output: database:default |
| PREHOOK: Output: default@orc_table_1 |
| POSTHOOK: query: CREATE TABLE orc_table_1(v1 STRING, a INT) STORED AS ORC |
| POSTHOOK: type: CREATETABLE |
| POSTHOOK: Output: database:default |
| POSTHOOK: Output: default@orc_table_1 |
| PREHOOK: query: CREATE TABLE orc_table_2(c INT, v2 STRING) STORED AS ORC |
| PREHOOK: type: CREATETABLE |
| PREHOOK: Output: database:default |
| PREHOOK: Output: default@orc_table_2 |
| POSTHOOK: query: CREATE TABLE orc_table_2(c INT, v2 STRING) STORED AS ORC |
| POSTHOOK: type: CREATETABLE |
| POSTHOOK: Output: database:default |
| POSTHOOK: Output: default@orc_table_2 |
| PREHOOK: query: insert into table orc_table_1 values ("<null1>", null),("one", 1),("one", 1),("two", 2),("three", 3),("<null2>", null) |
| PREHOOK: type: QUERY |
| PREHOOK: Input: _dummy_database@_dummy_table |
| PREHOOK: Output: default@orc_table_1 |
| POSTHOOK: query: insert into table orc_table_1 values ("<null1>", null),("one", 1),("one", 1),("two", 2),("three", 3),("<null2>", null) |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: _dummy_database@_dummy_table |
| POSTHOOK: Output: default@orc_table_1 |
| POSTHOOK: Lineage: orc_table_1.a SCRIPT [] |
| POSTHOOK: Lineage: orc_table_1.v1 SCRIPT [] |
| PREHOOK: query: insert into table orc_table_2 values (0, "ZERO"),(2, "TWO"), (3, "THREE"),(null, "<NULL1>"),(4, "FOUR"),(null, "<NULL2>") |
| PREHOOK: type: QUERY |
| PREHOOK: Input: _dummy_database@_dummy_table |
| PREHOOK: Output: default@orc_table_2 |
| POSTHOOK: query: insert into table orc_table_2 values (0, "ZERO"),(2, "TWO"), (3, "THREE"),(null, "<NULL1>"),(4, "FOUR"),(null, "<NULL2>") |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: _dummy_database@_dummy_table |
| POSTHOOK: Output: default@orc_table_2 |
| POSTHOOK: Lineage: orc_table_2.c SCRIPT [] |
| POSTHOOK: Lineage: orc_table_2.v2 SCRIPT [] |
| PREHOOK: query: select * from orc_table_1 |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@orc_table_1 |
| #### A masked pattern was here #### |
| POSTHOOK: query: select * from orc_table_1 |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@orc_table_1 |
| #### A masked pattern was here #### |
| <null1> NULL |
| <null2> NULL |
| one 1 |
| one 1 |
| three 3 |
| two 2 |
| PREHOOK: query: select * from orc_table_2 |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@orc_table_2 |
| #### A masked pattern was here #### |
| POSTHOOK: query: select * from orc_table_2 |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@orc_table_2 |
| #### A masked pattern was here #### |
| 0 ZERO |
| 2 TWO |
| 3 THREE |
| 4 FOUR |
| NULL <NULL1> |
| NULL <NULL2> |
| PREHOOK: query: explain vectorization detail |
| select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@orc_table_1 |
| PREHOOK: Input: default@orc_table_2 |
| #### A masked pattern was here #### |
| POSTHOOK: query: explain vectorization detail |
| select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@orc_table_1 |
| POSTHOOK: Input: default@orc_table_2 |
| #### A masked pattern was here #### |
| PLAN VECTORIZATION: |
| enabled: true |
| enabledConditionsMet: [hive.vectorized.execution.enabled IS true] |
| |
| STAGE DEPENDENCIES: |
| Stage-1 is a root stage |
| Stage-0 depends on stages: Stage-1 |
| |
| STAGE PLANS: |
| Stage: Stage-1 |
| Tez |
| #### A masked pattern was here #### |
| Edges: |
| Map 1 <- Map 2 (BROADCAST_EDGE) |
| #### A masked pattern was here #### |
| Vertices: |
| Map 1 |
| Map Operator Tree: |
| TableScan |
| alias: t1 |
| Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE |
| TableScan Vectorization: |
| native: true |
| vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] |
| Select Operator |
| expressions: v1 (type: string), a (type: int) |
| outputColumnNames: _col0, _col1 |
| Select Vectorization: |
| className: VectorSelectOperator |
| native: true |
| projectedOutputColumnNums: [0, 1] |
| Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE |
| Map Join Operator |
| condition map: |
| Left Outer Join 0 to 1 |
| keys: |
| 0 _col1 (type: int) |
| 1 _col0 (type: int) |
| Map Join Vectorization: |
| bigTableKeyColumns: 1:int |
| bigTableRetainColumnNums: [0, 1] |
| bigTableValueColumns: 0:string, 1:int |
| className: VectorMapJoinOuterLongOperator |
| native: true |
| nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true |
| outerSmallTableKeyMapping: 1 -> 3 |
| projectedOutput: 0:string, 1:int, 3:int, 4:string |
| smallTableValueMapping: 4:string |
| hashTableImplementationType: OPTIMIZED |
| outputColumnNames: _col0, _col1, _col2, _col3 |
| input vertices: |
| 1 Map 2 |
| Statistics: Num rows: 9 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE |
| File Output Operator |
| compressed: false |
| File Sink Vectorization: |
| className: VectorFileSinkOperator |
| native: false |
| Statistics: Num rows: 9 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE |
| table: |
| input format: org.apache.hadoop.mapred.SequenceFileInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| Execution mode: vectorized, llap |
| LLAP IO: all inputs |
| Map Vectorization: |
| enabled: true |
| enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true |
| inputFormatFeatureSupport: [DECIMAL_64] |
| featureSupportInUse: [DECIMAL_64] |
| inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat |
| allNative: false |
| usesVectorUDFAdaptor: false |
| vectorized: true |
| rowBatchContext: |
| dataColumnCount: 2 |
| includeColumns: [0, 1] |
| dataColumns: v1:string, a:int |
| partitionColumnCount: 0 |
| scratchColumnTypeNames: [bigint, string] |
| Map 2 |
| Map Operator Tree: |
| TableScan |
| alias: t2 |
| filterExpr: c is not null (type: boolean) |
| Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE |
| TableScan Vectorization: |
| native: true |
| vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] |
| Filter Operator |
| Filter Vectorization: |
| className: VectorFilterOperator |
| native: true |
| predicateExpression: SelectColumnIsNotNull(col 0:int) |
| predicate: c is not null (type: boolean) |
| Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE |
| Select Operator |
| expressions: c (type: int), v2 (type: string) |
| outputColumnNames: _col0, _col1 |
| Select Vectorization: |
| className: VectorSelectOperator |
| native: true |
| projectedOutputColumnNums: [0, 1] |
| Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE |
| Reduce Output Operator |
| key expressions: _col0 (type: int) |
| null sort order: z |
| sort order: + |
| Map-reduce partition columns: _col0 (type: int) |
| Reduce Sink Vectorization: |
| className: VectorReduceSinkLongOperator |
| keyColumns: 0:int |
| native: true |
| nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true |
| valueColumns: 1:string |
| Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE |
| value expressions: _col1 (type: string) |
| Execution mode: vectorized, llap |
| LLAP IO: all inputs |
| Map Vectorization: |
| enabled: true |
| enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true |
| inputFormatFeatureSupport: [DECIMAL_64] |
| featureSupportInUse: [DECIMAL_64] |
| inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat |
| allNative: true |
| usesVectorUDFAdaptor: false |
| vectorized: true |
| rowBatchContext: |
| dataColumnCount: 2 |
| includeColumns: [0, 1] |
| dataColumns: c:int, v2:string |
| partitionColumnCount: 0 |
| scratchColumnTypeNames: [] |
| |
| Stage: Stage-0 |
| Fetch Operator |
| limit: -1 |
| Processor Tree: |
| ListSink |
| |
| PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@orc_table_1 |
| PREHOOK: Input: default@orc_table_2 |
| #### A masked pattern was here #### |
| POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@orc_table_1 |
| POSTHOOK: Input: default@orc_table_2 |
| #### A masked pattern was here #### |
| <null1> NULL NULL NULL |
| <null2> NULL NULL NULL |
| one 1 NULL NULL |
| one 1 NULL NULL |
| three 3 3 THREE |
| two 2 2 TWO |
| PREHOOK: query: explain vectorization detail |
| select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@orc_table_1 |
| PREHOOK: Input: default@orc_table_2 |
| #### A masked pattern was here #### |
| POSTHOOK: query: explain vectorization detail |
| select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@orc_table_1 |
| POSTHOOK: Input: default@orc_table_2 |
| #### A masked pattern was here #### |
| PLAN VECTORIZATION: |
| enabled: true |
| enabledConditionsMet: [hive.vectorized.execution.enabled IS true] |
| |
| STAGE DEPENDENCIES: |
| Stage-1 is a root stage |
| Stage-0 depends on stages: Stage-1 |
| |
| STAGE PLANS: |
| Stage: Stage-1 |
| Tez |
| #### A masked pattern was here #### |
| Edges: |
| Map 2 <- Map 1 (BROADCAST_EDGE) |
| #### A masked pattern was here #### |
| Vertices: |
| Map 1 |
| Map Operator Tree: |
| TableScan |
| alias: t1 |
| filterExpr: a is not null (type: boolean) |
| Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE |
| TableScan Vectorization: |
| native: true |
| vectorizationSchemaColumns: [0:v1:string, 1:a:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] |
| Filter Operator |
| Filter Vectorization: |
| className: VectorFilterOperator |
| native: true |
| predicateExpression: SelectColumnIsNotNull(col 1:int) |
| predicate: a is not null (type: boolean) |
| Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE |
| Select Operator |
| expressions: v1 (type: string), a (type: int) |
| outputColumnNames: _col0, _col1 |
| Select Vectorization: |
| className: VectorSelectOperator |
| native: true |
| projectedOutputColumnNums: [0, 1] |
| Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE |
| Reduce Output Operator |
| key expressions: _col1 (type: int) |
| null sort order: z |
| sort order: + |
| Map-reduce partition columns: _col1 (type: int) |
| Reduce Sink Vectorization: |
| className: VectorReduceSinkLongOperator |
| keyColumns: 1:int |
| native: true |
| nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true |
| valueColumns: 0:string |
| Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE |
| value expressions: _col0 (type: string) |
| Execution mode: vectorized, llap |
| LLAP IO: all inputs |
| Map Vectorization: |
| enabled: true |
| enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true |
| inputFormatFeatureSupport: [DECIMAL_64] |
| featureSupportInUse: [DECIMAL_64] |
| inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat |
| allNative: true |
| usesVectorUDFAdaptor: false |
| vectorized: true |
| rowBatchContext: |
| dataColumnCount: 2 |
| includeColumns: [0, 1] |
| dataColumns: v1:string, a:int |
| partitionColumnCount: 0 |
| scratchColumnTypeNames: [] |
| Map 2 |
| Map Operator Tree: |
| TableScan |
| alias: t2 |
| Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE |
| TableScan Vectorization: |
| native: true |
| vectorizationSchemaColumns: [0:c:int, 1:v2:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] |
| Select Operator |
| expressions: c (type: int), v2 (type: string) |
| outputColumnNames: _col0, _col1 |
| Select Vectorization: |
| className: VectorSelectOperator |
| native: true |
| projectedOutputColumnNums: [0, 1] |
| Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE |
| Map Join Operator |
| condition map: |
| Right Outer Join 0 to 1 |
| keys: |
| 0 _col1 (type: int) |
| 1 _col0 (type: int) |
| Map Join Vectorization: |
| bigTableKeyColumns: 0:int |
| bigTableRetainColumnNums: [0, 1] |
| bigTableValueColumns: 0:int, 1:string |
| className: VectorMapJoinOuterLongOperator |
| native: true |
| nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true |
| outerSmallTableKeyMapping: 0 -> 4 |
| projectedOutput: 3:string, 4:int, 0:int, 1:string |
| smallTableValueMapping: 3:string |
| hashTableImplementationType: OPTIMIZED |
| outputColumnNames: _col0, _col1, _col2, _col3 |
| input vertices: |
| 0 Map 1 |
| Statistics: Num rows: 9 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE |
| File Output Operator |
| compressed: false |
| File Sink Vectorization: |
| className: VectorFileSinkOperator |
| native: false |
| Statistics: Num rows: 9 Data size: 1484 Basic stats: COMPLETE Column stats: COMPLETE |
| table: |
| input format: org.apache.hadoop.mapred.SequenceFileInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| Execution mode: vectorized, llap |
| LLAP IO: all inputs |
| Map Vectorization: |
| enabled: true |
| enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true |
| inputFormatFeatureSupport: [DECIMAL_64] |
| featureSupportInUse: [DECIMAL_64] |
| inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat |
| allNative: false |
| usesVectorUDFAdaptor: false |
| vectorized: true |
| rowBatchContext: |
| dataColumnCount: 2 |
| includeColumns: [0, 1] |
| dataColumns: c:int, v2:string |
| partitionColumnCount: 0 |
| scratchColumnTypeNames: [string, bigint] |
| |
| Stage: Stage-0 |
| Fetch Operator |
| limit: -1 |
| Processor Tree: |
| ListSink |
| |
| PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@orc_table_1 |
| PREHOOK: Input: default@orc_table_2 |
| #### A masked pattern was here #### |
| POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@orc_table_1 |
| POSTHOOK: Input: default@orc_table_2 |
| #### A masked pattern was here #### |
| NULL NULL 0 ZERO |
| NULL NULL 4 FOUR |
| NULL NULL NULL <NULL1> |
| NULL NULL NULL <NULL2> |
| three 3 3 THREE |
| two 2 2 TWO |