| PREHOOK: query: create table test_bucket(age int, name string, dept string) clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored as ORC |
| PREHOOK: type: CREATETABLE |
| PREHOOK: Output: database:default |
| PREHOOK: Output: default@test_bucket |
| POSTHOOK: query: create table test_bucket(age int, name string, dept string) clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored as ORC |
| POSTHOOK: type: CREATETABLE |
| POSTHOOK: Output: database:default |
| POSTHOOK: Output: default@test_bucket |
| PREHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2') |
| PREHOOK: type: QUERY |
| PREHOOK: Input: _dummy_database@_dummy_table |
| PREHOOK: Output: default@test_bucket |
| POSTHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2') |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: _dummy_database@_dummy_table |
| POSTHOOK: Output: default@test_bucket |
| POSTHOOK: Lineage: test_bucket.age SCRIPT [] |
| POSTHOOK: Lineage: test_bucket.dept SCRIPT [] |
| POSTHOOK: Lineage: test_bucket.name SCRIPT [] |
| PREHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2') |
| PREHOOK: type: QUERY |
| PREHOOK: Input: _dummy_database@_dummy_table |
| PREHOOK: Output: default@test_bucket |
| POSTHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2') |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: _dummy_database@_dummy_table |
| POSTHOOK: Output: default@test_bucket |
| POSTHOOK: Lineage: test_bucket.age SCRIPT [] |
| POSTHOOK: Lineage: test_bucket.dept SCRIPT [] |
| POSTHOOK: Lineage: test_bucket.name SCRIPT [] |
| PREHOOK: query: explain vectorization detail select age, name, count(*) from test_bucket group by age, name having count(*) > 1 |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@test_bucket |
| #### A masked pattern was here #### |
| POSTHOOK: query: explain vectorization detail select age, name, count(*) from test_bucket group by age, name having count(*) > 1 |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@test_bucket |
| #### A masked pattern was here #### |
| PLAN VECTORIZATION: |
| enabled: true |
| enabledConditionsMet: [hive.vectorized.execution.enabled IS true] |
| |
| STAGE DEPENDENCIES: |
| Stage-1 is a root stage |
| Stage-0 depends on stages: Stage-1 |
| |
| STAGE PLANS: |
| Stage: Stage-1 |
| Tez |
| #### A masked pattern was here #### |
| Edges: |
| Reducer 2 <- Map 1 (SIMPLE_EDGE) |
| #### A masked pattern was here #### |
| Vertices: |
| Map 1 |
| Map Operator Tree: |
| TableScan |
| alias: test_bucket |
| Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE |
| TableScan Vectorization: |
| native: true |
| vectorizationSchemaColumns: [0:age:int, 1:name:string, 2:dept:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 4:ROW__IS__DELETED:boolean] |
| Select Operator |
| expressions: age (type: int), name (type: string) |
| outputColumnNames: age, name |
| Select Vectorization: |
| className: VectorSelectOperator |
| native: true |
| projectedOutputColumnNums: [0, 1] |
| Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE |
| Group By Operator |
| aggregations: count() |
| bucketGroup: true |
| Group By Vectorization: |
| aggregators: VectorUDAFCountStar(*) -> bigint |
| className: VectorGroupByOperator |
| groupByMode: HASH |
| keyExpressions: col 0:int, col 1:string |
| native: false |
| vectorProcessingMode: HASH |
| projectedOutputColumnNums: [0] |
| keys: age (type: int), name (type: string) |
| minReductionHashAggr: 0.5 |
| mode: hash |
| outputColumnNames: _col0, _col1, _col2 |
| Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE |
| Reduce Output Operator |
| key expressions: _col0 (type: int), _col1 (type: string) |
| null sort order: zz |
| sort order: ++ |
| Map-reduce partition columns: _col0 (type: int), _col1 (type: string) |
| Reduce Sink Vectorization: |
| className: VectorReduceSinkMultiKeyOperator |
| keyColumns: 0:int, 1:string |
| native: true |
| nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true |
| valueColumns: 2:bigint |
| Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE |
| value expressions: _col2 (type: bigint) |
| Execution mode: vectorized, llap |
| LLAP IO: all inputs |
| Map Vectorization: |
| enabled: true |
| enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true |
| inputFormatFeatureSupport: [DECIMAL_64] |
| featureSupportInUse: [DECIMAL_64] |
| inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat |
| allNative: false |
| usesVectorUDFAdaptor: false |
| vectorized: true |
| rowBatchContext: |
| dataColumnCount: 3 |
| includeColumns: [0, 1] |
| dataColumns: age:int, name:string, dept:string |
| partitionColumnCount: 0 |
| scratchColumnTypeNames: [] |
| Reducer 2 |
| Execution mode: vectorized, llap |
| Reduce Vectorization: |
| enabled: true |
| enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true |
| reduceColumnNullOrder: zz |
| reduceColumnSortOrder: ++ |
| allNative: false |
| usesVectorUDFAdaptor: false |
| vectorized: true |
| rowBatchContext: |
| dataColumnCount: 3 |
| dataColumns: KEY._col0:int, KEY._col1:string, VALUE._col0:bigint |
| partitionColumnCount: 0 |
| scratchColumnTypeNames: [] |
| Reduce Operator Tree: |
| Group By Operator |
| aggregations: count(VALUE._col0) |
| Group By Vectorization: |
| aggregators: VectorUDAFCountMerge(col 2:bigint) -> bigint |
| className: VectorGroupByOperator |
| groupByMode: MERGEPARTIAL |
| keyExpressions: col 0:int, col 1:string |
| native: false |
| vectorProcessingMode: MERGE_PARTIAL |
| projectedOutputColumnNums: [0] |
| keys: KEY._col0 (type: int), KEY._col1 (type: string) |
| mode: mergepartial |
| outputColumnNames: _col0, _col1, _col2 |
| Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE |
| Filter Operator |
| Filter Vectorization: |
| className: VectorFilterOperator |
| native: true |
| predicateExpression: FilterLongColGreaterLongScalar(col 2:bigint, val 1) |
| predicate: (_col2 > 1L) (type: boolean) |
| Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE |
| File Output Operator |
| compressed: false |
| File Sink Vectorization: |
| className: VectorFileSinkOperator |
| native: false |
| Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE |
| table: |
| input format: org.apache.hadoop.mapred.SequenceFileInputFormat |
| output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat |
| serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| |
| Stage: Stage-0 |
| Fetch Operator |
| limit: -1 |
| Processor Tree: |
| ListSink |
| |
| PREHOOK: query: select age, name, count(*) from test_bucket group by age, name having count(*) > 1 |
| PREHOOK: type: QUERY |
| PREHOOK: Input: default@test_bucket |
| #### A masked pattern was here #### |
| POSTHOOK: query: select age, name, count(*) from test_bucket group by age, name having count(*) > 1 |
| POSTHOOK: type: QUERY |
| POSTHOOK: Input: default@test_bucket |
| #### A masked pattern was here #### |
| 1 user1 2 |
| 2 user2 2 |