blob: 780cb6ccd27bbf05a3e2f7775504515a979b20e4 [file] [log] [blame]
PREHOOK: query: create table test_bucket(age int, name string, dept string) clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored as ORC
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@test_bucket
POSTHOOK: query: create table test_bucket(age int, name string, dept string) clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored as ORC
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_bucket
PREHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@test_bucket
POSTHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_bucket
POSTHOOK: Lineage: test_bucket.age SCRIPT []
POSTHOOK: Lineage: test_bucket.dept SCRIPT []
POSTHOOK: Lineage: test_bucket.name SCRIPT []
PREHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@test_bucket
POSTHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_bucket
POSTHOOK: Lineage: test_bucket.age SCRIPT []
POSTHOOK: Lineage: test_bucket.dept SCRIPT []
POSTHOOK: Lineage: test_bucket.name SCRIPT []
PREHOOK: query: explain vectorization detail select age, name, count(*) from test_bucket group by age, name having count(*) > 1
PREHOOK: type: QUERY
PREHOOK: Input: default@test_bucket
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail select age, name, count(*) from test_bucket group by age, name having count(*) > 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_bucket
#### A masked pattern was here ####
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: test_bucket
Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:age:int, 1:name:string, 2:dept:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 4:ROW__IS__DELETED:boolean]
Select Operator
expressions: age (type: int), name (type: string)
outputColumnNames: age, name
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
bucketGroup: true
Group By Vectorization:
aggregators: VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 0:int, col 1:string
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: [0]
keys: age (type: int), name (type: string)
minReductionHashAggr: 0.5
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
keyColumns: 0:int, 1:string
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 2:bigint
Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
includeColumns: [0, 1]
dataColumns: age:int, name:string, dept:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
reduceColumnNullOrder: zz
reduceColumnSortOrder: ++
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
dataColumnCount: 3
dataColumns: KEY._col0:int, KEY._col1:string, VALUE._col0:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
Group By Vectorization:
aggregators: VectorUDAFCountMerge(col 2:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
keyExpressions: col 0:int, col 1:string
native: false
vectorProcessingMode: MERGE_PARTIAL
projectedOutputColumnNums: [0]
keys: KEY._col0 (type: int), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: FilterLongColGreaterLongScalar(col 2:bigint, val 1)
predicate: (_col2 > 1L) (type: boolean)
Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select age, name, count(*) from test_bucket group by age, name having count(*) > 1
PREHOOK: type: QUERY
PREHOOK: Input: default@test_bucket
#### A masked pattern was here ####
POSTHOOK: query: select age, name, count(*) from test_bucket group by age, name having count(*) > 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_bucket
#### A masked pattern was here ####
1 user1 2
2 user2 2