blob: 34daf8169ad9007473c34ed6eae5e8ad989e8e47 [file] [log] [blame]
PREHOOK: query: CREATE TABLE srcorc_n1 STORED AS ORC AS SELECT * FROM src
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
PREHOOK: Output: database:default
PREHOOK: Output: default@srcorc_n1
POSTHOOK: query: CREATE TABLE srcorc_n1 STORED AS ORC AS SELECT * FROM src
POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@src
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcorc_n1
POSTHOOK: Lineage: srcorc_n1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: srcorc_n1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: CREATE TABLE dest1_n154(c1 STRING) STORED AS ORC
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@dest1_n154
POSTHOOK: query: CREATE TABLE dest1_n154(c1 STRING) STORED AS ORC
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1_n154
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
FROM srcorc_n1
INSERT OVERWRITE TABLE dest1_n154 SELECT substr(srcorc_n1.key,1,1) GROUP BY substr(srcorc_n1.key,1,1)
PREHOOK: type: QUERY
PREHOOK: Input: default@srcorc_n1
PREHOOK: Output: default@dest1_n154
POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
FROM srcorc_n1
INSERT OVERWRITE TABLE dest1_n154 SELECT substr(srcorc_n1.key,1,1) GROUP BY substr(srcorc_n1.key,1,1)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcorc_n1
POSTHOOK: Output: default@dest1_n154
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcorc_n1
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
expressions: substr(key, 1, 1) (type: string)
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
selectExpressions: StringSubstrColStartLen(col 0:string, start 0, length 1) -> 3:string
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: rand() (type: double)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: [DECIMAL_64]
featureSupportInUse: [DECIMAL_64]
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Reduce Operator Tree:
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: PARTIAL1
keyExpressions: col 0:string
native: false
vectorProcessingMode: STREAMING
projectedOutputColumnNums: []
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string)
Reduce Sink Vectorization:
className: VectorReduceSinkStringOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Reduce Operator Tree:
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: FINAL
keyExpressions: col 0:string
native: false
vectorProcessingMode: STREAMING
projectedOutputColumnNums: []
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.dest1_n154
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
Map-reduce partition columns: rand() (type: double)
Reduce Sink Vectorization:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueExpressions: StringLength(col 0:string) -> 1:int, VectorCoalesce(columns [2, 3])(children: StringLength(col 0:string) -> 2:int, ConstantVectorExpression(val 0) -> 3:int) -> 4:int
Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: length(c1) (type: int), COALESCE(length(c1),0) (type: int), c1 (type: string)
Reducer 4
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported
vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), avg(VALUE._col1), count(1), count(VALUE._col3), compute_bit_vector(VALUE._col3, 'hll')
mode: partial1
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary)
Reducer 5
Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_bit_vector not supported
vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector(VALUE._col4)
mode: final
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 266 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
Stage: Stage-0
Move Operator
tables:
replace: true
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.dest1_n154
Stage: Stage-3
Stats Work
Basic Stats Work:
Column Stats Desc:
Columns: c1
Column Types: string
Table: default.dest1_n154
PREHOOK: query: FROM srcorc_n1
INSERT OVERWRITE TABLE dest1_n154 SELECT substr(srcorc_n1.key,1,1) GROUP BY substr(srcorc_n1.key,1,1)
PREHOOK: type: QUERY
PREHOOK: Input: default@srcorc_n1
PREHOOK: Output: default@dest1_n154
POSTHOOK: query: FROM srcorc_n1
INSERT OVERWRITE TABLE dest1_n154 SELECT substr(srcorc_n1.key,1,1) GROUP BY substr(srcorc_n1.key,1,1)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcorc_n1
POSTHOOK: Output: default@dest1_n154
POSTHOOK: Lineage: dest1_n154.c1 EXPRESSION [(srcorc_n1)srcorc_n1.FieldSchema(name:key, type:string, comment:null), ]
PREHOOK: query: SELECT dest1_n154.* FROM dest1_n154
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1_n154
#### A masked pattern was here ####
POSTHOOK: query: SELECT dest1_n154.* FROM dest1_n154
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1_n154
#### A masked pattern was here ####
0
1
2
3
4
5
6
7
8
9