PREHOOK: query: DROP TABLE parquet_complex_types_staging
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE parquet_complex_types_staging
POSTHOOK: type: DROPTABLE
PREHOOK: query: DROP TABLE parquet_complex_types
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE parquet_complex_types
POSTHOOK: type: DROPTABLE
PREHOOK: query: CREATE TABLE parquet_complex_types_staging (
id int,
m1 map<string, varchar(5)>,
l1 array<int>,
st1 struct<c1:int, c2:string>,
listIndex int
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@parquet_complex_types_staging
POSTHOOK: query: CREATE TABLE parquet_complex_types_staging (
id int,
m1 map<string, varchar(5)>,
l1 array<int>,
st1 struct<c1:int, c2:string>,
listIndex int
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':'
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@parquet_complex_types_staging
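For reference, a hypothetical line of the delimited input this staging table expects (the actual contents of parquet_complex_types.txt are not shown in this output): fields split on '|', collection items on ',', map keys on ':', with the struct fields c1 and c2 also separated by the collection delimiter.

1|k1:v1,k2:v2|100,101|100,one|0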
PREHOOK: query: CREATE TABLE parquet_complex_types (
id int,
m1 map<string, varchar(5)>,
l1 array<int>,
st1 struct<c1:int, c2:string>,
listIndex int
) STORED AS PARQUET
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@parquet_complex_types
POSTHOOK: query: CREATE TABLE parquet_complex_types (
id int,
m1 map<string, varchar(5)>,
l1 array<int>,
st1 struct<c1:int, c2:string>,
listIndex int
) STORED AS PARQUET
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@parquet_complex_types
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_complex_types.txt' OVERWRITE INTO TABLE parquet_complex_types_staging
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@parquet_complex_types_staging
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_complex_types.txt' OVERWRITE INTO TABLE parquet_complex_types_staging
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@parquet_complex_types_staging
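The staging step exists because LOAD DATA only moves the file into the table's directory and performs no format conversion, so delimited text cannot be loaded directly into a Parquet table; the INSERT ... SELECT that follows does the actual rewrite into Parquet.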
PREHOOK: query: INSERT OVERWRITE TABLE parquet_complex_types
SELECT id, m1, l1, st1, listIndex FROM parquet_complex_types_staging where id < 1024
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types_staging
PREHOOK: Output: default@parquet_complex_types
POSTHOOK: query: INSERT OVERWRITE TABLE parquet_complex_types
SELECT id, m1, l1, st1, listIndex FROM parquet_complex_types_staging where id < 1024
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types_staging
POSTHOOK: Output: default@parquet_complex_types
POSTHOOK: Lineage: parquet_complex_types.id SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:id, type:int, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.l1 SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:l1, type:array<int>, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.listindex SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:listindex, type:int, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.m1 SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:m1, type:map<string,varchar(5)>, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ]
PREHOOK: query: select count(*) from parquet_complex_types
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from parquet_complex_types
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
1023
PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
PREHOOK: type: QUERY
POSTHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: parquet_complex_types
Statistics: Num rows: 1023 Data size: 5115 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
expressions: l1 (type: array<int>), l1[0] (type: int), l1[1] (type: int), l1[listindex] (type: int), listindex (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 6, 7, 8, 4]
selectExpressions: ListIndexColScalar(col 2:array<int>, col 0:int) -> 6:int, ListIndexColScalar(col 2:array<int>, col 1:int) -> 7:int, ListIndexColColumn(col 2:array<int>, col 4:int) -> 8:int
Statistics: Num rows: 1023 Data size: 5115 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Limit Vectorization:
className: VectorLimitOperator
native: true
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: []
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Stage: Stage-0
Fetch Operator
limit: 10
Processor Tree:
ListSink
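The selectExpressions above show the two vectorized forms of the index operator: a constant subscript such as l1[0] compiles to ListIndexColScalar, while a column subscript such as l1[listIndex] compiles to ListIndexColColumn. A minimal sketch that isolates just those two expressions, under the same setting reported in the plan header (not part of the recorded run):

set hive.vectorized.execution.enabled=true;
explain vectorization expression
select l1[0],          -- constant index -> ListIndexColScalar
       l1[listIndex]   -- column index   -> ListIndexColColumn
from parquet_complex_types limit 1;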
PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
[100,101] 100 101 100 0
[102,103] 102 103 103 1
[104,105] 104 105 104 0
[106,107] 106 107 107 1
[108,109] 108 109 108 0
[110,111] 110 111 111 1
[112,113] 112 113 112 0
[114,115] 114 115 115 1
[116,117] 116 117 116 0
[118,119] 118 119 119 1
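In the ten rows above, listIndex alternates between 0 and 1, so l1[listIndex] returns the first element on even-positioned rows and the second on odd-positioned rows. A hedged consistency probe, assuming the pattern holds beyond the rows shown (which this output does not prove):

select count(*) from parquet_complex_types where listIndex not in (0, 1);
-- expected 0 if listIndex only ever takes the values 0 and 1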
PREHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10
PREHOOK: type: QUERY
POSTHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: parquet_complex_types
Statistics: Num rows: 1023 Data size: 5115 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array<int>, col 0:int) -> 6:int)
predicate: (l1[0] > 1000) (type: boolean)
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l1[1] (type: int), l1[0] (type: int)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6, 7]
selectExpressions: ListIndexColScalar(col 2:array<int>, col 1:int) -> 6:int, ListIndexColScalar(col 2:array<int>, col 0:int) -> 7:int
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col1)
Group By Vectorization:
aggregators: VectorUDAFSumLong(col 7:int) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 6:int
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: [0]
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkOperator
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: []
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: bigint), _col0 (type: int)
outputColumnNames: _col0, _col2
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
TableScan Vectorization:
native: true
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Reduce Sink Vectorization:
className: VectorReduceSinkOperator
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: []
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
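Both reduce stages above stay unvectorized for one reason: "hive.execution.engine mr IN [tez, spark] IS false". A hedged variation, not part of this recorded run, that should clear that condition and vectorize the reduce side as well:

set hive.execution.engine=tez;
explain vectorization expression
select sum(l1[0]), l1[1] from parquet_complex_types
where l1[0] > 1000 group by l1[1] order by l1[1] limit 10;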
PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
2144 2145
2142 2143
2140 2141
2138 2139
2136 2137
2134 2135
2132 2133
2130 2131
2128 2129
2126 2127
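Each result pair above satisfies sum(l1[0]) = l1[1] - 1, consistent with every array holding two consecutive integers and each l1[1] value keying a single-row group (an inference from the rows shown, not something this output guarantees). A quick check under that assumption:

select count(*) from parquet_complex_types where l1[0] <> l1[1] - 1;
-- expected 0 if every l1 holds consecutive integers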
PREHOOK: query: INSERT OVERWRITE TABLE parquet_complex_types
SELECT id, m1, l1, st1, listIndex FROM parquet_complex_types_staging where id < 1025
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types_staging
PREHOOK: Output: default@parquet_complex_types
POSTHOOK: query: INSERT OVERWRITE TABLE parquet_complex_types
SELECT id, m1, l1, st1, listIndex FROM parquet_complex_types_staging where id < 1025
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types_staging
POSTHOOK: Output: default@parquet_complex_types
POSTHOOK: Lineage: parquet_complex_types.id SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:id, type:int, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.l1 SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:l1, type:array<int>, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.listindex SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:listindex, type:int, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.m1 SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:m1, type:map<string,varchar(5)>, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ]
PREHOOK: query: select count(*) from parquet_complex_types
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from parquet_complex_types
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
1024
PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
PREHOOK: type: QUERY
POSTHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: parquet_complex_types
Statistics: Num rows: 1024 Data size: 5120 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
expressions: l1 (type: array<int>), l1[0] (type: int), l1[1] (type: int), l1[listindex] (type: int), listindex (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 6, 7, 8, 4]
selectExpressions: ListIndexColScalar(col 2:array<int>, col 0:int) -> 6:int, ListIndexColScalar(col 2:array<int>, col 1:int) -> 7:int, ListIndexColColumn(col 2:array<int>, col 4:int) -> 8:int
Statistics: Num rows: 1024 Data size: 5120 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Limit Vectorization:
className: VectorLimitOperator
native: true
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: []
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Stage: Stage-0
Fetch Operator
limit: 10
Processor Tree:
ListSink
PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
[100,101] 100 101 100 0
[102,103] 102 103 103 1
[104,105] 104 105 104 0
[106,107] 106 107 107 1
[108,109] 108 109 108 0
[110,111] 110 111 111 1
[112,113] 112 113 112 0
[114,115] 114 115 115 1
[116,117] 116 117 116 0
[118,119] 118 119 119 1
PREHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10
PREHOOK: type: QUERY
POSTHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: parquet_complex_types
Statistics: Num rows: 1024 Data size: 5120 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array<int>, col 0:int) -> 6:int)
predicate: (l1[0] > 1000) (type: boolean)
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l1[1] (type: int), l1[0] (type: int)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6, 7]
selectExpressions: ListIndexColScalar(col 2:array<int>, col 1:int) -> 6:int, ListIndexColScalar(col 2:array<int>, col 0:int) -> 7:int
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col1)
Group By Vectorization:
aggregators: VectorUDAFSumLong(col 7:int) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 6:int
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: [0]
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkOperator
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: []
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: bigint), _col0 (type: int)
outputColumnNames: _col0, _col2
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
TableScan Vectorization:
native: true
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Reduce Sink Vectorization:
className: VectorReduceSinkOperator
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: []
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
2146 2147
2144 2145
2142 2143
2140 2141
2138 2139
2136 2137
2134 2135
2132 2133
2130 2131
2128 2129
PREHOOK: query: INSERT OVERWRITE TABLE parquet_complex_types
SELECT id, m1, l1, st1, listIndex FROM parquet_complex_types_staging
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types_staging
PREHOOK: Output: default@parquet_complex_types
POSTHOOK: query: INSERT OVERWRITE TABLE parquet_complex_types
SELECT id, m1, l1, st1, listIndex FROM parquet_complex_types_staging
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types_staging
POSTHOOK: Output: default@parquet_complex_types
POSTHOOK: Lineage: parquet_complex_types.id SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:id, type:int, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.l1 SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:l1, type:array<int>, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.listindex SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:listindex, type:int, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.m1 SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:m1, type:map<string,varchar(5)>, comment:null), ]
POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_staging)parquet_complex_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:string>, comment:null), ]
PREHOOK: query: select count(*) from parquet_complex_types
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from parquet_complex_types
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
1025
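A plausible reading of the three row counts, 1023, 1024, and 1025 (the output does not state the intent): they straddle VectorizedRowBatch.DEFAULT_SIZE, which is 1024, so the vectorized Parquet reader is exercised with a partial final batch, exactly one full batch, and a full batch plus a one-row remainder.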
PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
PREHOOK: type: QUERY
POSTHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: parquet_complex_types
Statistics: Num rows: 1025 Data size: 5125 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
expressions: l1 (type: array<int>), l1[0] (type: int), l1[1] (type: int), l1[listindex] (type: int), listindex (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 6, 7, 8, 4]
selectExpressions: ListIndexColScalar(col 2:array<int>, col 0:int) -> 6:int, ListIndexColScalar(col 2:array<int>, col 1:int) -> 7:int, ListIndexColColumn(col 2:array<int>, col 4:int) -> 8:int
Statistics: Num rows: 1025 Data size: 5125 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Limit Vectorization:
className: VectorLimitOperator
native: true
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: []
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Stage: Stage-0
Fetch Operator
limit: 10
Processor Tree:
ListSink
PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
[100,101] 100 101 100 0
[102,103] 102 103 103 1
[104,105] 104 105 104 0
[106,107] 106 107 107 1
[108,109] 108 109 108 0
[110,111] 110 111 111 1
[112,113] 112 113 112 0
[114,115] 114 115 115 1
[116,117] 116 117 116 0
[118,119] 118 119 119 1
PREHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10
PREHOOK: type: QUERY
POSTHOOK: query: explain vectorization expression select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] limit 10
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
enabled: true
enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: parquet_complex_types
Statistics: Num rows: 1025 Data size: 5125 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
native: true
predicateExpression: FilterLongColGreaterLongScalar(col 6:int, val 1000)(children: ListIndexColScalar(col 2:array<int>, col 0:int) -> 6:int)
predicate: (l1[0] > 1000) (type: boolean)
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l1[1] (type: int), l1[0] (type: int)
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [6, 7]
selectExpressions: ListIndexColScalar(col 2:array<int>, col 1:int) -> 6:int, ListIndexColScalar(col 2:array<int>, col 0:int) -> 7:int
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col1)
Group By Vectorization:
aggregators: VectorUDAFSumLong(col 7:int) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 6:int
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: [0]
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkOperator
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 341 Data size: 1705 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFormatFeatureSupport: []
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: bigint), _col0 (type: int)
outputColumnNames: _col0, _col2
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
TableScan Vectorization:
native: true
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Reduce Sink Vectorization:
className: VectorReduceSinkOperator
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
inputFormatFeatureSupport: []
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 170 Data size: 850 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_complex_types
#### A masked pattern was here ####
2148 2149
2146 2147
2144 2145
2142 2143
2140 2141
2138 2139
2136 2137
2134 2135
2132 2133
2130 2131